In [8]:
# ==== Instalación de dependencias (Colab) ====


# !pip install -q torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu121
# !pip install -q torchmetrics opencv-python matplotlib tqdm albumentations
# !pip install -q pycocotools  # métricas tipo COCO si las requieres

import sys, platform
print("Python:", sys.version)
import torch, torchvision, matplotlib
print("Torch:", torch.__version__, "| Torchvision:", torchvision.__version__)
print("CUDA disponible:", torch.cuda.is_available())
print("Matplotlib:", matplotlib.__version__)
Python: 3.11.13 (main, Jun  4 2025, 08:57:29) [GCC 11.4.0]
Torch: 2.6.0+cu124 | Torchvision: 0.21.0+cu124
CUDA disponible: False
Matplotlib: 3.10.0
In [9]:
!rm -rf datasets/african-wildlife
!mkdir -p datasets

!wget -O datasets/african-wildlife.zip https://github.com/ultralytics/assets/releases/download/v0.0.0/african-wildlife.zip

import zipfile
with zipfile.ZipFile("datasets/african-wildlife.zip", 'r') as z:
    z.extractall("datasets")

print("✅ Dataset descargado y descomprimido otra vez")
--2025-08-19 01:53:18--  https://github.com/ultralytics/assets/releases/download/v0.0.0/african-wildlife.zip
Resolving github.com (github.com)... 140.82.121.3
Connecting to github.com (github.com)|140.82.121.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/521807533/094d4bd8-8950-4e21-97ba-d821a75c9abc?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-08-19T02%3A48%3A10Z&rscd=attachment%3B+filename%3Dafrican-wildlife.zip&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-08-19T01%3A47%3A47Z&ske=2025-08-19T02%3A48%3A10Z&sks=b&skv=2018-11-09&sig=MjQAK9wZEb61DNCDtL9DI24cPeeTJTSx3VQTjzxDkKA%3D&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc1NTU2ODU5OSwibmJmIjoxNzU1NTY4Mjk5LCJwYXRoIjoicmVsZWFzZWFzc2V0cHJvZHVjdGlvbi5ibG9iLmNvcmUud2luZG93cy5uZXQifQ.ZzIym9Q-88eLHlPWoshyTr9-1YGfZEaPkT-xrN41SHQ&response-content-disposition=attachment%3B%20filename%3Dafrican-wildlife.zip&response-content-type=application%2Foctet-stream [following]
--2025-08-19 01:53:18--  https://release-assets.githubusercontent.com/github-production-release-asset/521807533/094d4bd8-8950-4e21-97ba-d821a75c9abc?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-08-19T02%3A48%3A10Z&rscd=attachment%3B+filename%3Dafrican-wildlife.zip&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-08-19T01%3A47%3A47Z&ske=2025-08-19T02%3A48%3A10Z&sks=b&skv=2018-11-09&sig=MjQAK9wZEb61DNCDtL9DI24cPeeTJTSx3VQTjzxDkKA%3D&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc1NTU2ODU5OSwibmJmIjoxNzU1NTY4Mjk5LCJwYXRoIjoicmVsZWFzZWFzc2V0cHJvZHVjdGlvbi5ibG9iLmNvcmUud2luZG93cy5uZXQifQ.ZzIym9Q-88eLHlPWoshyTr9-1YGfZEaPkT-xrN41SHQ&response-content-disposition=attachment%3B%20filename%3Dafrican-wildlife.zip&response-content-type=application%2Foctet-stream
Resolving release-assets.githubusercontent.com (release-assets.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to release-assets.githubusercontent.com (release-assets.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 105102519 (100M) [application/octet-stream]
Saving to: ‘datasets/african-wildlife.zip’

datasets/african-wi 100%[===================>] 100.23M   219MB/s    in 0.5s    

2025-08-19 01:53:18 (219 MB/s) - ‘datasets/african-wildlife.zip’ saved [105102519/105102519]

✅ Dataset descargado y descomprimido otra vez
In [10]:
import os

base_dir = 'datasets'

for split in ['train', 'val', 'test']:
    image_path = os.path.join(base_dir, 'images', split)
    label_path = os.path.join(base_dir, 'labels', split)

    image_files = os.listdir(image_path)
    label_files = os.listdir(label_path)

    print(f"{split.upper()} - imágenes: {len(image_files)}, labels: {len(label_files)}")
TRAIN - imágenes: 1052, labels: 1052
VAL - imágenes: 225, labels: 225
TEST - imágenes: 227, labels: 227
In [11]:
# Create a robust, Colab-ready notebook tailored to Sergio's folder layout.
# It auto-detects dataset root among: "dataset", "datasets/african-wildlife", "datasets"
# and provides: EDA → RetinaNet training (torchvision) → mAP eval → best-model save →
# random inference → export images (and an optional montage for the poster).
#
# The file will be saved at /mnt/data/AW_End2End_AutoPath.ipynb

import nbformat as nbf
from textwrap import dedent
from datetime import datetime
import os # Import the os module

nb = nbf.v4.new_notebook()
cells = []

cells.append(nbf.v4.new_markdown_cell(dedent(f"""
# African Wildlife — End-to-End (AutoPath, Colab-ready)
**EDA → RetinaNet (torchvision) → mAP → Export para póster**
*(Generado {datetime.now().strftime('%Y-%m-%d %H:%M:%S')})*

- Detecta la raíz del dataset automáticamente: `dataset`, `datasets/african-wildlife` o `datasets`.
- Guarda el **mejor modelo** en `outputs/retinanet_best.pth` y el último en `outputs/retinanet_last.pth`.
- Exporta predicciones a `outputs/preds/<timestamp>/` y una **lámina-montaje** opcional.
""")))

cells.append(nbf.v4.new_code_cell(dedent("""
# ==== Instalación (ejecútala una vez por sesión de Colab) ====
# !pip install -q torchmetrics opencv-python matplotlib tqdm albumentations pycocotools

import sys, os, glob, random, time, csv
import torch, torchvision, matplotlib
import numpy as np
import cv2
from tqdm.auto import tqdm
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import retinanet_resnet50_fpn_v2
from torchmetrics.detection.mean_ap import MeanAveragePrecision
import torchvision.transforms.functional as TF
import matplotlib.pyplot as plt

print("Python:", sys.version)
print("Torch:", torch.__version__, "| Torchvision:", torchvision.__version__)
print("CUDA disponible:", torch.cuda.is_available())

os.makedirs("outputs/eda", exist_ok=True)
os.makedirs("outputs/preds", exist_ok=True)
os.makedirs("outputs", exist_ok=True)
""")))

cells.append(nbf.v4.new_markdown_cell(dedent("""
## Detectar la raíz del dataset automáticamente
Busca `images/{train,val,test}` y `labels/{train,val,test}` en varias ubicaciones comunes.
""")))

cells.append(nbf.v4.new_code_cell(dedent("""
def find_aw_dir():
    candidates = ["dataset", "datasets/african-wildlife", "datasets"]
    # Opcional: detectar capas anidadas "african-wildlife/african-wildlife"
    for root in list(candidates):
        nested = os.path.join(root, "african-wildlife")
        if os.path.isdir(nested):
            candidates.append(nested)
    for root in candidates:
        if all(os.path.isdir(os.path.join(root, "images", s)) for s in ["train","val","test"]) and \
           all(os.path.isdir(os.path.join(root, "labels", s)) for s in ["train","val","test"]):
            return root
    raise FileNotFoundError("No encuentro una raíz con images/{train,val,test} y labels/{train,val,test}. Revisa tu estructura.")

AW_DIR = find_aw_dir()
print("Dataset root:", AW_DIR)

# Clases
AW_CLASSES = ["buffalo","elephant","rhino","zebra"]
NUM_CLASSES = len(AW_CLASSES) + 1  # + background implícito
CLASS_TO_ID = {name:i+1 for i,name in enumerate(AW_CLASSES)}  # 1..K

# Utilidad: listar conteos
def list_counts():
    for split in ["train","val","test"]:
        imgs = []
        for ext in ["*.jpg","*.jpeg","*.png","*.JPG","*.JPEG","*.PNG"]:
            imgs += glob.glob(os.path.join(AW_DIR, "images", split, ext))
        lbls = glob.glob(os.path.join(AW_DIR, "labels", split, "*.txt"))
        print(f"[{split}] imágenes: {len(imgs)} | labels: {len(lbls)}")
list_counts()
""")))

cells.append(nbf.v4.new_markdown_cell(dedent("""
## EDA: conteos y muestras con cajas
Guarda ejemplos en `outputs/eda/`.
""")))

cells.append(nbf.v4.new_code_cell(dedent("""
from collections import Counter

def read_yolo_labels(txt_path):
    boxes = []
    if not os.path.exists(txt_path):
        return boxes
    with open(txt_path, "r") as f:
        for line in f.read().strip().splitlines():
            if not line:
                continue
            c, xc, yc, w, h = map(float, line.split())
            boxes.append((int(c), xc, yc, w, h))
    return boxes

def iter_images(split):
    img_dir = os.path.join(AW_DIR, "images", split)
    paths = []
    for ext in ["*.jpg","*.jpeg","*.png","*.JPG","*.JPEG","*.PNG"]:
        paths += glob.glob(os.path.join(img_dir, ext))
    return sorted(paths)

def eda_split(split="train", show_n=3):
    counts = Counter()
    missing = 0
    paths = iter_images(split)
    for ip in paths:
        name = os.path.splitext(os.path.basename(ip))[0]
        lp = os.path.join(AW_DIR, "labels", split, name + ".txt")
        boxes = read_yolo_labels(lp)
        if not boxes:
            missing += 1
        for (c, *_rest) in boxes:
            counts[c] += 1

    print(f"[{split}] imágenes={len(paths)} | sin_labels={missing}")
    for cid in range(len(AW_CLASSES)):
        print(f"  {AW_CLASSES[cid]}: {counts[cid]} bbox")

    for ip in paths[:show_n]:
        name = os.path.splitext(os.path.basename(ip))[0]
        lp = os.path.join(AW_DIR, "labels", split, name + ".txt")
        im = cv2.imread(ip)
        if im is None:
            continue
        h, w = im.shape[:2]
        vis = im.copy()
        for (c, xc, yc, bw, bh) in read_yolo_labels(lp):
            x1 = int((xc - bw/2) * w); y1 = int((yc - bh/2) * h)
            x2 = int((xc + bw/2) * w); y2 = int((yc + bh/2) * h)
            cv2.rectangle(vis, (x1,y1), (x2,y2), (0,255,0), 2)
            cv2.putText(vis, AW_CLASSES[c], (x1, max(15,y1-5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2)
        out_path = f"outputs/eda/{split}_{name}.jpg"
        cv2.imwrite(out_path, vis)
        plt.figure(figsize=(6,4)); plt.imshow(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)); plt.axis("off"); plt.title(f"{split}: {name}")
        plt.show()

# Ejemplo (descomenta para ejecutar)
# eda_split("train", 3); eda_split("val", 3)
""")))

cells.append(nbf.v4.new_markdown_cell(dedent("""
## Dataset PyTorch (YOLO → xyxy)
Convierte las etiquetas YOLO normalizadas a cajas `[x1,y1,x2,y2]` en píxeles.
Acepta `.jpg/.jpeg/.png` (mayúsculas y minúsculas).
""")))

cells.append(nbf.v4.new_code_cell(dedent("""
class AfricanWildlifeDetection(Dataset):
    def __init__(self, split="train", transforms=None):
        self.split = split
        self.img_dir = os.path.join(AW_DIR, "images", split)
        self.lbl_dir = os.path.join(AW_DIR, "labels", split)
        self.img_paths = iter_images(split)
        self.transforms = transforms

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        ip = self.img_paths[idx]
        name = os.path.splitext(os.path.basename(ip))[0]
        lp = os.path.join(self.lbl_dir, name + ".txt")

        img = cv2.imread(ip)
        if img is None:
            raise FileNotFoundError(f"No pude leer la imagen: {ip}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h, w = img.shape[:2]

        boxes, labels = [], []
        for (c, xc, yc, bw, bh) in read_yolo_labels(lp):
            x1 = (xc - bw/2) * w; y1 = (yc - bh/2) * h
            x2 = (xc + bw/2) * w; y2 = (yc + bh/2) * h
            boxes.append([x1, y1, x2, y2])
            labels.append(CLASS_TO_ID[AW_CLASSES[c]])

        boxes = torch.tensor(boxes, dtype=torch.float32) if boxes else torch.zeros((0,4), dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64) if labels else torch.zeros((0,), dtype=torch.int64)

        img_t = TF.to_tensor(img)  # [0,1], CxHxW
        target = {"boxes": boxes, "labels": labels, "image_id": torch.tensor([idx])}
        if self.transforms:
            img_t = self.transforms(img_t)
        return img_t, target

def collate_fn(batch):
    imgs, targets = list(zip(*batch))
    return list(imgs), list(targets)

# Prueba rápida (opcional):
# ds = AfricanWildlifeDetection("train"); len(ds), ds[0][0].shape, ds[0][1]["boxes"].shape
""")))

cells.append(nbf.v4.new_markdown_cell(dedent("""
## Modelo: RetinaNet (torchvision) y entrenamiento
Guarda el **mejor** checkpoint por `mAP@.50` en `outputs/retinanet_best.pth` y siempre guarda el **último** en `outputs/retinanet_last.pth`.
""")))

cells.append(nbf.v4.new_code_cell(dedent("""
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

train_ds = AfricanWildlifeDetection("train")
val_ds   = AfricanWildlifeDetection("val")

# Ajusta batch_size si estás en CPU/GPU
BATCH_SIZE = 4 if torch.cuda.is_available() else 2
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, collate_fn=collate_fn)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=2, collate_fn=collate_fn)

def create_retinanet(num_classes):
    model = retinanet_resnet50_fpn_v2(weights="DEFAULT", num_classes=num_classes)
    return model

model = create_retinanet(NUM_CLASSES).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=5e-4, momentum=0.9, weight_decay=5e-4)
lr_sched = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

best_map50 = -1.0
best_path = "outputs/retinanet_best.pth"
last_path = "outputs/retinanet_last.pth"

def evaluate_map(model, loader):
    metric = MeanAveragePrecision(iou_type="bbox")
    model.eval()
    with torch.no_grad():
        for images, targets in loader:
            images = [im.to(device) for im in images]
            outs = model(images)
            metric.update([{k: v.cpu() for k,v in o.items()} for o in outs],
                          [{k: v.cpu() for k,v in t.items()} for t in targets])
    res = metric.compute()
    map_50_95 = res.get("map", torch.tensor(0.)).item()
    map_50 = res.get("map_50", torch.tensor(0.)).item()
    return map_50_95, map_50

EPOCHS = 12  # ajusta según tu GPU/tiempo
for epoch in range(1, EPOCHS+1):
    model.train()
    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS}")
    for images, targets in pbar:
        images = [im.to(device) for im in images]
        targets = [{k: v.to(device) for k,v in t.items()} for t in targets]
        loss_dict = model(images, targets)  # dict de pérdidas
        loss = sum(loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        pbar.set_postfix({k: f"{v.item():.3f}" for k,v in loss_dict.items()})

    lr_sched.step()

    # Eval y guardado
    map_50_95, map_50 = evaluate_map(model, val_loader)
    print(f"Val mAP@[.50:.95]={map_50_95:.4f} | mAP@.50={map_50:.4f}")

    torch.save(model.state_dict(), last_path)  # siempre guarda el último
    if map_50 > best_map50:
        best_map50 = map_50
        torch.save(model.state_dict(), best_path)
        print(f"✅ Nuevo mejor modelo (mAP@.50={best_map50:.4f}) guardado en {best_path}")
""")))

cells.append(nbf.v4.new_markdown_cell(dedent("""
## Inferencia aleatoria + export (para póster)
Crea una carpeta `outputs/preds/<timestamp>/` con imágenes anotadas.
""")))

cells.append(nbf.v4.new_code_cell(dedent("""
@torch.no_grad()
def load_model(path):
    m = retinanet_resnet50_fpn_v2(weights="DEFAULT", num_classes=NUM_CLASSES).to(device)
    m.load_state_dict(torch.load(path, map_location=device))
    m.eval()
    return m

def sample_images(split="val", k=6):
    paths = iter_images(split)
    assert len(paths) > 0, f"No hay imágenes en {AW_DIR}/images/{split}"
    random.seed(0)
    return random.sample(paths, min(k, len(paths)))

def infer_and_draw(model, img_paths, conf_thr=0.40, save_dir="outputs/preds"):
    ts_dir = os.path.join(save_dir, time.strftime("%Y%m%d_%H%M%S"))
    os.makedirs(ts_dir, exist_ok=True)
    out_paths = []
    for ip in img_paths:
        im_bgr = cv2.imread(ip)
        im_rgb = cv2.cvtColor(im_bgr, cv2.COLOR_BGR2RGB)
        t = TF.to_tensor(im_rgb).to(device).unsqueeze(0)
        out = model(t)[0]
        boxes = out["boxes"].cpu().numpy()
        scores = out["scores"].cpu().numpy()
        labels = out["labels"].cpu().numpy()

        vis = im_bgr.copy()
        for (x1,y1,x2,y2), sc, lb in zip(boxes, scores, labels):
            if sc < conf_thr:
                continue
            c = int(lb) - 1  # 0 es background
            cls = AW_CLASSES[c] if 0 <= c < len(AW_CLASSES) else str(lb)
            cv2.rectangle(vis, (int(x1),int(y1)), (int(x2),int(y2)), (0,255,0), 2)
            cv2.putText(vis, f\"{cls}:{sc:.2f}\", (int(x1), max(15,int(y1)-5)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 2)

        name = os.path.splitext(os.path.basename(ip))[0]
        out_path = os.path.join(ts_dir, f\"pred_{name}.jpg\")
        cv2.imwrite(out_path, vis)
        out_paths.append(out_path)

        plt.figure(figsize=(7,5)); plt.imshow(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)); plt.axis("off"); plt.title(os.path.basename(out_path)); plt.show()
    return out_paths

# Uso sugerido:
# best = load_model("outputs/retinanet_best.pth")
# paths = sample_images("val", k=6)
# out_paths = infer_and_draw(best, paths, conf_thr=0.40)
""")))

cells.append(nbf.v4.new_markdown_cell(dedent("""
## (Opcional) Crear una lámina/montaje para el póster
Genera un collage `poster_panel.png` con varias predicciones.
""")))

cells.append(nbf.v4.new_code_cell(dedent("""
from math import ceil

def make_montage(image_paths, ncols=3, pad=10, out_path="outputs/poster_panel.png"):
    imgs = [cv2.imread(p) for p in image_paths if os.path.exists(p)]
    imgs = [cv2.cvtColor(im, cv2.COLOR_BGR2RGB) for im in imgs if im is not None]
    if not imgs:
        raise RuntimeError("Lista de imágenes vacía para el montaje.")
    h = max(im.shape[0] for im in imgs)
    w = max(im.shape[1] for im in imgs)
    n = len(imgs); nrows = ceil(n / ncols)
    canvas = np.ones((nrows*h + pad*(nrows+1), ncols*w + pad*(ncols+1), 3), dtype=np.uint8) * 255
    idx = 0
    for r in range(nrows):
        for c in range(ncols):
            if idx >= n: break
            y = pad + r*(h+pad); x = pad + c*(w+pad)
            im = imgs[idx]
            # pad to same size
            top = (h - im.shape[0])//2; left = (w - im.shape[1])//2
            tile = np.ones((h, w, 3), dtype=np.uint8)*255
            tile[top:top+im.shape[0], left:left+im.shape[1]] = im
            canvas[y:y+h, x:x+w] = tile
            idx += 1
    plt.figure(figsize=(12, 8)); plt.imshow(canvas); plt.axis("off"); plt.title("Predicciones — panel para póster")
    plt.savefig(out_path, dpi=200, bbox_inches="tight")
    plt.show()
    print("Panel guardado en:", out_path)

# Ejemplo:
# make_montage(out_paths, ncols=3, out_path="outputs/poster_panel.png")
""")))

nb["cells"] = cells
path = "/mnt/data/AW_End2End_AutoPath.ipynb"
# Create the directory if it doesn't exist
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, "w", encoding="utf-8") as f:
    nbf.write(nb, f)

path
Out[11]:
'/mnt/data/AW_End2End_AutoPath.ipynb'

Pruebas 1 errores

In [12]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

def get_transforms():
    return A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        ToTensorV2()
    ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))
In [17]:
from torch.utils.data import Dataset

class AfricanWildlifeDetection(Dataset):
    def __init__(self, split="train", transforms=None):
        # inicialización...
        ...

    def __len__(self):
        # longitud del dataset
        ...

    def __getitem__(self, idx):
        # cargar imagen y etiquetas, aplicar transformaciones
        ...
In [23]:
!pip install torchmetrics
from torchmetrics.detection.mean_ap import MeanAveragePrecision
Collecting torchmetrics
  Downloading torchmetrics-1.8.1-py3-none-any.whl.metadata (22 kB)
Requirement already satisfied: numpy>1.20.0 in /usr/local/lib/python3.11/dist-packages (from torchmetrics) (2.0.2)
Requirement already satisfied: packaging>17.1 in /usr/local/lib/python3.11/dist-packages (from torchmetrics) (25.0)
Requirement already satisfied: torch>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from torchmetrics) (2.6.0+cu124)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.15.2-py3-none-any.whl.metadata (5.7 kB)
Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from lightning-utilities>=0.8.0->torchmetrics) (75.2.0)
Requirement already satisfied: typing_extensions in /usr/local/lib/python3.11/dist-packages (from lightning-utilities>=0.8.0->torchmetrics) (4.14.1)
Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (3.18.0)
Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (3.5)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (3.1.6)
Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (2025.3.0)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (0.6.2)
Collecting nvidia-nccl-cu12==2.21.5 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB)
Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (12.4.127)
Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (3.2.0)
Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2.0.0->torchmetrics) (1.13.1)
Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2.0.0->torchmetrics) (1.3.0)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=2.0.0->torchmetrics) (3.0.2)
Downloading torchmetrics-1.8.1-py3-none-any.whl (982 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 983.0/983.0 kB 35.9 MB/s eta 0:00:00
Downloading lightning_utilities-0.15.2-py3-none-any.whl (29 kB)
Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 363.4/363.4 MB 4.4 MB/s eta 0:00:00
Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.8/13.8 MB 38.0 MB/s eta 0:00:00
Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 24.6/24.6 MB 21.9 MB/s eta 0:00:00
Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 883.7/883.7 kB 26.4 MB/s eta 0:00:00
Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 664.8/664.8 MB 2.5 MB/s eta 0:00:00
Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 211.5/211.5 MB 6.5 MB/s eta 0:00:00
Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.3/56.3 MB 13.1 MB/s eta 0:00:00
Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 127.9/127.9 MB 7.1 MB/s eta 0:00:00
Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 207.5/207.5 MB 7.5 MB/s eta 0:00:00
Downloading nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl (188.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 188.7/188.7 MB 5.2 MB/s eta 0:00:00
Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 21.1/21.1 MB 43.3 MB/s eta 0:00:00
Installing collected packages: nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, lightning-utilities, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, torchmetrics
  Attempting uninstall: nvidia-nvjitlink-cu12
    Found existing installation: nvidia-nvjitlink-cu12 12.5.82
    Uninstalling nvidia-nvjitlink-cu12-12.5.82:
      Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82
  Attempting uninstall: nvidia-nccl-cu12
    Found existing installation: nvidia-nccl-cu12 2.23.4
    Uninstalling nvidia-nccl-cu12-2.23.4:
      Successfully uninstalled nvidia-nccl-cu12-2.23.4
  Attempting uninstall: nvidia-curand-cu12
    Found existing installation: nvidia-curand-cu12 10.3.6.82
    Uninstalling nvidia-curand-cu12-10.3.6.82:
      Successfully uninstalled nvidia-curand-cu12-10.3.6.82
  Attempting uninstall: nvidia-cufft-cu12
    Found existing installation: nvidia-cufft-cu12 11.2.3.61
    Uninstalling nvidia-cufft-cu12-11.2.3.61:
      Successfully uninstalled nvidia-cufft-cu12-11.2.3.61
  Attempting uninstall: nvidia-cuda-runtime-cu12
    Found existing installation: nvidia-cuda-runtime-cu12 12.5.82
    Uninstalling nvidia-cuda-runtime-cu12-12.5.82:
      Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82
  Attempting uninstall: nvidia-cuda-nvrtc-cu12
    Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82
    Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:
      Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82
  Attempting uninstall: nvidia-cuda-cupti-cu12
    Found existing installation: nvidia-cuda-cupti-cu12 12.5.82
    Uninstalling nvidia-cuda-cupti-cu12-12.5.82:
      Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82
  Attempting uninstall: nvidia-cublas-cu12
    Found existing installation: nvidia-cublas-cu12 12.5.3.2
    Uninstalling nvidia-cublas-cu12-12.5.3.2:
      Successfully uninstalled nvidia-cublas-cu12-12.5.3.2
  Attempting uninstall: nvidia-cusparse-cu12
    Found existing installation: nvidia-cusparse-cu12 12.5.1.3
    Uninstalling nvidia-cusparse-cu12-12.5.1.3:
      Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3
  Attempting uninstall: nvidia-cudnn-cu12
    Found existing installation: nvidia-cudnn-cu12 9.3.0.75
    Uninstalling nvidia-cudnn-cu12-9.3.0.75:
      Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75
  Attempting uninstall: nvidia-cusolver-cu12
    Found existing installation: nvidia-cusolver-cu12 11.6.3.83
    Uninstalling nvidia-cusolver-cu12-11.6.3.83:
      Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83
Successfully installed lightning-utilities-0.15.2 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nccl-cu12-2.21.5 nvidia-nvjitlink-cu12-12.4.127 torchmetrics-1.8.1
In [25]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms
from PIL import Image
import os

# =========================
# 1. Dataset personalizado
# =========================
class CustomDataset(Dataset):
    def __init__(self, images_dir, annotations, transforms=None):
        """
        images_dir: carpeta con imágenes
        annotations: diccionario con {nombre_imagen: {'boxes': [[x1,y1,x2,y2], ...], 'labels':[1,2,...]}}
        transforms: transformaciones a aplicar a la imagen
        """
        self.images_dir = images_dir
        self.annotations = annotations
        self.image_names = list(annotations.keys())
        self.transforms = transforms

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img_name = self.image_names[idx]
        img_path = os.path.join(self.images_dir, img_name)
        img = Image.open(img_path).convert("RGB")

        target = {}
        target['boxes'] = torch.tensor(self.annotations[img_name]['boxes'], dtype=torch.float32)
        target['labels'] = torch.tensor(self.annotations[img_name]['labels'], dtype=torch.int64)

        if self.transforms:
            img = self.transforms(img)

        return img, target

# =========================
# 2. Transformaciones
# =========================
transform = transforms.Compose([
    transforms.ToTensor(),  # convierte a tensor y normaliza a [0,1]
])

# =========================
# 3. collate_fn necesario para detección
# =========================
def collate_fn(batch):
    return tuple(zip(*batch))

# =========================
# 4. Preparar datasets y dataloaders
# =========================
# Suponiendo que ya tengas un diccionario de anotaciones
train_annotations = {
    "img1.jpg": {"boxes": [[34,50,200,220]], "labels": [1]},
    "img2.jpg": {"boxes": [[10,30,100,150],[120,40,200,180]], "labels": [1,2]}
}
val_annotations = {
    "img3.jpg": {"boxes": [[20,40,150,200]], "labels": [1]}
}

train_ds = CustomDataset("ruta/a/imagenes/train", train_annotations, transforms=transform)
val_ds   = CustomDataset("ruta/a/imagenes/val",   val_annotations, transforms=transform)

BATCH_SIZE = 4 if torch.cuda.is_available() else 2

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, collate_fn=collate_fn)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=2, collate_fn=collate_fn)

# =========================
# 5. Verificación rápida
# =========================
for imgs, targets in train_loader:
    print(f"Número de imágenes en batch: {len(imgs)}")
    print(f"Targets del primer elemento: {targets[0]}")
    break
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
/tmp/ipython-input-2295898616.py in <cell line: 0>()
     74 # 5. Verificación rápida
     75 # =========================
---> 76 for imgs, targets in train_loader:
     77     print(f"Número de imágenes en batch: {len(imgs)}")
     78     print(f"Targets del primer elemento: {targets[0]}")

/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py in __next__(self)
    706                 # TODO(https://github.com/pytorch/pytorch/issues/76750)
    707                 self._reset()  # type: ignore[call-arg]
--> 708             data = self._next_data()
    709             self._num_yielded += 1
    710             if (

/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
   1478                 del self._task_info[idx]
   1479                 self._rcvd_idx += 1
-> 1480                 return self._process_data(data)
   1481 
   1482     def _try_put_index(self):

/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py in _process_data(self, data)
   1503         self._try_put_index()
   1504         if isinstance(data, ExceptionWrapper):
-> 1505             data.reraise()
   1506         return data
   1507 

/usr/local/lib/python3.11/dist-packages/torch/_utils.py in reraise(self)
    731             # instantiate since we don't know how to
    732             raise RuntimeError(msg) from None
--> 733         raise exception
    734 
    735 

FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
            ~~~~~~~~~~~~^^^^^
  File "/tmp/ipython-input-2295898616.py", line 29, in __getitem__
    img = Image.open(img_path).convert("RGB")
          ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/PIL/Image.py", line 3513, in open
    fp = builtins.open(filename, "rb")
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: 'ruta/a/imagenes/train/img2.jpg'