In [8]:
# ==== Install dependencies (Colab) ====
# !pip install -q torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu121
# !pip install -q torchmetrics opencv-python matplotlib tqdm albumentations
# !pip install -q pycocotools  # COCO-style metrics, if you need them
import sys, platform
print("Python:", sys.version)
import torch, torchvision, matplotlib
print("Torch:", torch.__version__, "| Torchvision:", torchvision.__version__)
print("CUDA disponible:", torch.cuda.is_available())
print("Matplotlib:", matplotlib.__version__)
Python: 3.11.13 (main, Jun 4 2025, 08:57:29) [GCC 11.4.0]
Torch: 2.6.0+cu124 | Torchvision: 0.21.0+cu124
CUDA available: False
Matplotlib: 3.10.0
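Note: `CUDA available: False` means this session is running on CPU; in Colab you can switch to a GPU runtime (Runtime → Change runtime type). A minimal device check, using only the imports above:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Selected device:", device)
if device.type == "cuda":
    # Report which GPU the runtime assigned
    print("GPU:", torch.cuda.get_device_name(0))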
In [9]:
!rm -rf datasets/african-wildlife
!mkdir -p datasets
!wget -O datasets/african-wildlife.zip https://github.com/ultralytics/assets/releases/download/v0.0.0/african-wildlife.zip
import zipfile
with zipfile.ZipFile("datasets/african-wildlife.zip", 'r') as z:
    z.extractall("datasets")
print("✅ Dataset descargado y descomprimido otra vez")
--2025-08-19 01:53:18--  https://github.com/ultralytics/assets/releases/download/v0.0.0/african-wildlife.zip
Resolving github.com (github.com)... 140.82.121.3
Connecting to github.com (github.com)|140.82.121.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
[redirected to release-assets.githubusercontent.com; signed URL omitted]
HTTP request sent, awaiting response... 200 OK
Length: 105102519 (100M) [application/octet-stream]
Saving to: ‘datasets/african-wildlife.zip’
2025-08-19 01:53:18 (219 MB/s) - ‘datasets/african-wildlife.zip’ saved [105102519/105102519]
✅ Dataset downloaded and extracted again
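Before re-extracting in a later session, it can be worth verifying that the archive is intact. A small sketch using only the standard library (the expected byte size comes from the wget log above):
import os, zipfile
zip_path = "datasets/african-wildlife.zip"
print("Size on disk:", os.path.getsize(zip_path))  # expected: 105102519 bytes
with zipfile.ZipFile(zip_path) as z:
    # testzip() returns the name of the first corrupt member, or None if all CRC checks pass
    bad = z.testzip()
    if bad:
        print("Corrupt member:", bad)
    else:
        print("Archive OK")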
In [10]:
import os
base_dir = 'datasets'
for split in ['train', 'val', 'test']:
    image_path = os.path.join(base_dir, 'images', split)
    label_path = os.path.join(base_dir, 'labels', split)
    image_files = os.listdir(image_path)
    label_files = os.listdir(label_path)
    print(f"{split.upper()} - images: {len(image_files)}, labels: {len(label_files)}")
TRAIN - images: 1052, labels: 1052
VAL - images: 225, labels: 225
TEST - images: 227, labels: 227
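Equal counts do not guarantee that the filenames actually pair up. A quick hedged check that every image has a same-named `.txt` label, reusing `base_dir` from the cell above:
for split in ['train', 'val', 'test']:
    image_stems = {os.path.splitext(f)[0] for f in os.listdir(os.path.join(base_dir, 'images', split))}
    label_stems = {os.path.splitext(f)[0] for f in os.listdir(os.path.join(base_dir, 'labels', split))}
    # Symmetric difference: images without labels plus labels without images
    orphans = image_stems ^ label_stems
    print(f"{split}: {len(orphans)} unmatched files")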
In [11]:
# Create a robust, Colab-ready notebook tailored to Sergio's folder layout.
# It auto-detects dataset root among: "dataset", "datasets/african-wildlife", "datasets"
# and provides: EDA → RetinaNet training (torchvision) → mAP eval → best-model save →
# random inference → export images (and an optional montage for the poster).
#
# The file will be saved at /mnt/data/AW_End2End_AutoPath.ipynb
import nbformat as nbf
from textwrap import dedent
from datetime import datetime
import os # Import the os module
nb = nbf.v4.new_notebook()
cells = []
cells.append(nbf.v4.new_markdown_cell(dedent(f"""
# African Wildlife — End-to-End (AutoPath, Colab-ready)
**EDA → RetinaNet (torchvision) → mAP → Export for the poster**
*(Generated {datetime.now().strftime('%Y-%m-%d %H:%M:%S')})*
- Detects the dataset root automatically: `dataset`, `datasets/african-wildlife`, or `datasets`.
- Saves the **best model** to `outputs/retinanet_best.pth` and the latest to `outputs/retinanet_last.pth`.
- Exports predictions to `outputs/preds/<timestamp>/` and an optional **montage panel** for the poster.
""")))
cells.append(nbf.v4.new_code_cell(dedent("""
# ==== Installation (run once per Colab session) ====
# !pip install -q torchmetrics opencv-python matplotlib tqdm albumentations pycocotools
import sys, os, glob, random, time, csv
import torch, torchvision, matplotlib
import numpy as np
import cv2
from tqdm.auto import tqdm
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import retinanet_resnet50_fpn_v2
from torchmetrics.detection.mean_ap import MeanAveragePrecision
import torchvision.transforms.functional as TF
import matplotlib.pyplot as plt
print("Python:", sys.version)
print("Torch:", torch.__version__, "| Torchvision:", torchvision.__version__)
print("CUDA disponible:", torch.cuda.is_available())
os.makedirs("outputs/eda", exist_ok=True)
os.makedirs("outputs/preds", exist_ok=True)
os.makedirs("outputs", exist_ok=True)
""")))
cells.append(nbf.v4.new_markdown_cell(dedent("""
## Auto-detect the dataset root
Looks for `images/{train,val,test}` and `labels/{train,val,test}` in several common locations.
""")))
cells.append(nbf.v4.new_code_cell(dedent("""
def find_aw_dir():
    candidates = ["dataset", "datasets/african-wildlife", "datasets"]
    # Optional: handle nested layouts like "african-wildlife/african-wildlife"
    for root in list(candidates):
        nested = os.path.join(root, "african-wildlife")
        if os.path.isdir(nested):
            candidates.append(nested)
    for root in candidates:
        if all(os.path.isdir(os.path.join(root, "images", s)) for s in ["train","val","test"]) and \
           all(os.path.isdir(os.path.join(root, "labels", s)) for s in ["train","val","test"]):
            return root
    raise FileNotFoundError("No root found with images/{train,val,test} and labels/{train,val,test}. Check your folder structure.")
AW_DIR = find_aw_dir()
print("Dataset root:", AW_DIR)
# Classes
AW_CLASSES = ["buffalo","elephant","rhino","zebra"]
NUM_CLASSES = len(AW_CLASSES) + 1  # + implicit background
CLASS_TO_ID = {name:i+1 for i,name in enumerate(AW_CLASSES)}  # 1..K
# Utility: print per-split counts
def list_counts():
    for split in ["train","val","test"]:
        imgs = []
        for ext in ["*.jpg","*.jpeg","*.png","*.JPG","*.JPEG","*.PNG"]:
            imgs += glob.glob(os.path.join(AW_DIR, "images", split, ext))
        lbls = glob.glob(os.path.join(AW_DIR, "labels", split, "*.txt"))
        print(f"[{split}] images: {len(imgs)} | labels: {len(lbls)}")
list_counts()
""")))
cells.append(nbf.v4.new_markdown_cell(dedent("""
## EDA: counts and samples with boxes
Saves examples to `outputs/eda/`.
""")))
cells.append(nbf.v4.new_code_cell(dedent("""
from collections import Counter
def read_yolo_labels(txt_path):
    boxes = []
    if not os.path.exists(txt_path):
        return boxes
    with open(txt_path, "r") as f:
        for line in f.read().strip().splitlines():
            if not line:
                continue
            c, xc, yc, w, h = map(float, line.split())
            boxes.append((int(c), xc, yc, w, h))
    return boxes
def iter_images(split):
    img_dir = os.path.join(AW_DIR, "images", split)
    paths = []
    for ext in ["*.jpg","*.jpeg","*.png","*.JPG","*.JPEG","*.PNG"]:
        paths += glob.glob(os.path.join(img_dir, ext))
    return sorted(paths)
def eda_split(split="train", show_n=3):
    counts = Counter()
    missing = 0
    paths = iter_images(split)
    for ip in paths:
        name = os.path.splitext(os.path.basename(ip))[0]
        lp = os.path.join(AW_DIR, "labels", split, name + ".txt")
        boxes = read_yolo_labels(lp)
        if not boxes:
            missing += 1
        for (c, *_rest) in boxes:
            counts[c] += 1
    print(f"[{split}] images={len(paths)} | missing_labels={missing}")
    for cid in range(len(AW_CLASSES)):
        print(f"  {AW_CLASSES[cid]}: {counts[cid]} bbox")
    for ip in paths[:show_n]:
        name = os.path.splitext(os.path.basename(ip))[0]
        lp = os.path.join(AW_DIR, "labels", split, name + ".txt")
        im = cv2.imread(ip)
        if im is None:
            continue
        h, w = im.shape[:2]
        vis = im.copy()
        for (c, xc, yc, bw, bh) in read_yolo_labels(lp):
            x1 = int((xc - bw/2) * w); y1 = int((yc - bh/2) * h)
            x2 = int((xc + bw/2) * w); y2 = int((yc + bh/2) * h)
            cv2.rectangle(vis, (x1,y1), (x2,y2), (0,255,0), 2)
            cv2.putText(vis, AW_CLASSES[c], (x1, max(15,y1-5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2)
        out_path = f"outputs/eda/{split}_{name}.jpg"
        cv2.imwrite(out_path, vis)
        plt.figure(figsize=(6,4)); plt.imshow(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)); plt.axis("off"); plt.title(f"{split}: {name}")
        plt.show()
# Example (uncomment to run)
# eda_split("train", 3); eda_split("val", 3)
""")))
cells.append(nbf.v4.new_markdown_cell(dedent("""
## PyTorch Dataset (YOLO → xyxy)
Converts normalized YOLO labels into `[x1,y1,x2,y2]` boxes in pixels.
Accepts `.jpg/.jpeg/.png` (upper- and lowercase).
""")))
cells.append(nbf.v4.new_code_cell(dedent("""
class AfricanWildlifeDetection(Dataset):
    def __init__(self, split="train", transforms=None):
        self.split = split
        self.img_dir = os.path.join(AW_DIR, "images", split)
        self.lbl_dir = os.path.join(AW_DIR, "labels", split)
        self.img_paths = iter_images(split)
        self.transforms = transforms
    def __len__(self):
        return len(self.img_paths)
    def __getitem__(self, idx):
        ip = self.img_paths[idx]
        name = os.path.splitext(os.path.basename(ip))[0]
        lp = os.path.join(self.lbl_dir, name + ".txt")
        img = cv2.imread(ip)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {ip}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h, w = img.shape[:2]
        boxes, labels = [], []
        for (c, xc, yc, bw, bh) in read_yolo_labels(lp):
            x1 = (xc - bw/2) * w; y1 = (yc - bh/2) * h
            x2 = (xc + bw/2) * w; y2 = (yc + bh/2) * h
            boxes.append([x1, y1, x2, y2])
            labels.append(CLASS_TO_ID[AW_CLASSES[c]])
        boxes = torch.tensor(boxes, dtype=torch.float32) if boxes else torch.zeros((0,4), dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64) if labels else torch.zeros((0,), dtype=torch.int64)
        img_t = TF.to_tensor(img)  # [0,1], CxHxW
        target = {"boxes": boxes, "labels": labels, "image_id": torch.tensor([idx])}
        if self.transforms:
            img_t = self.transforms(img_t)
        return img_t, target
def collate_fn(batch):
    imgs, targets = list(zip(*batch))
    return list(imgs), list(targets)
# Quick check (optional):
# ds = AfricanWildlifeDetection("train"); len(ds), ds[0][0].shape, ds[0][1]["boxes"].shape
""")))
cells.append(nbf.v4.new_markdown_cell(dedent("""
## Model: RetinaNet (torchvision) and training
Saves the **best** checkpoint by `mAP@.50` to `outputs/retinanet_best.pth` and always saves the **latest** to `outputs/retinanet_last.pth`.
""")))
cells.append(nbf.v4.new_code_cell(dedent("""
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)
train_ds = AfricanWildlifeDetection("train")
val_ds = AfricanWildlifeDetection("val")
# Adjust batch_size depending on whether you are on CPU or GPU
BATCH_SIZE = 4 if torch.cuda.is_available() else 2
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, collate_fn=collate_fn)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, collate_fn=collate_fn)
def create_retinanet(num_classes):
    # torchvision raises a ValueError if weights="DEFAULT" is combined with a
    # custom num_classes, so load only the pretrained backbone weights here.
    model = retinanet_resnet50_fpn_v2(weights=None, weights_backbone="IMAGENET1K_V1",
                                      num_classes=num_classes)
    return model
model = create_retinanet(NUM_CLASSES).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=5e-4, momentum=0.9, weight_decay=5e-4)
lr_sched = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
best_map50 = -1.0
best_path = "outputs/retinanet_best.pth"
last_path = "outputs/retinanet_last.pth"
def evaluate_map(model, loader):
    metric = MeanAveragePrecision(iou_type="bbox")
    model.eval()
    with torch.no_grad():
        for images, targets in loader:
            images = [im.to(device) for im in images]
            outs = model(images)
            metric.update([{k: v.cpu() for k,v in o.items()} for o in outs],
                          [{k: v.cpu() for k,v in t.items()} for t in targets])
    res = metric.compute()
    map_50_95 = res.get("map", torch.tensor(0.)).item()
    map_50 = res.get("map_50", torch.tensor(0.)).item()
    return map_50_95, map_50
EPOCHS = 12  # adjust to your GPU/time budget
for epoch in range(1, EPOCHS+1):
    model.train()
    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS}")
    for images, targets in pbar:
        images = [im.to(device) for im in images]
        targets = [{k: v.to(device) for k,v in t.items()} for t in targets]
        loss_dict = model(images, targets)  # dict of losses
        loss = sum(loss_dict.values())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        pbar.set_postfix({k: f"{v.item():.3f}" for k,v in loss_dict.items()})
    lr_sched.step()
    # Evaluate and save checkpoints
    map_50_95, map_50 = evaluate_map(model, val_loader)
    print(f"Val mAP@[.50:.95]={map_50_95:.4f} | mAP@.50={map_50:.4f}")
    torch.save(model.state_dict(), last_path)  # always save the latest
    if map_50 > best_map50:
        best_map50 = map_50
        torch.save(model.state_dict(), best_path)
        print(f"✅ New best model (mAP@.50={best_map50:.4f}) saved to {best_path}")
""")))
cells.append(nbf.v4.new_markdown_cell(dedent("""
## Random inference + export (for the poster)
Creates a folder `outputs/preds/<timestamp>/` with annotated images.
""")))
cells.append(nbf.v4.new_code_cell(dedent("""
def load_model(path):
    # weights=None: the architecture is rebuilt empty and then filled from the checkpoint
    m = retinanet_resnet50_fpn_v2(weights=None, weights_backbone=None, num_classes=NUM_CLASSES).to(device)
    m.load_state_dict(torch.load(path, map_location=device))
    m.eval()
    return m
def sample_images(split="val", k=6):
    paths = iter_images(split)
    assert len(paths) > 0, f"No images in {AW_DIR}/images/{split}"
    random.seed(0)
    return random.sample(paths, min(k, len(paths)))
@torch.no_grad()
def infer_and_draw(model, img_paths, conf_thr=0.40, save_dir="outputs/preds"):
    ts_dir = os.path.join(save_dir, time.strftime("%Y%m%d_%H%M%S"))
    os.makedirs(ts_dir, exist_ok=True)
    out_paths = []
    for ip in img_paths:
        im_bgr = cv2.imread(ip)
        im_rgb = cv2.cvtColor(im_bgr, cv2.COLOR_BGR2RGB)
        t = TF.to_tensor(im_rgb).to(device)
        out = model([t])[0]  # torchvision detection models take a list of CHW tensors
        boxes = out["boxes"].cpu().numpy()
        scores = out["scores"].cpu().numpy()
        labels = out["labels"].cpu().numpy()
        vis = im_bgr.copy()
        for (x1,y1,x2,y2), sc, lb in zip(boxes, scores, labels):
            if sc < conf_thr:
                continue
            c = int(lb) - 1  # 0 is background
            cls = AW_CLASSES[c] if 0 <= c < len(AW_CLASSES) else str(lb)
            cv2.rectangle(vis, (int(x1),int(y1)), (int(x2),int(y2)), (0,255,0), 2)
            cv2.putText(vis, f"{cls}:{sc:.2f}", (int(x1), max(15,int(y1)-5)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 2)
        name = os.path.splitext(os.path.basename(ip))[0]
        out_path = os.path.join(ts_dir, f"pred_{name}.jpg")
        cv2.imwrite(out_path, vis)
        out_paths.append(out_path)
        plt.figure(figsize=(7,5)); plt.imshow(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)); plt.axis("off"); plt.title(os.path.basename(out_path)); plt.show()
    return out_paths
# Suggested usage:
# best = load_model("outputs/retinanet_best.pth")
# paths = sample_images("val", k=6)
# out_paths = infer_and_draw(best, paths, conf_thr=0.40)
""")))
cells.append(nbf.v4.new_markdown_cell(dedent("""
## (Optional) Build a montage panel for the poster
Generates a `poster_panel.png` collage with several predictions.
""")))
cells.append(nbf.v4.new_code_cell(dedent("""
from math import ceil
def make_montage(image_paths, ncols=3, pad=10, out_path="outputs/poster_panel.png"):
    imgs = [cv2.imread(p) for p in image_paths if os.path.exists(p)]
    imgs = [cv2.cvtColor(im, cv2.COLOR_BGR2RGB) for im in imgs if im is not None]
    if not imgs:
        raise RuntimeError("Empty image list for the montage.")
    h = max(im.shape[0] for im in imgs)
    w = max(im.shape[1] for im in imgs)
    n = len(imgs); nrows = ceil(n / ncols)
    canvas = np.ones((nrows*h + pad*(nrows+1), ncols*w + pad*(ncols+1), 3), dtype=np.uint8) * 255
    idx = 0
    for r in range(nrows):
        for c in range(ncols):
            if idx >= n: break
            y = pad + r*(h+pad); x = pad + c*(w+pad)
            im = imgs[idx]
            # pad each tile to the same size, centered
            top = (h - im.shape[0])//2; left = (w - im.shape[1])//2
            tile = np.ones((h, w, 3), dtype=np.uint8)*255
            tile[top:top+im.shape[0], left:left+im.shape[1]] = im
            canvas[y:y+h, x:x+w] = tile
            idx += 1
    plt.figure(figsize=(12, 8)); plt.imshow(canvas); plt.axis("off"); plt.title("Predictions — poster panel")
    plt.savefig(out_path, dpi=200, bbox_inches="tight")
    plt.show()
    print("Panel saved to:", out_path)
# Example:
# make_montage(out_paths, ncols=3, out_path="outputs/poster_panel.png")
""")))
nb["cells"] = cells
path = "/mnt/data/AW_End2End_AutoPath.ipynb"
# Create the directory if it doesn't exist
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, "w", encoding="utf-8") as f:
    nbf.write(nb, f)
path
Out[11]:
'/mnt/data/AW_End2End_AutoPath.ipynb'
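To keep the generated notebook beyond the Colab session, it can be downloaded with Colab's file helper. A sketch; `google.colab` only exists inside a Colab runtime:
try:
    from google.colab import files
    files.download("/mnt/data/AW_End2End_AutoPath.ipynb")
except ImportError:
    print("Not running in Colab; copy the file manually.")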
In [12]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
def get_transforms():
    return A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        ToTensorV2()
    ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))
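This pipeline expects pixel-space `pascal_voc` boxes plus a parallel `labels` list, passed as keyword arguments. A minimal sketch of how it would be called; the dummy image and box below are illustrative only:
import numpy as np
tfms = get_transforms()
dummy_img = np.zeros((480, 640, 3), dtype=np.uint8)  # HxWxC uint8, as albumentations expects
dummy_boxes = [[100, 120, 300, 360]]                 # [x1, y1, x2, y2] in pixels
out = tfms(image=dummy_img, bboxes=dummy_boxes, labels=[1])
print(out["image"].shape, out["bboxes"], out["labels"])
Note that `ToTensorV2` keeps the uint8 dtype; divide by 255.0 if the model expects float inputs in [0, 1].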
In [17]:
from torch.utils.data import Dataset
class AfricanWildlifeDetection(Dataset):
    def __init__(self, split="train", transforms=None):
        # initialization...
        ...
    def __len__(self):
        # dataset length
        ...
    def __getitem__(self, idx):
        # load image and labels, apply transforms
        ...
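The full implementation of this class appears in the generated notebook above; the skeleton here is where the albumentations pipeline would plug in. A hedged sketch of how `__getitem__` could look under that assumption (helpers like `read_yolo_labels`, `AW_CLASSES`, and `CLASS_TO_ID` are the ones defined earlier, and `self.img_paths`/`self.lbl_dir` are assumed set in `__init__`):
def __getitem__(self, idx):
    ip = self.img_paths[idx]
    img = cv2.cvtColor(cv2.imread(ip), cv2.COLOR_BGR2RGB)
    h, w = img.shape[:2]
    name = os.path.splitext(os.path.basename(ip))[0]
    boxes, labels = [], []
    for (c, xc, yc, bw, bh) in read_yolo_labels(os.path.join(self.lbl_dir, name + ".txt")):
        boxes.append([(xc-bw/2)*w, (yc-bh/2)*h, (xc+bw/2)*w, (yc+bh/2)*h])
        labels.append(CLASS_TO_ID[AW_CLASSES[c]])
    if self.transforms:  # e.g. get_transforms() from the cell above
        out = self.transforms(image=img, bboxes=boxes, labels=labels)
        img = out["image"].float() / 255.0  # ToTensorV2 keeps uint8; scale to [0,1]
        boxes, labels = [list(b) for b in out["bboxes"]], list(out["labels"])
    target = {"boxes": torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
              "labels": torch.tensor(labels, dtype=torch.int64)}
    return img, target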
In [23]:
!pip install torchmetrics
from torchmetrics.detection.mean_ap import MeanAveragePrecision
Collecting torchmetrics
  Downloading torchmetrics-1.8.1-py3-none-any.whl.metadata (22 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
[pip also re-resolved the CUDA 12.4 wheels pulled in by torch>=2.0.0; progress logs omitted]
Successfully installed lightning-utilities-0.15.2 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nccl-cu12-2.21.5 nvidia-nvjitlink-cu12-12.4.127 torchmetrics-1.8.1
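As a sanity check that the metric behaves as expected, it can be exercised with one hand-made prediction; a perfect detection should yield a `map_50` of 1.0:
import torch
metric = MeanAveragePrecision(iou_type="bbox")
preds = [{"boxes": torch.tensor([[10., 10., 100., 100.]]),
          "scores": torch.tensor([0.9]),
          "labels": torch.tensor([1])}]
targets = [{"boxes": torch.tensor([[10., 10., 100., 100.]]),
            "labels": torch.tensor([1])}]
metric.update(preds, targets)
print(metric.compute()["map_50"])  # tensor(1.)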
In [25]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms
from PIL import Image
import os
# =========================
# 1. Custom dataset
# =========================
class CustomDataset(Dataset):
    def __init__(self, images_dir, annotations, transforms=None):
        """
        images_dir: folder containing the images
        annotations: dict of {image_name: {'boxes': [[x1,y1,x2,y2], ...], 'labels': [1,2,...]}}
        transforms: transforms to apply to the image
        """
        self.images_dir = images_dir
        self.annotations = annotations
        self.image_names = list(annotations.keys())
        self.transforms = transforms
    def __len__(self):
        return len(self.image_names)
    def __getitem__(self, idx):
        img_name = self.image_names[idx]
        img_path = os.path.join(self.images_dir, img_name)
        img = Image.open(img_path).convert("RGB")
        target = {}
        target['boxes'] = torch.tensor(self.annotations[img_name]['boxes'], dtype=torch.float32)
        target['labels'] = torch.tensor(self.annotations[img_name]['labels'], dtype=torch.int64)
        if self.transforms:
            img = self.transforms(img)
        return img, target
# =========================
# 2. Transforms
# =========================
transform = transforms.Compose([
    transforms.ToTensor(),  # converts to tensor and scales to [0,1]
])
# =========================
# 3. collate_fn required for detection
# =========================
def collate_fn(batch):
    return tuple(zip(*batch))
# =========================
# 4. Build datasets and dataloaders
# =========================
# Assuming you already have an annotations dict like this (placeholder data):
train_annotations = {
    "img1.jpg": {"boxes": [[34,50,200,220]], "labels": [1]},
    "img2.jpg": {"boxes": [[10,30,100,150],[120,40,200,180]], "labels": [1,2]}
}
val_annotations = {
    "img3.jpg": {"boxes": [[20,40,150,200]], "labels": [1]}
}
train_ds = CustomDataset("ruta/a/imagenes/train", train_annotations, transforms=transform)
val_ds = CustomDataset("ruta/a/imagenes/val", val_annotations, transforms=transform)
BATCH_SIZE = 4 if torch.cuda.is_available() else 2
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, collate_fn=collate_fn)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, collate_fn=collate_fn)
# =========================
# 5. Quick check
# =========================
for imgs, targets in train_loader:
    print(f"Number of images in the batch: {len(imgs)}")
    print(f"Targets of the first element: {targets[0]}")
    break
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
/tmp/ipython-input-2295898616.py in <cell line: 0>()
---> 76 for imgs, targets in train_loader:
(... DataLoader worker frames omitted ...)
FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/tmp/ipython-input-2295898616.py", line 29, in __getitem__
    img = Image.open(img_path).convert("RGB")
FileNotFoundError: [Errno 2] No such file or directory: 'ruta/a/imagenes/train/img2.jpg'
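The traceback is expected: `ruta/a/imagenes/train/img2.jpg` is a placeholder path that does not exist. One way to make this cell run against the real dataset is to build the annotations dict from the YOLO label files already on disk; a hedged sketch (assumes the `datasets/images|labels/<split>` layout verified earlier; `yolo_to_annotations` is a hypothetical helper, not part of the notebook above):
import os, glob
from PIL import Image

def yolo_to_annotations(images_dir, labels_dir, class_offset=1):
    # Builds {image_name: {'boxes': [[x1,y1,x2,y2], ...], 'labels': [...]}} from YOLO txt files
    annotations = {}
    for ip in glob.glob(os.path.join(images_dir, "*.jpg")):
        name = os.path.basename(ip)
        lp = os.path.join(labels_dir, os.path.splitext(name)[0] + ".txt")
        if not os.path.exists(lp):
            continue
        w, h = Image.open(ip).size  # real image size, needed to de-normalize boxes
        boxes, labels = [], []
        with open(lp) as f:
            for line in f:
                c, xc, yc, bw, bh = map(float, line.split())
                boxes.append([(xc-bw/2)*w, (yc-bh/2)*h, (xc+bw/2)*w, (yc+bh/2)*h])
                labels.append(int(c) + class_offset)  # reserve 0 for background
        annotations[name] = {"boxes": boxes, "labels": labels}
    return annotations

train_annotations = yolo_to_annotations("datasets/images/train", "datasets/labels/train")
train_ds = CustomDataset("datasets/images/train", train_annotations, transforms=transform)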