# Comprueba GPU
!nvidia-smi

# Paquetes base
%pip -q install ultralytics opencv-python matplotlib pandas tqdm

from ultralytics import YOLO

# Load a model
model = YOLO("yolo11n.pt")  # load a pretrained model (recommended for training)

# Train the model
results = model.train(data="african-wildlife.yaml", epochs=10, imgsz=640)

Ultralytics 8.3.178 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
engine/trainer: agnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=african-wildlife.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train7, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=12.0, pretrained=True, profile=False, project=None, rect=False, resume=False, retina_masks=False, save=True, save_conf=False, save_crop=False, save_dir=runs/detect/train7, save_frames=False, save_json=False, save_period=-1, save_txt=False, scale=0.5, seed=0, shear=0.0, show=False, show_boxes=True, show_conf=True, show_labels=True, simplify=True, single_cls=False, source=None, split=val, stream_buffer=False, task=detect, time=None, tracker=botsort.yaml, translate=0.1, val=True, verbose=True, vid_stride=1, visualize=False, warmup_bias_lr=0.1, warmup_epochs=3.0, warmup_momentum=0.8, weight_decay=0.0005, workers=8, workspace=None
Overriding model.yaml nc=80 with nc=4

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      6640  ultralytics.nn.modules.block.C3k2            [32, 64, 1, False, 0.25]      
  3                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
  4                  -1  1     26080  ultralytics.nn.modules.block.C3k2            [64, 128, 1, False, 0.25]     
  5                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
  6                  -1  1     87040  ultralytics.nn.modules.block.C3k2            [128, 128, 1, True]           
  7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  8                  -1  1    346112  ultralytics.nn.modules.block.C3k2            [256, 256, 1, True]           
  9                  -1  1    164608  ultralytics.nn.modules.block.SPPF            [256, 256, 5]                 
 10                  -1  1    249728  ultralytics.nn.modules.block.C2PSA           [256, 256, 1]                 
 11                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 12             [-1, 6]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 13                  -1  1    111296  ultralytics.nn.modules.block.C3k2            [384, 128, 1, False]          
 14                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 15             [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 16                  -1  1     32096  ultralytics.nn.modules.block.C3k2            [256, 64, 1, False]           
 17                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
 18            [-1, 13]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 19                  -1  1     86720  ultralytics.nn.modules.block.C3k2            [192, 128, 1, False]          
 20                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
 21            [-1, 10]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 22                  -1  1    378880  ultralytics.nn.modules.block.C3k2            [384, 256, 1, True]           
 23        [16, 19, 22]  1    431452  ultralytics.nn.modules.head.Detect           [4, [64, 128, 256]]           
YOLO11n summary: 181 layers, 2,590,620 parameters, 2,590,604 gradients, 6.4 GFLOPs

Transferred 448/499 items from pretrained weights
Freezing layer 'model.23.dfl.conv.weight'
AMP: running Automatic Mixed Precision (AMP) checks...
AMP: checks passed ✅
train: Fast image access ✅ (ping: 0.0±0.0 ms, read: 1181.5±453.6 MB/s, size: 54.6 KB)

train: Scanning /content/datasets/african-wildlife/labels/train.cache... 1052 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1052/1052 [00:00<?, ?it/s]

albumentations: Blur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

val: Fast image access ✅ (ping: 0.0±0.0 ms, read: 251.1±135.3 MB/s, size: 42.2 KB)

val: Scanning /content/datasets/african-wildlife/labels/val.cache... 225 images, 0 backgrounds, 0 corrupt: 100%|██████████| 225/225 [00:00<?, ?it/s]

Plotting labels to runs/detect/train7/labels.jpg... 
optimizer: 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
optimizer: AdamW(lr=0.00125, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to runs/detect/train7
Starting training for 10 epochs...
Closing dataloader mosaic
albumentations: Blur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size

       1/10      2.48G      0.795      2.789      1.199         16        640: 100%|██████████| 66/66 [00:22<00:00,  2.92it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 8/8 [00:01<00:00,  4.20it/s]

                   all        225        379      0.918     0.0624      0.695      0.498

# Load a model
model = YOLO("/content/runs/detect/train/weights/best.pt")  # pretrained YOLO11n model

# Run batched inference on a list of images
results = model("/content/datasets/african-wildlife/images/test/1 (128).jpg", save=True)  # return a list of Results objects

image 1/1 /content/datasets/african-wildlife/images/test/1 (128).jpg: 416x640 1 buffalo, 51.8ms
Speed: 2.7ms preprocess, 51.8ms inference, 2.3ms postprocess per image at shape (1, 3, 416, 640)
Results saved to runs/detect/predict

# Paquetes extra para el informe
%pip -q install python-docx pillow markdown

import os, glob, io, json, yaml, shutil, random, textwrap
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageOps, ImageDraw, ImageFont
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from markdown import markdown

   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0.0/253.0 kB ? eta -:--:--
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 253.0/253.0 kB 8.6 MB/s eta 0:00:00

# Ajusta si tu run es otro
RUN_DIR = Path("runs/detect/train7")
assert RUN_DIR.exists(), f"No existe {RUN_DIR}. Revisa el nombre del run."

# Resultados de entrenamiento por época
csv_path = RUN_DIR / "results.csv"
df = pd.read_csv(csv_path)
last = df.iloc[-1].to_dict()

# Métricas clave finales
metrics = {
    "precision": float(last.get("metrics/precision(B)", np.nan)),
    "recall": float(last.get("metrics/recall(B)", np.nan)),
    "mAP50": float(last.get("metrics/mAP50(B)", np.nan)),
    "mAP50-95": float(last.get("metrics/mAP50-95(B)", np.nan)),
    "box_loss": float(last.get("train/box_loss", np.nan)),
    "cls_loss": float(last.get("train/cls_loss", np.nan)),
    "dfl_loss": float(last.get("train/dfl_loss", np.nan)),
}

# Intenta leer resumen de validación por clase desde 'val.json' si existe
val_json = RUN_DIR / "val_predictions.json"  # (depende de versión; a veces no se guarda)
per_class = []
if val_json.exists():
    try:
        import json
        with open(val_json, "r") as f:
            vj = json.load(f)
        # Estructura puede variar. Si no hay datos por clase, se ignora.
    except Exception as e:
        print("No se pudo parsear val_predictions.json:", e)

# Imprime métricas resumen
metrics

{'precision': 0.90045,
 'recall': 0.90455,
 'mAP50': 0.93372,
 'mAP50-95': 0.76123,
 'box_loss': 0.64558,
 'cls_loss': 0.81559,
 'dfl_loss': 1.06295}

# Curvas: P, R, mAP50, mAP50-95
plt.figure(); df["metrics/precision(B)"].plot(); plt.title("Precision (B)"); plt.xlabel("epoch"); plt.ylabel("precision"); plt.grid(True); plt.savefig(RUN_DIR/"curve_precision.png", dpi=150, bbox_inches="tight"); plt.close()
plt.figure(); df["metrics/recall(B)"].plot(); plt.title("Recall (B)"); plt.xlabel("epoch"); plt.ylabel("recall"); plt.grid(True); plt.savefig(RUN_DIR/"curve_recall.png", dpi=150, bbox_inches="tight"); plt.close()
plt.figure(); df["metrics/mAP50(B)"].plot(); plt.title("mAP@0.5 (B)"); plt.xlabel("epoch"); plt.ylabel("mAP50"); plt.grid(True); plt.savefig(RUN_DIR/"curve_map50.png", dpi=150, bbox_inches="tight"); plt.close()
plt.figure(); df["metrics/mAP50-95(B)"].plot(); plt.title("mAP@0.5:0.95 (B)"); plt.xlabel("epoch"); plt.ylabel("mAP50-95"); plt.grid(True); plt.savefig(RUN_DIR/"curve_map5095.png", dpi=150, bbox_inches="tight"); plt.close()

# Si Ultralytics ya generó figuras, solo las referenciamos:
pre_made_imgs = {
    "labels": RUN_DIR/"labels.jpg",
    "results": RUN_DIR/"results.png",
    "confmat": RUN_DIR/"confusion_matrix.png",        # puede no existir con algunas versiones
    "pr_curve": RUN_DIR/"PR_curve.png",               # idem
}
pre_made_imgs

{'labels': PosixPath('runs/detect/train7/labels.jpg'),
 'results': PosixPath('runs/detect/train7/results.png'),
 'confmat': PosixPath('runs/detect/train7/confusion_matrix.png'),
 'pr_curve': PosixPath('runs/detect/train7/PR_curve.png')}

# Usa tu mejor peso
best_pt = RUN_DIR / "weights" / "best.pt"
from ultralytics import YOLO
model = YOLO(str(best_pt))

# Busca imágenes de test
TEST_DIR = Path("/content/datasets/african-wildlife/images/test")
test_imgs = sorted(glob.glob(str(TEST_DIR/"*.jpg"))) + sorted(glob.glob(str(TEST_DIR/"*.png")))
assert len(test_imgs)>0, f"No se encontraron imágenes en {TEST_DIR}"

# Toma 12 aleatorias y predice guardando crops
sample = random.sample(test_imgs, k=min(12, len(test_imgs)))
pred_dir = RUN_DIR / "report_preds"
pred_dir.mkdir(exist_ok=True, parents=True)

# Ejecuta predicciones y guarda los PNG con anotaciones
for im_path in sample:
    _ = model.predict(im_path, save=True, save_txt=False, project=str(pred_dir), name="pred", exist_ok=True)

# Ultralytics guarda en pred_dir/pred/ imagenes con las cajas
pred_imgs = sorted(glob.glob(str(pred_dir / "pred" / "*.*")))
pred_imgs[:3]

image 1/1 /content/datasets/african-wildlife/images/test/3 (209).jpg: 384x640 1 elephant, 2 rhinos, 48.7ms
Speed: 2.7ms preprocess, 48.7ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)
Results saved to runs/detect/train7/report_preds/pred

image 1/1 /content/datasets/african-wildlife/images/test/2 (366).jpg: 448x640 1 elephant, 47.3ms
Speed: 2.8ms preprocess, 47.3ms inference, 1.5ms postprocess per image at shape (1, 3, 448, 640)
Results saved to runs/detect/train7/report_preds/pred

image 1/1 /content/datasets/african-wildlife/images/test/3 (54).jpg: 480x640 1 rhino, 60.0ms
Speed: 2.2ms preprocess, 60.0ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)
Results saved to runs/detect/train7/report_preds/pred

image 1/1 /content/datasets/african-wildlife/images/test/2 (122).jpg: 480x640 1 elephant, 8.6ms
Speed: 1.5ms preprocess, 8.6ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)
Results saved to runs/detect/train7/report_preds/pred

image 1/1 /content/datasets/african-wildlife/images/test/3 (240).jpg: 384x640 1 rhino, 9.0ms
Speed: 1.7ms preprocess, 9.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)
Results saved to runs/detect/train7/report_preds/pred

image 1/1 /content/datasets/african-wildlife/images/test/1 (356).jpg: 384x640 1 buffalo, 8.7ms
Speed: 2.2ms preprocess, 8.7ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)
Results saved to runs/detect/train7/report_preds/pred

image 1/1 /content/datasets/african-wildlife/images/test/2 (246).jpg: 384x640 2 elephants, 8.9ms
Speed: 2.0ms preprocess, 8.9ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)
Results saved to runs/detect/train7/report_preds/pred

image 1/1 /content/datasets/african-wildlife/images/test/4 (234).jpg: 576x640 1 zebra, 45.8ms
Speed: 2.5ms preprocess, 45.8ms inference, 1.4ms postprocess per image at shape (1, 3, 576, 640)
Results saved to runs/detect/train7/report_preds/pred

image 1/1 /content/datasets/african-wildlife/images/test/1 (231).jpg: 448x640 1 buffalo, 10.4ms
Speed: 2.2ms preprocess, 10.4ms inference, 1.3ms postprocess per image at shape (1, 3, 448, 640)
Results saved to runs/detect/train7/report_preds/pred

image 1/1 /content/datasets/african-wildlife/images/test/3 (90).jpg: 448x640 2 rhinos, 8.8ms
Speed: 2.3ms preprocess, 8.8ms inference, 1.3ms postprocess per image at shape (1, 3, 448, 640)
Results saved to runs/detect/train7/report_preds/pred

image 1/1 /content/datasets/african-wildlife/images/test/4 (275).jpg: 640x448 3 zebras, 45.5ms
Speed: 2.0ms preprocess, 45.5ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 448)
Results saved to runs/detect/train7/report_preds/pred

image 1/1 /content/datasets/african-wildlife/images/test/2 (182).jpg: 480x640 1 elephant, 9.2ms
Speed: 2.2ms preprocess, 9.2ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)
Results saved to runs/detect/train7/report_preds/pred

['runs/detect/train7/report_preds/pred/1 (231).jpg',
 'runs/detect/train7/report_preds/pred/1 (356).jpg',
 'runs/detect/train7/report_preds/pred/2 (122).jpg']

def make_grid(img_paths, grid=(3,4), tile_size=(640, 480), save_path=None):
    cols, rows = grid[1], grid[0]
    W, H = tile_size
    canvas = Image.new("RGB", (cols*W, rows*H), (255,255,255))
    for i, p in enumerate(img_paths[:rows*cols]):
        im = Image.open(p).convert("RGB")
        im = ImageOps.fit(im, (W, H))
        r, c = divmod(i, cols)
        canvas.paste(im, (c*W, r*H))
    if save_path:
        canvas.save(save_path, "PNG")
    return canvas

grid_img = RUN_DIR/"grid_predictions.png"
make_grid(pred_imgs, grid=(3,4), tile_size=(640,480), save_path=grid_img)
grid_img

PosixPath('runs/detect/train7/grid_predictions.png')

# Lee hparams (si existen) desde args.yaml
args_yaml = RUN_DIR / "args.yaml"
hparams = {}
if args_yaml.exists():
    with open(args_yaml, "r") as f:
        hparams = yaml.safe_load(f)

df_summary = pd.DataFrame({
    "Métrica": ["Precision", "Recall", "mAP50", "mAP50-95", "Box loss", "Cls loss", "DFL loss"],
    "Valor": [metrics["precision"], metrics["recall"], metrics["mAP50"], metrics["mAP50-95"], metrics["box_loss"], metrics["cls_loss"], metrics["dfl_loss"]]
})
df_summary

doc = Document()

# Estilos básicos
style = doc.styles['Normal']
style.font.name = 'Calibri'
style.font.size = Pt(11)

# Portada
title = doc.add_heading('Informe de Detección de Fauna — African Wildlife (YOLO11n)', level=0)
title.alignment = WD_ALIGN_PARAGRAPH.CENTER
p = doc.add_paragraph('Experimento en Google Colab\n')
p.alignment = WD_ALIGN_PARAGRAPH.CENTER

doc.add_paragraph('Objetivos:\n'
                  '- Entrenar YOLO11n sobre el dataset African Wildlife (Ultralytics).\n'
                  '- Evaluar métricas (P, R, mAP@0.5, mAP@0.5:0.95).\n'
                  '- Generar visualizaciones (curvas, ejemplos de inferencia) y consolidar un informe reproducible.')

# Configuración
doc.add_heading('Configuración de entrenamiento', level=1)
if hparams:
    rows = [
        ("epochs", hparams.get("epochs")),
        ("imgsz", hparams.get("imgsz")),
        ("batch", hparams.get("batch")),
        ("optimizer", hparams.get("optimizer")),
        ("lr0", hparams.get("lr0")),
        ("weight_decay", hparams.get("weight_decay")),
        ("device", hparams.get("device")),
        ("data", hparams.get("data")),
        ("model", hparams.get("model")),
        ("name", hparams.get("name")),
    ]
    table = doc.add_table(rows=1, cols=2)
    hdr = table.rows[0].cells
    hdr[0].text = "Parámetro"
    hdr[1].text = "Valor"
    for k,v in rows:
        row_cells = table.add_row().cells
        row_cells[0].text = str(k)
        row_cells[1].text = str(v)
else:
    doc.add_paragraph("No se encontró args.yaml; se omite tabla de hiperparámetros.")

# Resultados
doc.add_heading('Métricas finales', level=1)
table = doc.add_table(rows=1, cols=2)
hdr = table.rows[0].cells
hdr[0].text = "Métrica"
hdr[1].text = "Valor"
for _, r in df_summary.iterrows():
    row = table.add_row().cells
    row[0].text = str(r["Métrica"])
    row[1].text = f'{r["Valor"]:.4f}'

# Gráficos
doc.add_heading('Curvas y visualizaciones', level=1)
for img_path in [
    RUN_DIR/"curve_precision.png",
    RUN_DIR/"curve_recall.png",
    RUN_DIR/"curve_map50.png",
    RUN_DIR/"curve_map5095.png",
    pre_made_imgs.get("labels"),
    pre_made_imgs.get("results"),
    pre_made_imgs.get("confmat"),
    pre_made_imgs.get("pr_curve"),
    RUN_DIR/"grid_predictions.png",
]:
    if img_path and Path(img_path).exists():
        doc.add_paragraph(Path(img_path).name)
        doc.add_picture(str(img_path), width=Inches(6.0))

# Conclusiones (borrador editable)
doc.add_heading('Conclusiones (borrador)', level=1)
doc.add_paragraph(
    "El modelo YOLO11n alcanzó un desempeño final de:\n"
    f"- mAP@0.5 = {metrics['mAP50']:.3f}\n"
    f"- mAP@0.5:0.95 = {metrics['mAP50-95']:.3f}\n"
    f"con Precision={metrics['precision']:.3f} y Recall={metrics['recall']:.3f}.\n\n"
    "Observaciones:\n"
    "• La curva mAP muestra mejora progresiva y estabilización al final del entrenamiento.\n"
    "• Las clases con menor soporte de datos pueden beneficiarse de mayor augmentación o más épocas.\n"
    "• Para uso en cámaras trampa, considerar umbrales de confianza específicos por clase."
)

# Referencias
doc.add_heading('Referencias', level=1)
doc.add_paragraph(
    "Ultralytics — African Wildlife Dataset: https://docs.ultralytics.com/datasets/detect/african-wildlife/\n"
    "LearnOpenCV (Ankan Ghosh, 2025) — Fine-Tuning RetinaNet: https://learnopencv.com/finetuning-retinanet/\n"
    "Kaggle EDA Image Datasets — Fajri (2022): https://www.kaggle.com/code/faldoae/exploratory-data-analysis-eda-for-image-datasets"
)

out_docx = RUN_DIR/"informe_yolo11n_african_wildlife.docx"
doc.save(out_docx)
out_docx

PosixPath('runs/detect/train7/informe_yolo11n_african_wildlife.docx')

	Métrica	Valor
0	Precision	0.90045
1	Recall	0.90455
2	mAP50	0.93372
3	mAP50-95	0.76123
4	Box loss	0.64558
5	Cls loss	0.81559
6	DFL loss	1.06295