In [1]:
pip install pandas scikit-learn tensorflow
Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (2.1.4) Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.3.2) Requirement already satisfied: tensorflow in /usr/local/lib/python3.10/dist-packages (2.17.0) Requirement already satisfied: numpy<2,>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.26.4) Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2) Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.1) Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.1) Requirement already satisfied: scipy>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.13.1) Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.4.2) Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.5.0) Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.4.0) Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.6.3) Requirement already satisfied: flatbuffers>=24.3.25 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (24.3.25) Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.6.0) Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.2.0) Requirement already satisfied: h5py>=3.10.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.11.0) Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (18.1.1) Requirement already satisfied: ml-dtypes<0.5.0,>=0.3.1 in 
/usr/local/lib/python3.10/dist-packages (from tensorflow) (0.4.0) Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.3.0) Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from tensorflow) (24.1) Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.20.3) Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.32.3) Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from tensorflow) (71.0.4) Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.16.0) Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.4.0) Requirement already satisfied: typing-extensions>=3.6.6 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (4.12.2) Requirement already satisfied: wrapt>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.16.0) Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.64.1) Requirement already satisfied: tensorboard<2.18,>=2.17 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.17.0) Requirement already satisfied: keras>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.4.1) Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.37.1) Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from astunparse>=1.6.0->tensorflow) (0.44.0) Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from keras>=3.2.0->tensorflow) (13.8.0) Requirement already satisfied: namex in 
/usr/local/lib/python3.10/dist-packages (from keras>=3.2.0->tensorflow) (0.0.8) Requirement already satisfied: optree in /usr/local/lib/python3.10/dist-packages (from keras>=3.2.0->tensorflow) (0.12.1) Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorflow) (3.3.2) Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorflow) (3.8) Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorflow) (2.0.7) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorflow) (2024.7.4) Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.18,>=2.17->tensorflow) (3.7) Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.18,>=2.17->tensorflow) (0.7.2) Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.18,>=2.17->tensorflow) (3.0.4) Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard<2.18,>=2.17->tensorflow) (2.1.5) Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->keras>=3.2.0->tensorflow) (3.0.0) Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->keras>=3.2.0->tensorflow) (2.16.1) Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->keras>=3.2.0->tensorflow) (0.1.2)
In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
In [3]:
# Mount Google Drive so files under "My Drive" are reachable at /content/drive.
# Colab-only: prompts for an authorization flow the first time it runs.
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
In [4]:
import os

# Sanity check: list the Colab Notebooks folder to confirm that the
# dataset file (penguins.csv) is present before attempting to load it.
notebooks_dir = '/content/drive/My Drive/Colab Notebooks/'
print(os.listdir(notebooks_dir))
['redes neuronales.ipynb', 'Ventas.csv', 'scrip 01.ipynb', 'Pruebas.ipynb', 'Cod_img.ipynb', 'scrip 3.ipynb', 'tarea_1_caracteristicas.ipynb', 'JFA tarea_1_caracteristicas.ipynb', 'jfa kmeans and clust- patrones.ipynb', 'analisis patrones nparm.ipynb', 'Untitled0.ipynb', 'Tarea_Foro_4_Bayes_Clasificacion.ipynb', 'penguins.xlsx', 'Untitled1.ipynb', 'tarea 4', 'copy of Perceptron_AND.ipynb', 'Copy of Perceptron_OR.ipynb', 'Copy of corre en R [CODATA-RDA-datatrieste] Perceptron AND-rlang-solution.ipynb', 'Copy of Perceptron_XOR 1-0-0-1 - plots.ipynb', 'Copy of Perceptron_XOR - plots - solution.ipynb', 'Copy of Feature Selection AutoML Hyperparameter Optimization - crbds2024', 'Copy of Perceptron_XOR definitivo con - plots - solution2.ipynb', 'Copy of MNIST MLP - Solution, con dropout', 'Copy of MNIST MLP', 'Copy of Image Jet Tagging Analysis (PTJ 250-300) - crbds2024', 'Copy of COVID19 Chest CTS - crbds2024', 'Untitled2.ipynb', 'penguins.csv', 'scrip 2.ipynb']
In [23]:
import pandas as pd

# Full path to the penguins dataset on Google Drive.
csv_path = '/content/drive/My Drive/Colab Notebooks/penguins.csv'

# The file is semicolon-delimited, so the default comma separator
# would read every row as a single column.
penguins = pd.read_csv(csv_path, delimiter=';')

# Quick look at the first rows to confirm the parse worked.
print(penguins.head())
species island bill_length_mm bill_depth_mm flipper_length_mm \ 0 Adelie Biscoe 53.4 17.8 219.0 1 Adelie Biscoe 49.3 18.1 245.0 2 Adelie Biscoe 55.7 16.6 226.0 3 Adelie Biscoe 38.0 15.6 221.0 4 Adelie Biscoe 60.7 17.9 177.0 body_mass_g sex diet life_stage health_metrics 0 5687.0 female fish adult overweight 1 6811.0 female fish adult overweight 2 5388.0 female fish adult overweight 3 6262.0 female fish adult overweight 4 4811.0 female fish juvenile overweight
In [24]:
# Preprocess the data.
# Encode the categorical columns as integers — but only if they are still
# strings. BUG FIX: the original re-fit LabelEncoder unconditionally, so
# re-running this cell on an already-encoded DataFrame replaced each
# encoder's classes_ (the original category names) with plain integers,
# silently breaking the species-name labels used by later plotting cells.
# Guarding on dtype makes the cell idempotent under Restart-free re-runs.
if penguins['species'].dtype == object:
    label_encoders = {}
    for column in ['species', 'island', 'sex', 'diet', 'life_stage', 'health_metrics']:
        le = LabelEncoder()
        penguins[column] = le.fit_transform(penguins[column])
        label_encoders[column] = le

# Split features and target.
X = penguins.drop('species', axis=1)
y = penguins['species']

# Standardize features to zero mean / unit variance.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# 80/20 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
In [25]:
# Construir el modelo
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(label_encoders['species'].classes_), activation='softmax'))
# Compilar el modelo
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Entrenar el modelo
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)
Epoch 1/10
/usr/local/lib/python3.10/dist-packages/keras/src/layers/core/dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(activity_regularizer=activity_regularizer, **kwargs)
78/78 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.5282 - loss: 0.9828 - val_accuracy: 0.6873 - val_loss: 0.7561 Epoch 2/10 78/78 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.7017 - loss: 0.6907 - val_accuracy: 0.6836 - val_loss: 0.6235 Epoch 3/10 78/78 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.7354 - loss: 0.5587 - val_accuracy: 0.7236 - val_loss: 0.5496 Epoch 4/10 78/78 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.7687 - loss: 0.4868 - val_accuracy: 0.7709 - val_loss: 0.5027 Epoch 5/10 78/78 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.7849 - loss: 0.4586 - val_accuracy: 0.7891 - val_loss: 0.4630 Epoch 6/10 78/78 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.7949 - loss: 0.4263 - val_accuracy: 0.8109 - val_loss: 0.4274 Epoch 7/10 78/78 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8177 - loss: 0.3964 - val_accuracy: 0.8109 - val_loss: 0.4085 Epoch 8/10 78/78 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8331 - loss: 0.3737 - val_accuracy: 0.8364 - val_loss: 0.3819 Epoch 9/10 78/78 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8467 - loss: 0.3414 - val_accuracy: 0.8327 - val_loss: 0.3693 Epoch 10/10 78/78 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8557 - loss: 0.3325 - val_accuracy: 0.8327 - val_loss: 0.3482
Out[25]:
<keras.src.callbacks.history.History at 0x7d4fbfbe2620>
In [35]:
# Evaluate the neural network on the held-out test set.
# model.evaluate returns (loss, accuracy) per the metrics passed to compile().
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")
22/22 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - accuracy: 0.8439 - loss: 0.3751 Test Accuracy: 0.85
In [30]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Predict class probabilities on the test set, then take the argmax
# along the last axis as the predicted class index.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=-1)

# Confusion matrix of true vs. predicted species.
cm = confusion_matrix(y_test, y_pred_classes)

# Heatmap with the original species names on both axes.
species_names = label_encoders['species'].classes_
plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=species_names, yticklabels=species_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()
22/22 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Baseline comparison: a 100-tree random forest on the same train/test split.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict on the test set and report per-class precision/recall/F1.
y_pred = rf_model.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoders['species'].classes_))

# Confusion matrix, wrapped in a DataFrame so the heatmap axes show the
# species names rather than integer codes.
cm = confusion_matrix(y_test, y_pred)
species_names = label_encoders['species'].classes_
cm_df = pd.DataFrame(cm, index=species_names, columns=species_names)

plt.figure(figsize=(10, 7))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix - Random Forest')
plt.show()
Classification Report: precision recall f1-score support Adelie 0.81 0.76 0.78 304 Chinstrap 0.73 0.68 0.71 125 Gentoo 0.85 0.95 0.90 257 accuracy 0.81 686 macro avg 0.80 0.80 0.80 686 weighted avg 0.81 0.81 0.81 686
In [40]:
from sklearn.metrics import accuracy_score, cohen_kappa_score

# Side-by-side evaluation: neural network vs. random forest on the same
# held-out test set.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Neural Accuracy: {accuracy:.2f}")

# Random-forest predictions and accuracy.
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"RF Accuracy: {accuracy:.2f}")

# Cohen's kappa: agreement with the true labels corrected for chance.
kappa = cohen_kappa_score(y_test, y_pred)
print(f"RF Cohen's Kappa: {kappa:.2f}")
22/22 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.8439 - loss: 0.3751 Neural Accuracy: 0.85 RF Accuracy: 0.81 RF Cohen's Kappa: 0.71
In [51]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Encode the categorical columns — but only if they are still strings.
# BUG FIX: the original unconditionally re-fit LabelEncoder here even
# though the columns were already integer-encoded by an earlier cell.
# Re-fitting on the integer codes rebuilds each encoder's classes_ as
# integers, so label_encoders['species'].classes_ loses the species names
# that later cells use as axis labels. The dtype guard makes this
# preprocessing idempotent.
if penguins['species'].dtype == object:
    label_encoders = {}
    for column in ['species', 'island', 'sex', 'diet', 'life_stage', 'health_metrics']:
        le = LabelEncoder()
        penguins[column] = le.fit_transform(penguins[column])
        label_encoders[column] = le

# Split features and target.
X = penguins.drop('species', axis=1)
y = penguins['species']

# Standardize features — KNN is distance-based, so scaling is essential.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Sweep k from 2 to 20 and record the test accuracy for each value.
k_range = range(2, 21)
accuracies = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)
    print(f"K = {k}, Accuracy: {accuracy:.2f}")

# Plot accuracy as a function of k to choose the best neighborhood size.
plt.figure(figsize=(12, 6))
plt.plot(k_range, accuracies, marker='o', linestyle='-', color='b')
plt.xlabel('Número de vecinos (k)')
plt.ylabel('Accuracy')
plt.title('Diferentes valores de K y accuracy')
plt.grid(True)
plt.show()
K = 2, Accuracy: 0.77 K = 3, Accuracy: 0.78 K = 4, Accuracy: 0.77 K = 5, Accuracy: 0.78 K = 6, Accuracy: 0.78 K = 7, Accuracy: 0.79 K = 8, Accuracy: 0.79 K = 9, Accuracy: 0.81 K = 10, Accuracy: 0.80 K = 11, Accuracy: 0.79 K = 12, Accuracy: 0.79 K = 13, Accuracy: 0.80 K = 14, Accuracy: 0.80 K = 15, Accuracy: 0.79 K = 16, Accuracy: 0.79 K = 17, Accuracy: 0.77 K = 18, Accuracy: 0.77 K = 19, Accuracy: 0.77 K = 20, Accuracy: 0.75
In [55]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Encode the categorical columns — but only if they are still strings.
# BUG FIX: the original re-fit LabelEncoder on columns that were already
# integer-encoded by earlier cells, which replaced classes_ (the species
# names) with plain integers — the confusion-matrix axes below would then
# show 0/1/2 instead of Adelie/Chinstrap/Gentoo. Guard on dtype so this
# cell is idempotent.
if penguins['species'].dtype == object:
    label_encoders = {}
    for column in ['species', 'island', 'sex', 'diet', 'life_stage', 'health_metrics']:
        le = LabelEncoder()
        penguins[column] = le.fit_transform(penguins[column])
        label_encoders[column] = le

# Human-readable labels for the species classes (used for plot axes).
species_labels = label_encoders['species'].classes_

# Split features and target.
X = penguins.drop('species', axis=1)
y = penguins['species']

# Standardize features — KNN is distance-based, so scaling is essential.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# k = 9 gave the best accuracy in the sweep above.
k = 9  # Puedes cambiar este valor según lo que quieras probar

# Fit the KNN classifier and evaluate on the test set.
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"K = {k}, Accuracy: {accuracy:.2f}")

# Confusion matrix in the encoded-label order matching species_labels.
conf_matrix = confusion_matrix(y_test, y_pred, labels=label_encoders['species'].transform(species_labels))

# Display with the species names on the axes.
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=species_labels)
disp.plot(cmap=plt.cm.Blues, values_format='d')
plt.title(f'Matriz de Confusión para KNN con k = {k}')
plt.show()
K = 9, Accuracy: 0.81
In [50]:
# Re-report the accuracy of the most recent KNN fit.
# NOTE(review): k, y_pred and y_test all come from the previous cell's
# kernel state — this cell fails on a fresh kernel and duplicates the
# printout above; consider deleting it.
accuracy = accuracy_score(y_test, y_pred)
print(f"K = {k}, Accuracy: {accuracy:.2f}")
K = 9, Accuracy: 0.81
In [ ]: