Merge pull request #1 from joanby/master

update
joanby · Aug 6, 2020 · 2b05589 · 2b05589
2 parents c4eb1d8 + 4ec00e4
commit 2b05589
Show file tree

Hide file tree

Showing 16 changed files with 397 additions and 175 deletions.
diff --git a/...ansparencias/Part 8 - Deep Learning/Section 39 - Artificial Neural Networks (ANN)/ANN.key b/...ansparencias/Part 8 - Deep Learning/Section 39 - Artificial Neural Networks (ANN)/ANN.key
diff --git a/...parencias/Part 8 - Deep Learning/Section 40 - Convolutional Neural Networks (CNN)/CNN.key b/...parencias/Part 8 - Deep Learning/Section 40 - Convolutional Neural Networks (CNN)/CNN.key
diff --git a/...rt 1 - Data Preprocessing --------------------/data_preprocessing_template_new_version.py b/...rt 1 - Data Preprocessing --------------------/data_preprocessing_template_new_version.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Apr 26 12:11:34 2020
+
+@author: juangabriel
+"""
+
+
+# Data preprocessing template
+
+# Import the libraries
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Load the data set: X is every column but the last, y is column 3 (presumably the last - confirm against Data.csv)
+dataset = pd.read_csv("Data.csv")
+X = dataset.iloc[:, :-1].values
+y = dataset.iloc[:, 3].values
+
+# Handle missing values: NaNs in columns 1-2 become the column mean ("verbose" was removed from SimpleImputer)
+from sklearn.impute import SimpleImputer
+imputer = SimpleImputer(missing_values = np.nan, strategy = "mean")
+imputer = imputer.fit(X[:, 1:3])
+X[:, 1:3] = imputer.transform(X[:, 1:3])
+
+# Encode categorical data
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+from sklearn.compose import ColumnTransformer
+
+labelencoder_X = LabelEncoder()
+X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
+
+ct = ColumnTransformer(
+    [('one_hot_encoder', OneHotEncoder(categories='auto'), [0])],
+    remainder='passthrough'
+)
+
+X = np.array(ct.fit_transform(X), dtype=float)  # builtin float: the np.float alias was removed from NumPy
+labelencoder_y = LabelEncoder()
+y = labelencoder_y.fit_transform(y)
+
+
+# Split the data set into a training set and a test set
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y,test_size = 0.2, random_state = 0)
+
+# Feature scaling: fit the scaler on the training set only, then apply it to the test set
+from sklearn.preprocessing import StandardScaler
+sc_X = StandardScaler()
+X_train = sc_X.fit_transform(X_train)
+X_test = sc_X.transform(X_test)
diff --git a/...n 2 -------------------- Part 1 - Data Preprocessing --------------------/missing_data.py b/...n 2 -------------------- Part 1 - Data Preprocessing --------------------/missing_data.py
diff --git a/datasets/Part 10 - Model Selection & Boosting/Section 49 - XGBoost/xgboost.py b/datasets/Part 10 - Model Selection & Boosting/Section 49 - XGBoost/xgboost.py
@@ -30,25 +30,3 @@
 X = onehotencoder.fit_transform(X).toarray()
 X = X[:, 1:]
 
-# Dividir el data set en conjunto de entrenamiento y conjunto de testing
-from sklearn.model_selection import train_test_split
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
-
-# Ajustar el modelo XGBoost al Conjunto de Entrenamiento
-from xgboost import XGBClassifier
-classifier = XGBClassifier()
-classifier.fit(X_train, y_train)
-
-# Predicción de los resultados con el Conjunto de Testing
-y_pred  = classifier.predict(X_test)
-
-# Elaborar una matriz de confusión
-from sklearn.metrics import confusion_matrix
-cm = confusion_matrix(y_test, y_pred)
-
-# Aplicar k-fold cross validation
-from sklearn.model_selection import cross_val_score
-accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10)
-accuracies.mean()
-accuracies.std()
-
diff --git a/datasets/Part 10 - Model Selection & Boosting/Section 49 - XGBoost/xgboost_new_version.py b/datasets/Part 10 - Model Selection & Boosting/Section 49 - XGBoost/xgboost_new_version.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sun May  5 09:58:24 2019
+
+@author: juangabriel
+"""
+
+# XGBoost
+# Installation instructions are available at http://xgboost.readthedocs.io/en/latest/build.html
+
+# Import the libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Load the data set
+dataset = pd.read_csv('Churn_Modelling.csv')
+
+X = dataset.iloc[:, 3:13].values
+y = dataset.iloc[:, 13].values
+
+# Encode categorical data
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+from sklearn.compose import ColumnTransformer
+
+labelencoder_X_1 = LabelEncoder()
+X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
+labelencoder_X_2 = LabelEncoder()
+X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
+ct = ColumnTransformer(
+    [('one_hot_encoder', OneHotEncoder(categories='auto'), [2])],
+    remainder='passthrough'
+)
+X = np.array(ct.fit_transform(X), dtype=float)  # builtin float: the np.float alias was removed from NumPy
+X = X[:, 1:]  # drop the first dummy column to avoid the dummy variable trap
+
+
+# Split the data set into a training set and a test set
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+
+# Fit the XGBoost model to the training set
+from xgboost import XGBClassifier
+classifier = XGBClassifier()
+classifier.fit(X_train, y_train)
+
+# Predict the test set results
+y_pred  = classifier.predict(X_test)
+
+# Build the confusion matrix
+from sklearn.metrics import confusion_matrix
+cm = confusion_matrix(y_test, y_pred)
+
+# Apply k-fold cross validation; print the scores so they are visible when run as a script
+from sklearn.model_selection import cross_val_score
+accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10)
+print(accuracies.mean())
+print(accuracies.std())
+
diff --git a/...gression/Section 5 - Multiple Linear Regression/multiple_linear_regression_new_version.py b/...gression/Section 5 - Multiple Linear Regression/multiple_linear_regression_new_version.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Mar  3 13:10:07 2019
+
+@author: juangabriel
+"""
+
+# Multiple Linear Regression
+
+# Import the libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Load the data set
+dataset = pd.read_csv('50_Startups.csv')
+X = dataset.iloc[:, :-1].values
+y = dataset.iloc[:, 4].values
+
+
+# Encode categorical data
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+from sklearn.compose import make_column_transformer
+
+labelencoder_X = LabelEncoder()
+X[:, 3] = labelencoder_X.fit_transform(X[:, 3])
+onehotencoder = make_column_transformer((OneHotEncoder(), [3]), remainder = "passthrough")
+X = onehotencoder.fit_transform(X)
+
+# Avoid the dummy variable trap
+X = X[:, 1:]
+
+# Split the data set into a training set and a test set
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+
+
+# Feature scaling
+"""from sklearn.preprocessing import StandardScaler
+sc_X = StandardScaler()
+X_train = sc_X.fit_transform(X_train)
+X_test = sc_X.transform(X_test)"""
+
+# Fit the multiple linear regression model to the training set
+from sklearn.linear_model import LinearRegression
+regression = LinearRegression()
+regression.fit(X_train, y_train)
+
+# Predict the test set results
+y_pred = regression.predict(X_test)
+
+# Build the optimal MLR model using backward elimination
+import statsmodels.api as sm
+# Prepend a column of ones so OLS can fit an intercept; sized from X rather than a hard-coded 50 rows
+X = np.append(arr = np.ones((X.shape[0], 1)).astype(int), values = X, axis = 1)
+SL = 0.05
+# .tolist() was added to X_opt for compatibility with Python 3.7; summaries are printed so they show in a script
+
+X_opt = X[:, [0, 1, 2, 3, 4, 5]]
+regression_OLS = sm.OLS(endog = y, exog = X_opt.tolist()).fit()
+print(regression_OLS.summary())
+
+X_opt = X[:, [0, 1, 3, 4, 5]]
+regression_OLS = sm.OLS(endog = y, exog = X_opt.tolist()).fit()
+print(regression_OLS.summary())
+
+X_opt = X[:, [0, 3, 4, 5]]
+regression_OLS = sm.OLS(endog = y, exog = X_opt.tolist()).fit()
+print(regression_OLS.summary())
+
+X_opt = X[:, [0, 3, 5]]
+regression_OLS = sm.OLS(endog = y, exog = X_opt.tolist()).fit()
+print(regression_OLS.summary())
+
+X_opt = X[:, [0, 3]]
+regression_OLS = sm.OLS(endog = y, exog = X_opt.tolist()).fit()
+print(regression_OLS.summary())
diff --git a/... Regression/Section 8 - Decision Tree Regression/decission_tree_regression_new_version.py b/... Regression/Section 8 - Decision Tree Regression/decission_tree_regression_new_version.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Mar  7 19:04:40 2019
+
+@author: juangabriel
+"""
+
+# Decision Tree Regression
+
+# Import the libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Load the data set
+dataset = pd.read_csv('Position_Salaries.csv')
+X = dataset.iloc[:, 1:2].values
+y = dataset.iloc[:, 2].values
+
+
+# Split the data set into a training set and a test set
+"""
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+"""
+
+# Feature scaling
+"""from sklearn.preprocessing import StandardScaler
+sc_X = StandardScaler()
+X_train = sc_X.fit_transform(X_train)
+X_test = sc_X.transform(X_test)"""
+
+# Fit the regression to the data set
+from sklearn.tree import DecisionTreeRegressor
+regression = DecisionTreeRegressor(random_state = 0)
+regression.fit(X, y)
+
+# Predict with the fitted model
+y_pred = regression.predict([[6.5]])
+print(y_pred)
+
+# Visualise the decision tree results on a fine grid (0.1 step) rather than only at the training points
+X_grid = np.arange(X.min(), X.max(), 0.1)  # scalar bounds; min(X)/max(X) would pass 1-element arrays
+X_grid = X_grid.reshape(len(X_grid), 1)
+plt.scatter(X, y, color = "red")
+plt.plot(X_grid, regression.predict(X_grid), color = "blue")
+plt.title("Modelo de Regresión")
+plt.xlabel("Posición del empleado")
+plt.ylabel("Sueldo (en $)")
+plt.show()
+
+
diff --git a/...- Regression/Section 9 - Random Forest Regression/random_forest_regression_new_version.py b/...- Regression/Section 9 - Random Forest Regression/random_forest_regression_new_version.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Mar 11 19:53:04 2019
+
+@author: juangabriel
+"""
+
+# Random Forest Regression
+
+# Import the libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Load the data set
+dataset = pd.read_csv('Position_Salaries.csv')
+X = dataset.iloc[:, 1:2].values
+y = dataset.iloc[:, 2].values
+
+
+# Split the data set into a training set and a test set
+"""
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+"""
+
+# Feature scaling
+"""from sklearn.preprocessing import StandardScaler
+sc_X = StandardScaler()
+X_train = sc_X.fit_transform(X_train)
+X_test = sc_X.transform(X_test)"""
+
+# Fit the random forest to the data set
+from sklearn.ensemble import RandomForestRegressor
+regression = RandomForestRegressor(n_estimators = 300, random_state = 0)
+regression.fit(X, y)
+
+# Predict with the random forest model and show the result
+y_pred = regression.predict([[6.5]])
+print(y_pred)
+
+# Visualise the random forest results on a fine grid (0.01 step)
+X_grid = np.arange(X.min(), X.max(), 0.01)  # scalar bounds; min(X)/max(X) would pass 1-element arrays
+X_grid = X_grid.reshape(len(X_grid), 1)
+plt.scatter(X, y, color = "red")
+plt.plot(X_grid, regression.predict(X_grid), color = "blue")
+plt.title("Modelo de Regresión con Random Forest")
+plt.xlabel("Posición del empleado")
+plt.ylabel("Sueldo (en $)")
+plt.show()
+
diff --git a/datasets/Part 5 - Association Rule Learning/Section 28 - Apriori/Apriori_Python/apriori.py b/datasets/Part 5 - Association Rule Learning/Section 28 - Apriori/Apriori_Python/apriori.py
@@ -28,4 +28,4 @@
 # Visualización de los resultados
 results = list(rules)
 
-results[4]
+print(results[4])
diff --git a/datasets/Part 5 - Association Rule Learning/Section 28 - Apriori/apriori.R b/datasets/Part 5 - Association Rule Learning/Section 28 - Apriori/apriori.R
@@ -2,7 +2,9 @@
 
 # Preprocesado de Datos
 #install.packages("arules")
-library(arules)
+library(arules) 
+library(arulesViz)
+
 dataset = read.csv("Market_Basket_Optimisation.csv", header = FALSE)
 dataset = read.transactions("Market_Basket_Optimisation.csv",
                             sep = ",", rm.duplicates = TRUE)
@@ -16,5 +18,7 @@ rules = apriori(data = dataset,
 # Visualización de los resultados
 inspect(sort(rules, by = 'lift')[1:10])
 
-
+plot(rules, method = "graph", engine = "htmlwidget")
+
+
 
diff --git a/...nguage Processing/Section 36 - Natural Language Processing/natural_language_processing.py b/...nguage Processing/Section 36 - Natural Language Processing/natural_language_processing.py
@@ -56,4 +56,5 @@
 from sklearn.metrics import confusion_matrix
 cm = confusion_matrix(y_test, y_pred)
 
+print(cm)
 (55+91)/200