Skip to content

Commit

Permalink
database url removed
Browse files Browse the repository at this point in the history
  • Loading branch information
tedoaba committed Oct 28, 2024
1 parent e9bde72 commit b389a06
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 53 deletions.
12 changes: 0 additions & 12 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,6 @@ services:
volumes:
- .:/app

db:
image: postgres:17
container_name: postgres_db
environment:
POSTGRES_DB: ${POSTGRES_DB}
POSTGRES_USER: ${POSTGRES_USER}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
ports:
- "5432:5432"
volumes:
- postgres_data:/var/lib/postgresql/data

css:
image: node:14
container_name: css_processor
Expand Down
1 change: 1 addition & 0 deletions notebooks/kaim2-week-8-9-task-2-balanced_class.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"cells":[{"cell_type":"markdown","metadata":{"_cell_guid":"c53980f3-43e6-4309-b5b3-8c387c94c2ef","_uuid":"ac363b82-57c3-4cb3-87ed-d2c3d463e1a5","trusted":true},"source":["# KAIM Week 8 and 9 Challenges"]},{"cell_type":"markdown","metadata":{"_cell_guid":"9597c964-28b3-47e3-9f51-0691092476ce","_uuid":"977dc833-b4c5-441a-9ca2-d7e8e86bedb1","trusted":true},"source":["## **Task 2: MOdel Building**"]},{"cell_type":"markdown","metadata":{"_cell_guid":"87a00a83-d488-4a01-a3c8-2d2d9f956d41","_uuid":"7ffdbba4-f7f1-4b89-92ea-16cc52919bc6","trusted":true},"source":["## Import Necessary Libraries"]},{"cell_type":"code","execution_count":1,"metadata":{"_cell_guid":"98500711-7c8e-4a73-a926-ad743b2ca99a","_uuid":"05984005-6423-4192-8e7f-856e08148d1f","collapsed":false,"execution":{"iopub.execute_input":"2024-10-18T11:28:10.886046Z","iopub.status.busy":"2024-10-18T11:28:10.885064Z","iopub.status.idle":"2024-10-18T11:28:12.528653Z","shell.execute_reply":"2024-10-18T11:28:12.527539Z","shell.execute_reply.started":"2024-10-18T11:28:10.886001Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Import necessary libraries\n","import pandas as pd\n","import numpy as np\n","import matplotlib.pyplot as plt\n","import seaborn as sns\n","from sklearn.preprocessing import StandardScaler, LabelEncoder\n","from sklearn.impute import SimpleImputer\n","from sklearn.model_selection import train_test_split\n","from sklearn.linear_model import LogisticRegression\n","from sklearn.ensemble import RandomForestClassifier\n","from xgboost import XGBClassifier\n","from sklearn.metrics import classification_report\n","from sklearn.tree import DecisionTreeClassifier\n","from sklearn.neural_network import MLPClassifier\n","from tensorflow.keras.models import Sequential\n","from tensorflow.keras.layers import Dense, Conv2D, Flatten, SimpleRNN, LSTM\n","import mlflow\n","import mlflow.sklearn\n","import mlflow.xgboost\n","import warnings\n","from imblearn.combine import SMOTETomek\n","\n","warnings.filterwarnings('ignore')\n","\n","# Set plot style for better visuals\n","sns.set(style=\"whitegrid\")"]},{"cell_type":"markdown","metadata":{"_cell_guid":"59a324ba-900b-40da-8f79-ee8f7957ef5b","_uuid":"8c78d907-d277-48a3-951f-ee7f01b6a7a7","trusted":true},"source":["## Load Datasets"]},{"cell_type":"code","execution_count":2,"metadata":{"_cell_guid":"98601976-817f-413e-ad3a-8c7eedb11606","_uuid":"b72ec9f6-c664-4339-8d29-555121dbd61f","collapsed":false,"execution":{"iopub.execute_input":"2024-10-18T11:28:15.198096Z","iopub.status.busy":"2024-10-18T11:28:15.197361Z","iopub.status.idle":"2024-10-18T11:28:20.098319Z","shell.execute_reply":"2024-10-18T11:28:20.097043Z","shell.execute_reply.started":"2024-10-18T11:28:15.198047Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[],"source":["# Load the datasets\n","fraud_data = pd.read_csv('../data/cleaned_data_1.csv')\n","credit_data = pd.read_csv('../data/cleaned_data_2.csv')"]},{"cell_type":"code","execution_count":3,"metadata":{},"outputs":[{"data":{"text/plain":["((138846, 15), (283726, 31))"]},"execution_count":3,"metadata":{},"output_type":"execute_result"}],"source":["fraud_data.shape, credit_data.shape"]},{"cell_type":"code","execution_count":4,"metadata":{},"outputs":[{"data":{"text/plain":["(Index(['user_id', 'purchase_value', 'source', 'browser', 'sex', 'age',\n"," 'ip_address', 'Class', 'country', 'lower_bound_ip_addres',\n"," 'upper_bound_ip_adress', 'signup_purchase_diff', 'transaction_count',\n"," 'hour_of_day', 'day_of_week'],\n"," dtype='object'),\n"," Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',\n"," 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',\n"," 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',\n"," 'Class'],\n"," dtype='object'))"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["fraud_data.columns, credit_data.columns"]},{"cell_type":"markdown","metadata":{"_cell_guid":"a1c436e7-42e4-474c-8a8f-b00592a274ff","_uuid":"79aa1d6d-a15f-4224-815b-4344da833bbf","trusted":true},"source":["## Model Building"]},{"cell_type":"code","execution_count":5,"metadata":{"_cell_guid":"a49734b2-8a44-42d2-8b30-bd5a916c17eb","_uuid":"ab90e452-bf1c-4287-a72e-c1599293b5b7","collapsed":false,"execution":{"iopub.execute_input":"2024-10-18T12:12:29.677630Z","iopub.status.busy":"2024-10-18T12:12:29.677086Z","iopub.status.idle":"2024-10-18T12:13:18.305808Z","shell.execute_reply":"2024-10-18T12:13:18.304575Z","shell.execute_reply.started":"2024-10-18T12:12:29.677583Z"},"jupyter":{"outputs_hidden":false},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["(array([0, 1], dtype=int64), array([125849, 12997], dtype=int64))\n","(array([0, 1], dtype=int64), array([283253, 473], dtype=int64))\n"]}],"source":["# Prepare data for the model (e-commerce)\n","X1 = fraud_data.drop(columns=['Class'])\n","X2 = credit_data.drop(columns=['Class'])\n","\n","y1 = fraud_data['Class']\n","y2 = credit_data['Class']\n","\n","print(np.unique(y1, return_counts=True))\n","print(np.unique(y2, return_counts=True))"]},{"cell_type":"code","execution_count":6,"metadata":{},"outputs":[],"source":["X_train1, X_test1, y_train1, y_test1 = train_test_split(X1, y1, test_size=0.2, random_state=42)\n","\n","X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, test_size=0.2, random_state=42)\n"]},{"cell_type":"markdown","metadata":{},"source":["## Class Balancing with SMOTE"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["smt = SMOTETomek(random_state=42)\n","X_train1, y_train1 = smt.fit_resample(X_train1, y_train1)\n","X_train2, y_train2 = smt.fit_resample(X_train2, y_train2)\n","\n","print(np.unique(X_train1, return_counts=True))\n","print(np.unique(X_train2, return_counts=True))\n","print(np.unique(y_train1, return_counts=True))\n","print(np.unique(y_train2, return_counts=True))"]},{"cell_type":"markdown","metadata":{},"source":["### Logistic Regression"]},{"cell_type":"code","execution_count":8,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":[" precision recall f1-score support\n","\n"," 0 0.91 1.00 0.95 25193\n"," 1 0.00 0.00 0.00 2577\n","\n"," accuracy 0.91 27770\n"," macro avg 0.45 0.50 0.48 27770\n","weighted avg 0.82 0.91 0.86 27770\n","\n"]}],"source":["log_reg = LogisticRegression(C=1, solver='liblinear')\n","log_reg.fit(X_train, y_train)\n","y_pred_log_reg = log_reg.predict(X_test)\n","print(classification_report(y_test, y_pred_log_reg))"]},{"cell_type":"markdown","metadata":{},"source":["### Random Forest"]},{"cell_type":"code","execution_count":9,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":[" precision recall f1-score support\n","\n"," 0 0.96 1.00 0.98 25193\n"," 1 1.00 0.55 0.71 2577\n","\n"," accuracy 0.96 27770\n"," macro avg 0.98 0.77 0.84 27770\n","weighted avg 0.96 0.96 0.95 27770\n","\n"]}],"source":["rf_clf = RandomForestClassifier(n_estimators=30, max_depth=3)\n","rf_clf.fit(X_train, y_train)\n","y_pred_rf = rf_clf.predict(X_test)\n","print(classification_report(y_test, y_pred_rf))"]},{"cell_type":"markdown","metadata":{},"source":["### XGBoost "]},{"cell_type":"code","execution_count":10,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":[" precision recall f1-score support\n","\n"," 0 0.96 1.00 0.98 25193\n"," 1 0.94 0.55 0.69 2577\n","\n"," accuracy 0.96 27770\n"," macro avg 0.95 0.77 0.84 27770\n","weighted avg 0.95 0.96 0.95 27770\n","\n"]}],"source":["xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss')\n","xgb_clf.fit(X_train, y_train)\n","y_pred_xgb = xgb_clf.predict(X_test)\n","print(classification_report(y_test, y_pred_xgb))"]},{"cell_type":"markdown","metadata":{},"source":["## Experiments"]},{"cell_type":"code","execution_count":50,"metadata":{},"outputs":[],"source":["datasets = {\n"," \"fraud_data\": (X_train1, y_train1, X_test1, y_test1),\n"," \"credit_data\": (X_train2, y_train2, X_test2, y_test2),\n","}"]},{"cell_type":"code","execution_count":54,"metadata":{},"outputs":[],"source":["models = [\n"," (\n"," \"Logistic Regression\", \n"," LogisticRegression(C=1, solver='liblinear')\n"," ),\n"," (\n"," \"Random Forest\", \n"," RandomForestClassifier(n_estimators=30, max_depth=3)\n"," ),\n"," (\n"," \"XGBClassifier\",\n"," XGBClassifier(use_label_encoder=False, eval_metric='logloss')\n"," )\n","]"]},{"cell_type":"code","execution_count":16,"metadata":{},"outputs":[{"name":"stderr","output_type":"stream","text":["2024/10/20 13:43:16 WARNING mlflow.models.model: Model logged without a signature and input example. Please set `input_example` parameter when logging the model to auto infer the model signature.\n","2024/10/20 13:43:16 INFO mlflow.tracking._tracking_service.client: 🏃 View run Logistic Regression at: http://localhost:5000/#/experiments/935012191474515353/runs/66416527b5ad44a0be0886ed9e43b658.\n","2024/10/20 13:43:16 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/935012191474515353.\n","2024/10/20 13:43:20 WARNING mlflow.models.model: Model logged without a signature and input example. Please set `input_example` parameter when logging the model to auto infer the model signature.\n","2024/10/20 13:43:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run Random Forest at: http://localhost:5000/#/experiments/935012191474515353/runs/3c3923af256d4ca4947318c3e70a6fbf.\n","2024/10/20 13:43:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/935012191474515353.\n","2024/10/20 13:43:25 WARNING mlflow.models.model: Model logged without a signature and input example. Please set `input_example` parameter when logging the model to auto infer the model signature.\n","2024/10/20 13:43:25 INFO mlflow.tracking._tracking_service.client: 🏃 View run XGBClassifier at: http://localhost:5000/#/experiments/935012191474515353/runs/019f76c5419a4e24a15a654ad2358b1c.\n","2024/10/20 13:43:25 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/935012191474515353.\n","2024/10/20 13:43:28 WARNING mlflow.models.model: Model logged without a signature and input example. Please set `input_example` parameter when logging the model to auto infer the model signature.\n","2024/10/20 13:43:29 INFO mlflow.tracking._tracking_service.client: 🏃 View run XGBClassifier With SMOTE at: http://localhost:5000/#/experiments/935012191474515353/runs/b89994c7190a441495941622db7bf9ac.\n","2024/10/20 13:43:29 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/935012191474515353.\n"]}],"source":["# Initialize MLflow\n","mlflow.set_experiment(\"Fraud Detection Models - Single Dataset\")\n","mlflow.set_tracking_uri(\"http://localhost:5000\")\n","\n","reports = []\n","\n","for model_name, model, train_set, test_set in models:\n"," X_train = train_set[0]\n"," y_train = train_set[1]\n"," X_test = test_set[0]\n"," y_test = test_set[1]\n"," \n"," model.fit(X_train, y_train)\n"," y_pred = model.predict(X_test)\n"," report = classification_report(y_test, y_pred, output_dict=True)\n"," reports.append(report)\n","\n","for i, element in enumerate(models):\n"," model_name = element[0]\n"," model = element[1]\n"," report = reports[i]\n"," \n"," with mlflow.start_run(run_name=model_name): \n"," mlflow.log_param(\"model\", model_name)\n"," mlflow.log_metric('accuracy', report['accuracy'])\n"," mlflow.log_metric('recall_class_1', report['1']['recall'])\n"," mlflow.log_metric('recall_class_0', report['0']['recall'])\n"," mlflow.log_metric('f1_score_macro', report['macro avg']['f1-score']) \n"," \n"," if \"XGB\" in model_name:\n"," mlflow.xgboost.log_model(model, \"model\")\n"," else:\n"," mlflow.sklearn.log_model(model, \"model\") "]},{"cell_type":"code","execution_count":55,"metadata":{},"outputs":[{"name":"stderr","output_type":"stream","text":["2024/10/20 20:16:21 WARNING mlflow.models.model: Model logged without a signature and input example. Please set `input_example` parameter when logging the model to auto infer the model signature.\n","2024/10/20 20:16:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run Logistic Regression_fraud_data at: http://localhost:5000/#/experiments/478268722598582565/runs/8273f777699647a39535d873391b9d13.\n","2024/10/20 20:16:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/478268722598582565.\n","2024/10/20 20:16:24 WARNING mlflow.models.model: Model logged without a signature and input example. Please set `input_example` parameter when logging the model to auto infer the model signature.\n","2024/10/20 20:16:24 INFO mlflow.tracking._tracking_service.client: 🏃 View run Random Forest_fraud_data at: http://localhost:5000/#/experiments/478268722598582565/runs/65913aaab7ce49948b90f6752b771312.\n","2024/10/20 20:16:24 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/478268722598582565.\n","2024/10/20 20:16:27 WARNING mlflow.models.model: Model logged without a signature and input example. Please set `input_example` parameter when logging the model to auto infer the model signature.\n","2024/10/20 20:16:28 INFO mlflow.tracking._tracking_service.client: 🏃 View run XGBClassifier_fraud_data at: http://localhost:5000/#/experiments/478268722598582565/runs/e262b2c32c8442b2a720a086f36a2f63.\n","2024/10/20 20:16:28 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/478268722598582565.\n","2024/10/20 20:16:52 WARNING mlflow.models.model: Model logged without a signature and input example. Please set `input_example` parameter when logging the model to auto infer the model signature.\n","2024/10/20 20:16:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run Logistic Regression_credit_data at: http://localhost:5000/#/experiments/478268722598582565/runs/37abcc1c28b54206bffc2f7695ca9d49.\n","2024/10/20 20:16:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/478268722598582565.\n","2024/10/20 20:16:56 WARNING mlflow.models.model: Model logged without a signature and input example. Please set `input_example` parameter when logging the model to auto infer the model signature.\n","2024/10/20 20:16:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run Random Forest_credit_data at: http://localhost:5000/#/experiments/478268722598582565/runs/afef239a5b3845b082ab9368d9621edd.\n","2024/10/20 20:16:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/478268722598582565.\n","2024/10/20 20:16:59 WARNING mlflow.models.model: Model logged without a signature and input example. Please set `input_example` parameter when logging the model to auto infer the model signature.\n","2024/10/20 20:16:59 INFO mlflow.tracking._tracking_service.client: 🏃 View run XGBClassifier_credit_data at: http://localhost:5000/#/experiments/478268722598582565/runs/bcfb79f52acc4f82bf61298763c208df.\n","2024/10/20 20:16:59 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/478268722598582565.\n"]}],"source":["# Initialize MLflow\n","mlflow.set_experiment(\"Fraud Detection Models - 2 Datasets\")\n","mlflow.set_tracking_uri(\"http://localhost:5000\")\n","\n","# Iterate through datasets\n","for dataset_name, (X_train, y_train, X_test, y_test) in datasets.items():\n"," reports = []\n","\n"," # Train each model on the current dataset\n"," for model_name, model in models:\n"," model.fit(X_train, y_train)\n"," y_pred = model.predict(X_test)\n"," report = classification_report(y_test, y_pred, output_dict=True)\n"," reports.append(report)\n","\n"," # Log each model's performance metrics to MLflow\n"," for i, (model_name, model) in enumerate(models):\n"," report = reports[i]\n","\n"," with mlflow.start_run(run_name=f\"{model_name}_{dataset_name}\"):\n"," mlflow.log_param(\"model\", model_name)\n"," mlflow.log_param(\"dataset\", dataset_name)\n"," mlflow.log_metric('accuracy', report['accuracy'])\n"," mlflow.log_metric('recall_class_1', report['1']['recall'])\n"," mlflow.log_metric('recall_class_0', report['0']['recall'])\n"," mlflow.log_metric('f1_score_macro', report['macro avg']['f1-score'])\n","\n"," # Log the model using the appropriate MLflow method\n"," if \"XGBoost\" in model_name:\n"," mlflow.xgboost.log_model(model, \"model\")\n"," else:\n"," mlflow.sklearn.log_model(model, \"model\")\n"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]}],"metadata":{"kaggle":{"accelerator":"none","dataSources":[{"datasetId":5896976,"sourceId":9653739,"sourceType":"datasetVersion"}],"dockerImageVersionId":30786,"isGpuEnabled":false,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":".week89","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.0"}},"nbformat":4,"nbformat_minor":4}
41 changes: 0 additions & 41 deletions scripts/validate_model_for_deployment.py

This file was deleted.

0 comments on commit b389a06

Please sign in to comment.