Skip to content

Commit

Permalink
Merge pull request #7 from vishalmhjn/dev
Browse files Browse the repository at this point in the history
Add docker and minor fixes
  • Loading branch information
vishalmhjn authored Oct 23, 2024
2 parents 35ef62e + b453339 commit 7269a97
Show file tree
Hide file tree
Showing 9 changed files with 133 additions and 78 deletions.
67 changes: 41 additions & 26 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,29 +1,38 @@
# Ignore Python cache files
__pycache__/
*.py[cod]
# Include any files or directories that you don't want to be copied to your
# container here (e.g., local build artifacts, temporary files, etc.).
#
# For more help, visit the .dockerignore file reference guide at
# https://docs.docker.com/go/build-context-dockerignore/

# Ignore virtual environment directory
.env/
venv/

# Ignore version control directories
.git/
.gitignore
.github

# Ignore Docker-related files
Dockerfile
docker-compose.yml
.dockerignore

# Ignore logs, temporary files, and compiled files
*.log
*.sqlite3
*.db
*.swp
*.swo
*.DS_Store
.coverage
**/.DS_Store
**/__pycache__
**/.venv
**/.classpath
**/.dockerignore
**/.env
**/.git
**/.github
**/.gitignore
**/.project
**/.settings
**/.toolstarget
**/.vs
**/.vscode
**/*.*proj.user
**/*.dbmdl
**/*.jfm
**/bin
**/charts
**/docker-compose*
**/compose.y*ml
**/Dockerfile*
**/node_modules
**/npm-debug.log
**/obj
**/secrets.dev.yaml
**/values.dev.yaml
LICENSE
README.md

# Ignore local configuration and environment files
.env
Expand All @@ -34,4 +43,10 @@ assets
aws
aws-scripts
purged
src-deprecated
src-deprecated
frontend
predictions
purged
model_output
docker
src/artifacts
16 changes: 16 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Deprecated sources and one-off AWS helper scripts
src-deprecated/
aws-scripts/

# Keep the model_output placeholder file tracked even though outputs are ignored
!model_output/*.gitkeep
# !*.gitkeep
# Local data, editor settings, and logs
data/
.vscode
*.log
src/__pycache__
*.zip
src/purge/
# Generated artifacts: batch predictions, env secrets, trained-model files
predictions/
.env
src/artifacts/
docker
# macOS Finder metadata, anywhere in the tree
**/.DS_Store
35 changes: 19 additions & 16 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
# Use an official Python runtime as a parent image
FROM python:3.12-slim
# syntax=docker/dockerfile:1

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE 1
ENV PYTHONUNBUFFERED 1
ARG PYTHON_VERSION=3.12.3
FROM python:${PYTHON_VERSION}-slim as base

# Set the working directory in the container
WORKDIR /app
# Prevents Python from writing pyc files
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Install build tools, including `make`
RUN apt-get update && apt-get install -y build-essential make && rm -rf /var/lib/apt/lists/*
WORKDIR /app

# Copy the requirements file into the container at /app
COPY requirements.txt /app/
# Install dependencies
COPY requirements.txt .
RUN python -m pip install -r requirements.txt

# Install any dependencies in requirements.txt
RUN pip install --upgrade pip && pip install -r requirements.txt
# Copy the application files
COPY . .

# Copy the entire project directory into the container at /app
COPY . /app/
RUN mkdir ./predictions
RUN mkdir ./model_output

# Expose the port the app runs on (5000 for Flask)
# Expose the application port
EXPOSE 5000

WORKDIR /app/src

CMD ["sh", "-c", "python main.py -t -m knn && python app.py"]
29 changes: 10 additions & 19 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,21 @@ PYTHON := python3
ENV := .env
VENV := ${ENV}/bin/activate

# Check if running in a Docker container
DOCKER_ENV := $(shell [ -f /.dockerenv ] && echo 1 || echo 0)

install:
ifeq ($(DOCKER_ENV), 0)
$(PYTHON) -m venv ${ENV} && \
. ${VENV} && \
pip install -r requirements.txt
else
pip install -r requirements.txt
endif

lint:
pylint --disable=R,C ./src

format:
black src/*.py

run:
ifeq ($(DOCKER_ENV), 0)
source $(VENV) && \
cd src/ && $(PYTHON) main.py -m knn
else
cd src/ && $(PYTHON) main.py -m knn
endif
source $(VENV) && cd src/ && \
$(PYTHON) main.py -m knn -t

app:
ifeq ($(DOCKER_ENV), 0)
source $(VENV) && \
cd src/ && $(PYTHON) app.py
else
cd src/ && $(PYTHON) app.py
endif
source $(VENV) && cd src/ && \
$(PYTHON) app.py
34 changes: 21 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,22 @@ Traffic-Waves is a voluntary project focused on daily traffic predictions in Par

The project leverages ML and DL techniques to analyze historical traffic data and make predictions for daily traffic patterns in Paris. This aids in providing insights for commuters and city planners alike.

## Download and Install
## Pipeline components

The pipeline consists of the following components:

**Data**:
- **[Data collection](src/call_data_api.py)**: Call the Open data Paris API and save the data in batches.
- **[Data processing](src/process_data.py)**: Merge the data and apply preprocessing steps to prepare data for batch predictions.

**Machine learning**:
- **[Model training](src/train.py)**: Import and train the ML model on the historical data.
- **[Predictions](src/predict.py)**: Get the one-day ahead predictions using the trained model and batch data.

**Visualization**:
- **[Dashboard](src/app.py)**: Start a flask app to display the input data and predictions for all the links.

## 1. Download and Install

To install the requirements, run the following command in the parent:

Expand All @@ -16,7 +31,7 @@ cd traffic-waves
make install
```

## Usage
### Usage

Run the data collection, processing and machine learning pipeline (with default options):
```bash
Expand All @@ -29,20 +44,13 @@ make app
```
Visit `http://127.0.0.1:5000/` in the web-browser to open the visualization dashboard.

## Pipeline components
## 2. Building and running your application using Docker

The above command runs the following components:
Once you have Docker installed, start the application by running:
`docker compose up --build`.

**Data**:
- **[Data collection](src/call_data_api.py)**: Call the Open data Paris API and save the data in batches.
- **[Data processing](src/process_data.py)**: Merge the data and apply preprocessing steps to prepare data for batch predictions.
Your application will be available at http://localhost:5000.

**Machine learning**:
- **[Model training](src/train.py)**: Import and train the ML model on the historical data.
- **[Predictions](src/predict.py)**: Get the one-day ahead predictions using the trained model and batch data.

**Visualization**:
- **[Dashboard](src/app.py)**: Start a flask app to display the input data and predictions for all the links.

## License
**to be updated**
19 changes: 19 additions & 0 deletions compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# NOTE(review): the top-level `version` key is obsolete in Compose V2 and is
# ignored with a warning — consider removing it.
version: '3.8'

# Named local volume so app state survives container restarts
volumes:
traffic-volume:
name: "traffic-volume"
driver: local

services:
traffic-app:
image: traffic-image
# Build the image from the Dockerfile in the repository root
build:
context: .
# Publish the Flask port (the image EXPOSEs 5000 and app.py binds 0.0.0.0:5000)
ports:
- "5000:5000"
# NOTE(review): mounting the volume over /app shadows the files baked into the
# image on first run — confirm this is intended rather than a narrower mount
# (e.g. only model_output/ and predictions/).
volumes:
- traffic-volume:/app
container_name: traffic-app
# Keep stdin open and allocate a TTY for interactive debugging via `docker attach`
stdin_open: true
tty: true
2 changes: 1 addition & 1 deletion src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,4 @@ def add_header(response):


if __name__ == "__main__":
app.run(debug=True)
app.run(host="0.0.0.0", port=5000, debug=True)
5 changes: 4 additions & 1 deletion src/call_data_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ def merge_data(self):
df["t_1h"] = pd.to_datetime(df["t_1h"])
assert (
str(df["t_1h"].dt.date.min()) == self.date_formatted
), f"Data for previous day is not available via API yet. For other days, manually set the offsets in API query."
), f"Data for previous day is not available via API yet. For \
other days, manually set the offsets in API query."

full_data_list.append(df)
full_data = pd.concat(full_data_list, axis=0)
Expand Down Expand Up @@ -103,6 +104,8 @@ def data_collector(
"order_by": "t_1h DESC",
"timezone": timezone,
"offset": offset,
# for date filtering: t_1h >= '2024-06-11 01:00:00' AND\
# t_1h <= '2024-06-12 00:00:00' AND iu_ac in ('5169')
}
# Example of using the dataclasses
api_handler = ParisAPIHandler(
Expand Down
4 changes: 2 additions & 2 deletions src/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def predictor(predictions_folder, file_processed_input, date_formatted, args_mod
target_as_autoregressive_feature,
target_column,
)
_ = time_series_object.load_scaler("artifacts/minmax_scaler.gz")
_ = time_series_object.load_scaler(f"{model_output}/minmax_scaler.gz")
logging.info(f"Scaler successfully loaded.")

scaled_test = time_series_object.scaler_transform(X_formatted)
Expand All @@ -47,7 +47,7 @@ def predictor(predictions_folder, file_processed_input, date_formatted, args_mod
traffic_model.load_model(f"{model_output}/{args_model}_model")
elif args_model == "xgboost":
traffic_model = XGBoostModel()
traffic_model.load_model(f"artifacts/{args_model}_model")
traffic_model.load_model(f"{model_output}/{args_model}_model")
logging.info(f"Model {args_model} successfully loaded.")

y_test_hat = traffic_model.predict_model(X_test)
Expand Down

0 comments on commit 7269a97

Please sign in to comment.