Skip to content

Commit

Permalink
Revision 0.1 - Adding new support for AI and LLM with Support Vector M…
Browse files Browse the repository at this point in the history
…achine
  • Loading branch information
paritoshtripathi935 committed Feb 2, 2024
1 parent 69af838 commit 1d90505
Show file tree
Hide file tree
Showing 40 changed files with 217 additions and 99 deletions.
File renamed without changes.
File renamed without changes.
1 change: 0 additions & 1 deletion data/Amazon/models/model.pkl

This file was deleted.

1 change: 0 additions & 1 deletion data/Amazon/processed/data.csv

This file was deleted.

1 change: 0 additions & 1 deletion data/Amazon/raw/data.csv

This file was deleted.

5 changes: 0 additions & 5 deletions data/DataScraping.py

This file was deleted.

File renamed without changes.
File renamed without changes.
75 changes: 0 additions & 75 deletions data/models/.ipynb_checkpoints/Model_1-checkpoint.ipynb

This file was deleted.

14 changes: 0 additions & 14 deletions shScripts/startScraping.sh

This file was deleted.

3 changes: 3 additions & 0 deletions src/data_scraping/Amazon/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from scripts.main import Scraper

__all__ = ['Scraper']
Binary file added src/data_scraping/Amazon/amazon.db
Binary file not shown.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,6 @@ def main(self, keyword, number_of_threads):
for keyword in product_categories:
scraper.main(keyword, number_of_threads)
scraper.pagination = 1
# make new request session
SeleniumScraper.reqSession = requests.Session()

scraper.db.removeDuplicates()
Expand Down
File renamed without changes.
File renamed without changes.
Binary file added src/data_scraping/Flipkart/flipkart.db
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
98 changes: 98 additions & 0 deletions src/eda/EDA.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The database amazon.db does not exist\n"
]
},
{
"ename": "SystemExit",
"evalue": "1",
"output_type": "error",
"traceback": [
"An exception has occurred, use %tb to see the full traceback.\n",
"\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m 1\n"
]
}
],
"source": [
"# connect with the database\n",
"import sqlite3\n",
"import os\n",
"import sys\n",
"import time\n",
"import datetime\n",
"sys.path.append(\"src\")\n",
"\n",
"def connect(db_name: str):\n",
" # check if the database exists\n",
" if not os.path.exists(db_name):\n",
" print(f\"The database {db_name} does not exist\")\n",
" sys.exit(1)\n",
" \n",
" conn = sqlite3.connect(db_name)\n",
" cursor = conn.cursor()\n",
" return cursor\n",
"\n",
"def generate_stats(database: str):\n",
" stats = {}\n",
" cursor = connect(database)\n",
" cursor.execute(\"SELECT name FROM sqlite_master WHERE type='table';\")\n",
" tables = cursor.fetchall()\n",
" stats[\"tables\"] = tables\n",
" print(f\"Tables in the database: {tables}\")\n",
" \n",
" for table in tables:\n",
" cursor.execute(f\"SELECT COUNT(*) FROM {table[0]}\")\n",
" rows = cursor.fetchall()\n",
" print(f\"Total rows in {table[0]}: {rows[0][0]}\")\n",
" stats[table[0]] = rows[0][0]\n",
" \n",
" for table in tables:\n",
" cursor.execute(f\"PRAGMA table_info({table[0]})\")\n",
" columns = cursor.fetchall()\n",
" print(f\"Columns in {table[0]}: {len(columns)}\")\n",
" stats[table[0]] = columns\n",
" \n",
" cursor.execute(\"SELECT COUNT(DISTINCT sku) FROM products\")\n",
" products = cursor.fetchall()\n",
" print(f\"Total unique products: {products[0][0]}\")\n",
" stats[\"products\"] = products[0][0]\n",
" \n",
" return stats\n",
" \n",
"\n",
"\n",
"generate_stats(\"amazon.db\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
File renamed without changes.
34 changes: 34 additions & 0 deletions src/eda/eda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# connect with the database
import sqlite3
import os
import sys
import time
import datetime

def connect(db_name: str):
    """Return a cursor on the SQLite database stored at ``db_name``.

    When nothing exists at ``db_name`` a message is printed and the process
    exits with status 1 instead of opening a connection.
    """
    if os.path.exists(db_name):
        # The connection object stays reachable through the returned
        # cursor's ``connection`` attribute.
        return sqlite3.connect(db_name).cursor()

    print(f"The database {db_name} does not exist")
    sys.exit(1)

def generate_stats(database: str):
    """
    Collect summary statistics from a SQLite database and print them.
    Parameters:
    - database (str): Path of the SQLite file; it is opened through ``connect``.
    Returns:
    - dict: ``"tables"`` holds the rows returned by the ``sqlite_master`` query
      (one 1-tuple per table name) and ``"products"`` holds the number of
      distinct ``sku`` values in the ``products`` table.
    Raises:
    - sqlite3.OperationalError: If the ``products`` table or its ``sku``
      column is missing.
    """
    stats = {}
    cursor = connect(database)
    try:
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()
        stats["tables"] = tables
        print(f"Tables in the database: {tables}")

        cursor.execute("SELECT COUNT(DISTINCT sku) FROM products")
        unique_products = cursor.fetchone()[0]
        print(f"Total unique products: {unique_products}")
        stats["products"] = unique_products
    finally:
        # ``connect`` only hands back a cursor, so the connection has to be
        # reached through it; close it even when a query fails so the
        # database file handle is not leaked.
        cursor.connection.close()

    return stats


File renamed without changes.
File renamed without changes.
File renamed without changes.
82 changes: 82 additions & 0 deletions src/utils/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# This file provides logging helpers for the program.
import logging
import os
import sys
import time
import datetime
import colorama
from colorama import Fore, Style
colorama.init(autoreset=True)

def create_logger():
    """
    Return the shared "my_logger" logger, configuring it on first use.
    On the first call this creates the ``logs`` directory if needed, attaches
    a timestamped file handler and a stdout handler (both at DEBUG level with
    the same format), and logs the path of the new log file.
    Later calls return the already-configured logger unchanged.
    Returns:
    - logging.Logger: The configured logger.
    """
    logger = logging.getLogger("my_logger")
    logger.setLevel(logging.DEBUG)

    # logging.getLogger hands back the same object for the same name, so
    # attaching handlers again would duplicate every message and open one
    # more log file per call.
    if logger.handlers:
        return logger

    # create /logs directory if it does not exist
    os.makedirs("logs", exist_ok=True)

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = os.path.join("logs", "log_" + timestamp + ".log")
    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    # write everything to the timestamped log file
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # mirror the same records on the console
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.DEBUG)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    logger.info(f'Log Created. Log file: {log_file}')

    return logger

def handle_logging(message, message_type="info", env="dev"):
    """
    Log messages with color-coded formatting based on the message type.
    Parameters:
    - message (str): The message to be logged.
    - message_type (str, optional): The type of message. Possible values are 'info', 'warning', or 'error'.
    Defaults to 'info'. Any other value is ignored and nothing is emitted.
    - env (str, optional): The environment the program runs in. Defaults to 'dev'.
    Notes:
    - In a production environment ('production' or 'prod'), it uses the logger from create_logger().
    - In any other environment, it prints directly to the console with color-coded formatting.
    Example:
    >>> handle_logging("This is an informational message", message_type='info')
    >>> handle_logging("This is a warning message", message_type='warning')
    >>> handle_logging("This is an error message", message_type='error')
    """

    if env in ("production", "prod"):
        # Build the logger only here: the console-only branch never uses it,
        # and creating it has side effects (log directory, log file, handlers).
        logger = create_logger()

        if message_type == "error":
            logger.error(f"{Fore.RED}ERROR: {message}{Style.RESET_ALL}")
        elif message_type == "info":
            logger.info(f"{Fore.GREEN}INFO: {message}{Style.RESET_ALL}")
        elif message_type == "warning":
            logger.warning(f"{Fore.YELLOW}WARNING: {message}{Style.RESET_ALL}")

    else:
        if message_type == "error":
            print(f"{Fore.RED}{Style.BRIGHT}ERROR: {message}{Style.RESET_ALL}")
        elif message_type == "info":
            print(f"{Fore.GREEN}INFO: {message}{Style.RESET_ALL}")
        elif message_type == "warning":
            print(f"{Fore.YELLOW}WARNING: {message}{Style.RESET_ALL}")

1 change: 0 additions & 1 deletion src/utils/utils.py
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
# Sample utility source code

0 comments on commit 1d90505

Please sign in to comment.