Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
rlrossiter committed Jan 22, 2021
0 parents commit 3f511b6
Show file tree
Hide file tree
Showing 17 changed files with 674 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .funcignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.git*
.vscode
local.settings.json
test
.venv
132 changes: 132 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don’t work, or not
# install all needed dependencies.
#Pipfile.lock

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Azure Functions artifacts
bin
obj
appsettings.json
local.settings.json
skus.json
subscriptions.json
.python_packages
6 changes: 6 additions & 0 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"recommendations": [
"ms-azuretools.vscode-azurefunctions",
"ms-python.python"
]
}
12 changes: 12 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Attach to Python Functions",
"type": "python",
"request": "attach",
"port": 9091,
"preLaunchTask": "func: host start"
}
]
}
8 changes: 8 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"azureFunctions.deploySubpath": ".",
"azureFunctions.scmDoBuildDuringDeployment": true,
"azureFunctions.pythonVenv": ".venv",
"azureFunctions.projectLanguage": "Python",
"azureFunctions.projectRuntime": "~3",
"debug.internalConsoleOptions": "neverOpen"
}
26 changes: 26 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "func",
"command": "host start",
"problemMatcher": "$func-python-watch",
"isBackground": true,
"dependsOn": "pipInstall"
},
{
"label": "pipInstall",
"type": "shell",
"osx": {
"command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt"
},
"windows": {
"command": "${config:azureFunctions.pythonVenv}\\Scripts\\python -m pip install -r requirements.txt"
},
"linux": {
"command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt"
},
"problemMatcher": []
}
]
}
15 changes: 15 additions & 0 deletions FanoutTrigger/fanout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import datetime
import json
import logging

import azure.functions as func
from scraper_pipeline.fanout import bestbuy_sku_to_url_fanout


def main(timer: func.TimerRequest, skusblob: str):
    """Expand the SKU blob into scrape requests and return them as JSON.

    Args:
        timer: Timer trigger payload; it is not read by this function.
        skusblob: JSON text that is decoded and handed to
            ``BestBuySkuFanout``.

    Returns:
        A JSON array string containing the ``_asdict()`` mapping of every
        item produced by ``BestBuySkuFanout.fanout()``.
    """
    sku_fanout = bestbuy_sku_to_url_fanout.BestBuySkuFanout(json.loads(skusblob))

    payload = []
    for scrape_request in sku_fanout.fanout():
        payload.append(scrape_request._asdict())

    return json.dumps(payload)
26 changes: 26 additions & 0 deletions FanoutTrigger/function.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"scriptFile": "fanout.py",
"bindings": [
{
"name": "timer",
"type": "timerTrigger",
"direction": "in",
"schedule": "0 * * * * *"
},
{
"name": "skusblob",
"type": "blob",
"dataType": "string",
"path": "sku/skus.json",
"connection": "ScannerStorageAccountConnection",
"direction": "in"
},
{
"name": "$return",
"type": "serviceBus",
"direction": "out",
"queueName": "trigger",
"connection": "ScannerServiceBusConnection"
}
]
}
35 changes: 35 additions & 0 deletions ProductNotifier/function.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"scriptFile": "notify.py",
"bindings": [
{
"name": "msg",
"type": "serviceBusTrigger",
"direction": "in",
"queueName": "notification",
"connection": "ScannerServiceBusConnection"
},
{
"name": "notifications",
"type": "table",
"tableName": "notification",
"take": "1000",
"connection": "ScannerStorageAccountConnection",
"direction": "in"
},
{
"name": "subscriptionsblob",
"type": "blob",
"dataType": "string",
"path": "subscriber/subscriptions.json",
"connection": "ScannerStorageAccountConnection",
"direction": "in"
},
{
"name": "$return",
"type": "table",
"tableName": "notification",
"connection": "ScannerStorageAccountConnection",
"direction": "out"
}
]
}
53 changes: 53 additions & 0 deletions ProductNotifier/notify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import json
import os
import uuid

import azure.functions as func
from scraper_pipeline.models import notification
from scraper_pipeline.models import state_change_notification
from scraper_pipeline.models import subscriber
from scraper_pipeline.notify import notifier
from scraper_pipeline.notify.plugin import gmail_notifier


def main(msg: func.ServiceBusMessage, notifications, subscriptionsblob: str):
    """Notify subscribers about one state change and return the new table rows.

    Args:
        msg: Service Bus message whose UTF-8 JSON body supplies the keyword
            arguments for ``StateChangeNotification``.
        notifications: JSON text listing earlier notification records; each
            record must provide ``page_id``, ``address`` and
            ``last_notify_time``.
        subscriptionsblob: JSON text keyed by SKU, where each value is a list
            of entries carrying ``email`` and ``wait``.

    Returns:
        A JSON array string with one row per notification returned by
        ``Notifier.notify_change``. Each row has ``PartitionKey`` (the page
        id), a random UUID ``RowKey``, and the notification's own fields.
    """
    payload = json.loads(msg.get_body().decode('utf-8'))
    change_notif = state_change_notification.StateChangeNotification(**payload)

    previous_notifications = []
    for record in json.loads(notifications):
        previous_notifications.append(
            notification.Notification(
                page_id=record['page_id'],
                address=record['address'],
                last_notify_time=record['last_notify_time'],
            )
        )

    # The blob is shaped {sku: [{email, wait}, ...]}; flatten it so every
    # (sku, email, wait) combination becomes its own Subscriber object.
    sku_to_entries = json.loads(subscriptionsblob)
    subscribers = [
        subscriber.Subscriber(
            page_id=sku,
            address=entry['email'],
            wait_seconds=entry['wait'],
        )
        for sku in sku_to_entries
        for entry in sku_to_entries[sku]
    ]

    sent = notifier.Notifier(send_mail).notify_change(
        change_notif, previous_notifications, subscribers
    )

    notification_rows = []
    for item in sent:
        row = {'PartitionKey': item.page_id, 'RowKey': str(uuid.uuid4())}
        # Applied last so the notification's own fields win on a key clash,
        # exactly as the ** unpacking in a dict literal would.
        row.update(item._asdict())
        notification_rows.append(row)

    return json.dumps(notification_rows)

def send_mail(sku, old, new, addr, metadata):
    """Email *addr* an "in stock" message for *sku* via ``GmailNotifier``.

    Args:
        sku: Identifier interpolated into the subject line.
        old: Accepted but not used by this function.
        new: Accepted but not used by this function.
        addr: Recipient passed through to ``GmailNotifier.send``.
        metadata: Subscriptable object; its ``'url'`` entry becomes the link
            in the message body.

    Raises:
        KeyError: If ``NotifierEmail`` or ``NotifierPw`` is missing from the
            process environment.
    """
    sender = os.environ['NotifierEmail']
    password = os.environ['NotifierPw']
    mailer = gmail_notifier.GmailNotifier(sender, password)

    with mailer.connect():
        mailer.send(addr, f"SKU {sku} in stock", f"Link: {metadata['url']}")
19 changes: 19 additions & 0 deletions ProductScraper/function.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"scriptFile": "scrape.py",
"bindings": [
{
"name": "msg",
"type": "serviceBusTrigger",
"direction": "in",
"queueName": "trigger",
"connection": "ScannerServiceBusConnection"
},
{
"name": "$return",
"type": "serviceBus",
"direction": "out",
"queueName": "status",
"connection": "ScannerServiceBusConnection"
}
]
}
18 changes: 18 additions & 0 deletions ProductScraper/scrape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import json

import azure.functions as func
from scraper_pipeline.models import scrape
from scraper_pipeline.scrape.plugin import best_buy_processor
from scraper_pipeline.scrape import scraper

def main(msg: func.ServiceBusMessage):
    """Scrape the page described by a Service Bus message and return its status.

    Args:
        msg: Service Bus message whose UTF-8 JSON body supplies the keyword
            arguments for ``Scrape``.

    Returns:
        A JSON object string built from the ``_asdict()`` view of the status
        returned by ``Scraper.scrape``, with every ``None``-valued field
        omitted.
    """
    scrape_json = msg.get_body().decode('utf-8')
    scrape_obj = scrape.Scrape(**json.loads(scrape_json))

    proc = best_buy_processor.BestBuyProcessor()
    scrpr = scraper.Scraper()

    status = scrpr.scrape(scrape_obj, proc)

    # Strip only null (None) fields. A plain truthiness test would also drop
    # legitimate falsy values such as False, 0 or "" from the outgoing message.
    return json.dumps(
        {key: value for key, value in status._asdict().items() if value is not None}
    )
Loading

0 comments on commit 3f511b6

Please sign in to comment.