Skip to content

Commit

Permalink
Initialize apis, twitter_worker; adjust model.py to model updates; up…
Browse files Browse the repository at this point in the history
…date software list
  • Loading branch information
iconix committed Aug 26, 2018
1 parent da0fb21 commit 856b830
Show file tree
Hide file tree
Showing 9 changed files with 454 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
__pycache__
.vscode/
node_modules/
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,18 @@ Once generations for a new proposed tweet are available, an email will be sent t
- Sohn, K., Yan, X., Lee, H. Learning Structured Output Representation using Deep Conditional Generative Models [CVAE [paper](http://papers.nips.cc/paper/5775-learning-structured-output-representation-using-deep-conditional-generative-models.pdf)] - #vae
- Bernardo, F., Zbyszynski, M., Fiebrink, R., Grierson, M. (2016). Interactive Machine Learning for End-User Innovation [[paper](http://research.gold.ac.uk/19767/)] - #onlinelearning
- https://devcenter.heroku.com/articles/getting-started-with-python
- https://blog.miguelgrinberg.com/post/the-flask-mega-tutorial-part-xviii-deployment-on-heroku
- https://developer.spotify.com/documentation/web-api/quick-start/

**Software…**
- [PyTorch](https://pytorch.org/) for deep learning
- [Quilt](https://quiltdata.com/) for versioning and deploying data
- [Tweepy](https://github.com/tweepy/tweepy) or [Twython](https://github.com/ryanmcgrath/twython) for Python Twitter API access
- [Conda](https://conda.io/docs/) and [npm](https://www.npmjs.com/) for package and environment management in Python and JavaScript
- [Flask](http://flask.pocoo.org/) for a lightweight Python web (model) server
- [Express.js](https://expressjs.com/) for a lightweight Node.js web (API) server
- [Twit](https://github.com/ttezel/twit) for Node.js Twitter API access
- [Spotify Web API Node](https://github.com/thelinmichael/spotify-web-api-node) for Node.js Spotify Web API access
- [Node Google Spreadsheet](https://github.com/theoephraim/node-google-spreadsheet) for Node.js Google Sheets API access
- [Heroku](https://www.heroku.com/) free tier for deployments

## Timeline
Expand Down Expand Up @@ -116,4 +124,4 @@ _Mentor: [Natasha Jaques](https://twitter.com/natashajaques)_
- Assistance in debugging model training
- Suggestions for model enhancement

### _Follow my progress this summer with this blog's [#openai](https://iconix.github.io/tags/openai) tag, or on [GitHub](https://github.com/iconix/openai)._
### _Follow my progress this summer with my blog's [#openai](https://iconix.github.io/tags/openai) tag, or on [GitHub](https://github.com/iconix/openai)._
22 changes: 22 additions & 0 deletions apis/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// index.js - launch API endpoints that communicate with Spotify, Google Sheets, and Twitter
var express = require('express');

var spotify = require('./spotify.js');
var sheets = require('./sheets.js');

// Build the Express app; express.json() parses JSON request bodies into req.body,
// which every handler below reads.
var app = express();
app.use(express.json());

// takes in a string query, returns associated genres
app.post('/get_genres', spotify.get_genres);

// takes in a tweet num, returns tweet at that index in Google Sheets
// TODO: replace with getting tweets from Twitter
app.post('/get_tweet', sheets.get_tweet);
// TODO: batching - /save_gens
app.post('/save_gen', sheets.save_gen);

// Declared with `var` so the port is scoped to this module rather than
// leaking out as an implicit global.
var port_num = 8888;

// Log from the listen callback so the message is printed only once the
// server is actually bound to the port.
app.listen(port_num, () => {
    console.log(`Listening on ${port_num}`);
});
24 changes: 24 additions & 0 deletions apis/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
  "name": "deephypebot_apis",
  "version": "0.0.1",
  "description": "APIs for pulling Twitter activity, requesting Spotify attributes, and saving generated tweets to Google Sheets",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/iconix/deephypebot.git"
  },
  "author": "Nadja Rhodes",
  "license": "MIT",
  "bugs": {
    "url": "https://github.com/iconix/deephypebot/issues"
  },
  "homepage": "https://github.com/iconix/deephypebot#readme",
  "dependencies": {
    "async": "^2.6.1",
    "express": "^4.16.3",
    "google-spreadsheet": "^2.0.5",
    "spotify-web-api-node": "^3.1.1"
  }
}
166 changes: 166 additions & 0 deletions apis/sheets.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
var GoogleSpreadsheet = require('google-spreadsheet');
var async = require('async');

// spreadsheet key is the long id in the sheets URL
// (read here from the DEEPHYPEBOT_SHEETS_ID environment variable)
var doc = new GoogleSpreadsheet(process.env.DEEPHYPEBOT_SHEETS_ID);
// worksheet selected by get_worksheet and then used by read_column / write_column
var sheet;

// POST handler: validates that the JSON body carries "tweet_num" and then
// delegates to get_tweet_from_sheet; answers 400 when the body or key is missing.
var get_tweet = (req, res) => {
    var has_body = Boolean(req && req.body);
    if (!has_body) {
        return res.status(400).send('Request body required');
    }

    var required_key = 'tweet_num';
    var requested_num = req.body[required_key];

    // `== null` matches both null and undefined, like the `== undefined` check it replaces
    return requested_num == null
        ? res.status(400).send(`Request JSON must contain "${required_key}" as a key`)
        : get_tweet_from_sheet(res, requested_num);
}

// POST handler: requires both "gen" and "tweet_num" in the JSON body (checked in
// that order) and then delegates to save_gen_to_sheet; answers 400 otherwise.
var save_gen = (req, res) => {
    var has_body = Boolean(req && req.body);
    if (!has_body) {
        return res.status(400).send('Request body required');
    }

    var payload = {};
    for (var field of ['gen', 'tweet_num']) {
        var value = req.body[field];
        // `== null` matches both null and undefined
        if (value == null) {
            return res.status(400).send(`Request JSON must contain "${field}" as a key`);
        }
        payload[field] = value;
    }

    return save_gen_to_sheet(res, payload['tweet_num'], payload['gen']);
}

// TODO: cache tweet list (although this code should all go away)
/**
 * Reads the 'tweet' column of the 'gens' worksheet and sends the entry at
 * index `num` on `res`.
 *
 * Responds 500 if any step fails, and 400 if `num` is not a non-negative
 * integer (numeric strings of digits are accepted) or is past the end of the
 * list. Empty cells are dropped before indexing.
 *
 * @param res Express response object
 * @param num index of the tweet to return
 */
function get_tweet_from_sheet(res, num) {
    // Module-level state read by get_worksheet (sheet_title) and read_column (col_name).
    // NOTE(review): because these are shared, overlapping requests can overwrite each
    // other's values before the steps run - consider passing them explicitly.
    sheet_title = 'gens';
    col_name = 'tweet';

    async.series([
        set_auth,
        get_worksheet,
        read_column
    ], (err, results) => {
        if (err) {
            res.status(500).send(`error: ${err}`);
            return;
        }

        // flatten the per-step results into one list and drop empty entries
        var tweets = results.reduce((acc, val) => acc.concat(val), []).filter(Boolean);

        // accept 3 as well as "3", but nothing that is not a whole number
        var index = (typeof num === 'string' && /^\d+$/.test(num)) ? Number(num) : num;

        if (!Number.isInteger(index) || index < 0) {
            // previously such values fell through and produced an empty 200 response
            res.status(400).send(`tweet_num (${num}) must be a non-negative integer`);
        } else if (index >= tweets.length) {
            res.status(400).send(`tweet_num (${num}) must be less than num of tweets available (${tweets.length})`);
        } else {
            res.send(tweets[index]);
        }
    });
}

/**
 * Writes `gen_param` into the 'commentary' column of row `num_param` of the
 * 'gens' worksheet and sends the saved value back on `res`.
 *
 * Responds 500 if any step (auth, worksheet lookup, write) fails.
 *
 * @param res Express response object
 * @param num_param index of the row to write
 * @param gen_param text to store
 */
function save_gen_to_sheet(res, num_param, gen_param) {
    // Module-level state read by get_worksheet (sheet_title) and write_column
    // (col_name, num, gen).
    // NOTE(review): because these are shared, overlapping requests can overwrite each
    // other's values before the steps run - consider passing them explicitly.
    sheet_title = 'gens';
    col_name = 'commentary';
    num = num_param;
    gen = gen_param;

    async.series([
        set_auth,
        get_worksheet,
        write_column
    ], (err, results) => {
        if (err) {
            res.status(500).send(`error: ${err}`);
            return;
        }

        // Only the last step (write_column) yields a value: the cell it saved.
        // The old code indexed the flattened results by `num`, which is a row
        // index, so the reply was empty for every row except the first.
        var saved = results[results.length - 1];
        res.send(saved);
    });
}

// Step for async.series: authenticates `doc` as a service account using the
// client email and private key found in the environment, then hands control
// to `step` via the library callback.
var set_auth = (step) => {
    var {
        DEEPHYPEBOT_SHEETS_CLIENT_EMAIL: client_email,
        DEEPHYPEBOT_SHEETS_PRIVATE_KEY: private_key
    } = process.env;

    doc.useServiceAccountAuth({ client_email, private_key }, step);
}

// Step for async.series: loads the spreadsheet info from `doc` and stores the
// worksheet whose title equals the module-level `sheet_title` in the
// module-level `sheet`.
// Calls step(err) when loading fails or no worksheet has that title, and
// step(null) once `sheet` is set.
var get_worksheet = (step) => {
    doc.getInfo((err, info) => {
        if (err) {
            // stop here: without the return, the code below dereferenced `info`
            // and `step` was called a second time
            return step(err);
        }

        console.log(`loaded doc: '${info.title}' by ${info.author.email}`);

        var found;
        for (var ws of info.worksheets) {
            if (ws.title == sheet_title) {
                found = ws;
                break;
            }
        }

        if (!found) {
            // previously this fell through and used an unset (or stale) `sheet`
            return step(new Error(`worksheet '${sheet_title}' not found`));
        }

        sheet = found;
        console.log(`found sheet '${sheet.title}' ${sheet.rowCount} x ${sheet.colCount}`);
        step(null);
    });
}

// Step for async.series: reads rows from the module-level `sheet` and yields
// the values of the column named by the module-level `col_name`.
// Calls step(err) on failure, step(null, values) on success.
var read_column = (step) => {
    // google provides some query options
    sheet.getRows({
        offset: 1
    }, (err, rows) => {
        if (err) {
            // stop here: without the return, `rows` was dereferenced below and
            // `step` was called a second time
            return step(err);
        }

        console.log(`read ${rows.length} rows`);

        var vals = rows.map((row) => row[col_name]);

        step(null, vals);
    });
}

// Step for async.series: reads rows from the module-level `sheet`, sets the
// `col_name` cell of row `num` to `gen` (all module-level), and saves that row.
// Calls step(err) on any failure - including `num` not matching a row - and
// step(null, savedValue) on success.
var write_column = (step) => {
    // google provides some query options
    sheet.getRows({
        offset: 1
    }, (err, rows) => {
        if (err) {
            // stop here: without the return, `rows` was dereferenced below and
            // `step` could be called more than once
            return step(err);
        }

        console.log(`read ${rows.length} rows`);

        var row = rows[num];
        if (!row) {
            // report a bad index through the callback instead of throwing
            // a TypeError from inside it
            return step(new Error(`tweet_num (${num}) must be less than num of tweets available (${rows.length})`));
        }

        row[col_name] = gen;

        row.save((save_err) => {
            if (save_err) {
                step(save_err);
            }
            else {
                console.log('saved!');
                step(null, row[col_name]);
            }
        });
    });
}

module.exports = { get_tweet: get_tweet, save_gen: save_gen };
94 changes: 94 additions & 0 deletions apis/spotify.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
var spotify_web_api = require('spotify-web-api-node');

// Spotify app credentials are supplied through environment variables
var client_id = process.env.DEEPHYPEBOT_SPOTIFY_CLIENT_ID;
var client_secret = process.env.DEEPHYPEBOT_SPOTIFY_CLIENT_SECRET;

// shared API client, constructed once with those credentials
var spotify_api = new spotify_web_api({ clientId: client_id, clientSecret: client_secret });

// expiry time of the current access token; stays undefined until get_genres assigns it
var expiry_date;

// search tracks whose name, album or artist contains the query
/**
 * Looks up the genres associated with the first track matching `query`.
 *
 * Takes the first search result, fetches every artist credited on it, and
 * merges their genre lists without duplicates (first-seen order).
 *
 * @param query search string passed to spotify_api.searchTracks
 * @returns a Promise resolving to an array of genre strings; the array is
 *          empty when the search yields no track or the track has no artists.
 *          Errors from either API call reject the returned Promise unchanged.
 */
function get_genres_by_query(query) {
    return spotify_api.searchTracks(query).then((data) => {
        var items = data.body && data.body['tracks'] && data.body['tracks']['items'];
        var track = items && items[0]; // always get the first track result

        if (!track) {
            // no match: previously this threw a TypeError (or resolved to undefined)
            return [];
        }

        var artist_ids = (track['artists'] || []).map((artist) => artist['id']);
        if (artist_ids.length === 0) {
            return [];
        }

        // get multiple artists
        return spotify_api.getArtists(artist_ids).then((artists_data) => {
            var genres = [];
            for (var artist of artists_data.body['artists']) {
                // skip missing entries - presumably the API can return null for an
                // id it does not know; TODO confirm
                if (artist && artist['genres']) {
                    genres.push(...artist['genres']);
                }
            }

            // unique genre list
            return [...new Set(genres)];
        });
    });
}

// Runs the genre search for `q` and writes the outcome to `res`.
// Errors that carry an integer `statusCode` (as the Spotify client's API errors
// are expected to) are relayed with that status; anything else becomes a 500.
function respond_with_genres(q, res) {
    get_genres_by_query(q).then((search_res) => {
        res.send(search_res);
    }, (err) => {
        // Duck-type on statusCode: the previous `instanceof WebapiError` check
        // referenced a name that is never defined in this file, so the handler
        // itself threw and the request was left without a response.
        if (err && Number.isInteger(err.statusCode)) {
            res.status(err.statusCode).send(err.message);
        } else {
            res.status(500).send(`error: ${err}`);
        }
    });
}

// POST handler: expects a JSON body with key "q" and responds with the genres
// found for that query. Answers 400 when the body or key is missing.
// Reuses the current access token while it is unexpired; otherwise requests a
// new one via the client-credentials grant first (500 if that fails).
var get_genres = (req, res) => {
    if (!req || !req.body) {
        return res.status(400).send('Request body required');
    }

    var key = 'q';
    var q = req.body[key];
    if (q == undefined) {
        return res.status(400).send(`Request JSON must contain "${key}" as a key`);
    }

    var now = new Date();

    if (spotify_api.getAccessToken() && expiry_date > now) {
        respond_with_genres(q, res);
    } else {
        // retrieve an access token
        spotify_api.clientCredentialsGrant().then((data) => {
            expiry_date = new Date(now.getTime() + data.body['expires_in'] * 1000);

            // log only the expiry; the token itself is a credential and must
            // not be written to logs
            console.log(`The access token expires at ${expiry_date}`);

            // save the access token so that it's used in future calls
            spotify_api.setAccessToken(data.body['access_token']);

            respond_with_genres(q, res);
        },
        (err) => {
            res.status(500).send(`Something went wrong when retrieving an access token: ${err}`);
        });
    }
}

module.exports = {get_genres: get_genres};
10 changes: 5 additions & 5 deletions model/serve.py → model/model.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
# serve.py - launch a simple PyTorch model server with Flask
# model.py - launch a simple PyTorch model server with Flask

from flask import Flask, jsonify, request
import torch

from pytorchtextvae import generate # https://github.com/iconix/pytorch-text-vae

### Load my pre-trained PyTorch model from another package
### Load my pre-trained PyTorch model from another package (TODO: slow)

print('Loading model')
DEVICE = torch.device('cpu') # CPU inference
# TODO: load model from Quilt
vae, input_side, output_side, pairs, dataset, EMBED_SIZE, random_state = generate.load_model('reviews_and_metadata_5yrs_state.pt', 'reviews_and_metadata_5yrs_stored_info.pkl', '.', None, DEVICE)
num_sample, max_length, temp, print_z = 1, 50, 0.75, False
vae, input_side, output_side, pairs, dataset, Z_SIZE, random_state = generate.load_model('reviews_and_metadata_5yrs_state.pt', 'reviews_and_metadata_5yrs_stored_info.pkl', DEVICE, cache_path='.')
num_sample = 1

### Setup Flask app

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
gens, zs, conditions = generate.generate(vae, num_sample, max_length, temp, print_z, input_side, output_side, pairs, dataset, EMBED_SIZE, random_state, DEVICE)
gens, zs, conditions = generate.generate(vae, input_side, output_side, pairs, dataset, Z_SIZE, random_state, DEVICE, genres=request.json['genres'], num_sample=1)
return jsonify({'gens': str(gens), 'zs': str(zs), 'conditions': str(dataset.decode_genres(conditions[0]))})

### App error handling
Expand Down
23 changes: 23 additions & 0 deletions workers/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"name": "deephypebot_workers",
"version": "0.0.1",
"description": "Worker agents for monitoring Twitter and Google Sheets",
"main": "twitter_worker.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "git+https://github.com/iconix/deephypebot.git"
},
"author": "Nadja Rhodes",
"license": "MIT",
"bugs": {
"url": "https://github.com/iconix/deephypebot/issues"
},
"homepage": "https://github.com/iconix/deephypebot#readme",
"dependencies": {
"async": "^2.6.1",
"request": "^2.88.0"
}
}
Loading

0 comments on commit 856b830

Please sign in to comment.