Skip to content

Commit

Permalink
additional clustering, webapp development
Browse files Browse the repository at this point in the history
  • Loading branch information
kelseyfglenn committed Aug 20, 2020
1 parent 3d10216 commit ead281e
Show file tree
Hide file tree
Showing 15 changed files with 2,134 additions and 37 deletions.
393 changes: 365 additions & 28 deletions clustering.ipynb

Large diffs are not rendered by default.

114 changes: 108 additions & 6 deletions data_collection.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": 3
"version": "3.7.7-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python_defaultSpec_1597104935903",
"name": "python_defaultSpec_1597907847001",
"display_name": "Python 3.7.7 64-bit ('anaconda3': virtualenv)"
}
},
Expand All @@ -30,7 +30,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -51,7 +51,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -65,7 +65,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -111,7 +111,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -127,6 +127,48 @@
"\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def get_artist_profile(artist_name):\n",
" url_search = \"/search?q=\" \n",
" querystring = url_api + url_search + quote(artist_name)\n",
" # get API response\n",
" response = requests.get(querystring, headers=headers)\n",
" response_artist = response.json()\n",
" return response_artist"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"prof = get_artist_profile('Drake')\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "'https://genius.com/artists/Drake'"
},
"metadata": {},
"execution_count": 19
}
],
"source": [
"prof['response']['hits'][0]['result']['primary_artist']['url']"
]
},
{
"cell_type": "code",
"execution_count": 14,
Expand All @@ -146,6 +188,25 @@
" return url_artist"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"def get_artist_url(artist_name):\n",
" # generate and store url, modify artist name to remove spaces\n",
" url_search = \"/search?q=\" \n",
" querystring = url_api + url_search + quote(artist_name)\n",
" # get API response\n",
" response = requests.get(querystring, headers=headers)\n",
" response_artist = response.json()\n",
" # pull artist url -- assumes they are the primary artist in the first search result since all searches return song objects\n",
" url_artist = response_artist['response']['hits'][0]['result']['primary_artist']['url']\n",
"\n",
" return url_artist"
]
},
{
"cell_type": "code",
"execution_count": 15,
Expand Down Expand Up @@ -257,6 +318,47 @@
"# pkl.dump(artists, f)\n"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"Pull and store artist URL's\n",
"\n",
"**note: this didn't check for primary_artist so it didn't pull properly for anyone who's not the primary artist on their first searched song\n",
"e.g. a bunch of people's names will link to Kanye\n",
"\"\"\"\n",
"all_artist_urls = {}\n",
"for artist in artists:\n",
" try:\n",
" all_artist_urls[artist] = get_artist_url(artist)\n",
" except:\n",
" all_artist_urls[artist] = None \n",
"\n",
"with open('all_artist_urls.pkl', 'wb') as f:\n",
" pkl.dump(all_artist_urls, f)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "'https://genius.com/artists/Earl-sweatshirt'"
},
"metadata": {},
"execution_count": 24
}
],
"source": [
"all_artist_urls['Earl Sweatshirt']"
]
},
{
"cell_type": "code",
"execution_count": 14,
Expand Down
6 changes: 3 additions & 3 deletions preprocessing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": 3
"version": "3.7.7-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python_defaultSpec_1597266629003",
"name": "python_defaultSpec_1597907492795",
"display_name": "Python 3.7.7 64-bit ('base': conda)"
}
},
Expand All @@ -30,7 +30,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand Down
Binary file added webapp/__pycache__/predictor_api.cpython-37.pyc
Binary file not shown.
Binary file added webapp/all_artist_urls.pkl
Binary file not shown.
Binary file added webapp/artist_features.pkl
Binary file not shown.
39 changes: 39 additions & 0 deletions webapp/predictor_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import pandas as pd
import pickle as pkl
import numpy as np
from scipy.spatial.distance import pdist

# Module-level tables, loaded once at import time. Every path is relative to
# the current working directory, so the app has to be started from the folder
# that contains these files.

# NOTE(review): `tsne` is not referenced anywhere else in the visible part of
# this module -- confirm whether another module imports it.
tsne = pd.read_csv('tsne_df_v1.csv')

# Table that show_stats() does its per-artist label lookup on.
with open('web_features.pkl', mode='rb') as f:
    artist_features = pkl.load(f)

# Table that recommend() measures row-to-row distances on.
with open('scaled_df_v1.pkl', mode='rb') as f:
    data = pkl.load(f)

def show_stats(artist):
    """
    look up and return the ``artist_features`` entry labelled ``artist``

    The lookup goes through ``.loc``, so a label that is not present is
    reported by whatever error that lookup raises (presumably ``KeyError``,
    assuming ``artist_features`` is a pandas object).
    """
    artist_row = artist_features.loc[artist]
    return artist_row

def recommend(artist, n_recs=5, features=None):
    """
    calculate the n_recs closest artists by euclidean distance

    Parameters
    ----------
    artist : index label of the target artist.
    n_recs : maximum number of recommendations to return (default 5).
        Values <= 0 yield an empty list.
    features : optional DataFrame indexed by artist name whose columns are
        all numeric. Defaults to the module-level ``data`` table.

    Returns
    -------
    list of index labels, nearest first. Artists at exactly the same
    distance keep the order they have in the table. The target artist is
    never part of the result.

    Raises
    ------
    KeyError if ``artist`` is not in the table's index.
    """
    table = data if features is None else features

    target = table.loc[artist].to_numpy(dtype=float)
    others = table.drop(index=artist)
    if n_recs <= 0 or others.empty:
        return []

    # One vectorised pass over all rows instead of building a two-row stack
    # and calling pdist once per artist.
    offsets = others.to_numpy(dtype=float) - target
    distances = np.sqrt((offsets ** 2).sum(axis=1))

    # Stable sort: equidistant artists come back in a deterministic order.
    nearest = np.argsort(distances, kind='stable')[:n_recs]
    return [others.index[i] for i in nearest]

# TODO: show_pic(artist) -- planned helper, not implemented yet

# TODO: show_songs(artist) -- planned helper, not implemented yet
63 changes: 63 additions & 0 deletions webapp/predictor_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import flask
from flask import request
from predictor_api import show_stats, recommend

import pickle as pkl

# Loaded once at import time and handed to the recommender template as
# ``links``. The path is relative to the current working directory.
with open('all_artist_urls.pkl', mode='rb') as f:
    links = pkl.load(f)

# The Flask application object that the route decorators below register on.
app = flask.Flask(__name__)



@app.route("/", methods=["GET", "POST"])
def landing():
    """
    render the landing page
    """
    page = flask.render_template('landing.html')
    return page

@app.route("/profile", methods=["GET", "POST"])
def get_artist_profile():
    """
    render the stats profile of the requested artist

    The artist name is read from the ``artist`` query-string parameter when
    it is present, otherwise from the submitted form field of the same name.

    Responds with 400 when no name was supplied and with 404 when the name
    is not a known artist.
    """
    # Accept either source so plain GET links (/profile?artist=...) work as
    # well as the POSTed search forms; the query string keeps precedence.
    artist = request.args.get('artist') or request.form.get('artist')
    if not artist:
        flask.abort(400, description="Missing required 'artist' parameter.")

    try:
        stats = show_stats(artist)
    except KeyError:
        # Unknown artist: answer with a clean 404 instead of a 500 traceback.
        flask.abort(404, description="Artist not found.")

    # The template gets the same name the stats were looked up with.
    return flask.render_template('profile.html',
                                 stats=stats,
                                 artist=artist)

@app.route("/recommend", methods=["GET", "POST"])
def get_recs():
    """
    generate recommendations for the requested artist

    The artist name is read from the ``artist`` query-string parameter when
    it is present, otherwise from the submitted form field of the same name.

    Responds with 400 when no name was supplied and with 404 when the name
    is not a known artist.
    """
    # Accept either source so plain GET links (/recommend?artist=...) work as
    # well as the POSTed forms; the query string keeps precedence.
    artist = request.args.get('artist') or request.form.get('artist')
    if not artist:
        flask.abort(400, description="Missing required 'artist' parameter.")

    try:
        recs = recommend(artist)
    except KeyError:
        # Unknown artist: answer with a clean 404 instead of a 500 traceback.
        flask.abort(404, description="Artist not found.")

    # One stats entry per recommendation, in the same order as ``recs``.
    stats = [show_stats(rec) for rec in recs]

    return flask.render_template('recommender.html',
                                 recs=recs,
                                 stats=stats,
                                 links=links,
                                 artist=artist)

if __name__ == "__main__":
    # Runs only when this file is executed directly, not when it is imported.
    # NOTE(review): debug=True is a development setting -- confirm it is never
    # used for a publicly reachable deployment.
    app.run(debug=True)

Binary file added webapp/scaled_df_v1.pkl
Binary file not shown.
40 changes: 40 additions & 0 deletions webapp/templates/landing.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<!DOCTYPE html>
<html lang="en">


<head>
    <!-- Declare the encoding and give the page a title; the head was empty. -->
    <meta charset="utf-8">
    <title>RAP GEO</title>
</head>

<body style="background-color:#b997e0;">
<div class='tableauPlaceholder' id='viz1597813392758' style='position: relative'><noscript><a href='#'><img alt=' ' src='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;ta&#47;tab_viz_v1&#47;Dashboard1&#47;1_rss.png' style='border: none' /></a>
</noscript><object class='tableauViz' style='display:none;'>
<param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' />
<param name='embed_code_version' value='3' />
<param name='site_root' value='' />
<param name='name' value='tab_viz_v1&#47;Dashboard1' />
<param name='tabs' value='no' />
<param name='toolbar' value='yes' />
<param name='static_image' value='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;ta&#47;tab_viz_v1&#47;Dashboard1&#47;1.png' />
<param name='animate_transition' value='yes' /><param name='display_static_image' value='yes' />
<param name='display_spinner' value='yes' />
<param name='display_overlay' value='yes' />
<param name='display_count' value='yes' />
<param name='language' value='en' />
<param name='filter' value='publish=yes' /></object>
</div>
<script type='text/javascript'> var divElement = document.getElementById('viz1597813392758'); var vizElement = divElement.getElementsByTagName('object')[0]; if ( divElement.offsetWidth > 800 ) { vizElement.style.width='1366px';vizElement.style.height='795px';} else if ( divElement.offsetWidth > 500 ) { vizElement.style.width='1366px';vizElement.style.height='795px';} else { vizElement.style.width='100%';vizElement.style.height='977px';} var scriptElement = document.createElement('script'); scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js'; vizElement.parentNode.insertBefore(scriptElement, vizElement); </script>
<p>RAP GEO v1</p>

<!-- A <form> cannot be nested inside a <p>, so the two search forms are
     siblings of the paragraph. `required` stops an empty name from being
     submitted. -->
<form action="/profile" method="POST">
    <input type="text" name="artist" aria-label="Artist name" required>
    <input type="submit" value="Check Profile">
</form>
<form action="/recommend" method="POST">
    <input type="text" name="artist" aria-label="Artist name" required>
    <input type="submit" value="Get Recommendations">
</form>

</body>
</html>

48 changes: 48 additions & 0 deletions webapp/templates/profile.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<!DOCTYPE html>
<html lang="en">


<head>
    <!-- Declare the encoding and give the page a title; the head was empty. -->
    <meta charset="utf-8">
    <title>{{ artist }} | RAP GEO</title>
</head>

<body style="background-color:#b997e0;">

<!-- Artist stats. `stats` is indexed by feature name and `artist` is the
     name that was looked up; both are passed in by the /profile view. -->
<p>
    <b>Artist</b>:<br>
    <b>{{ artist }}</b> <br>
    <br>

    <b>Lyrical Content</b>:<br>
    {{ stats['Lyrical/Misc'].round(3) }} Misc. Lyricism <br>
    {{ stats['Bragging/Combative'].round(3) }} Flexing &amp; Threats<br>
    {{ stats['Money/Party'].round(3) }} Money &amp; Partying <br>
    {{ stats['Romance/Sex'].round(3) }} Sex &amp; Romance<br>
    <br>

    <b>Flow Statistics</b>:<br>
    {{ (stats['unique_word_rate'] * 100).round(1) }}% Word Uniqueness <br>
    {{ stats['syllables_per_line'].round(1) }} Syllables/Line<br>
    <br>

    <b>Style</b><br>
    {{ stats['cluster'] }} <br>
</p>

<!-- A <form> cannot be nested inside a <p>, so the forms follow the
     paragraph as siblings. -->
<form action="/recommend" method="POST">
    <input type="hidden" name="artist" value="{{ artist }}">
    <input type="submit" value="Get Recommendations">
</form>
<br>

<form action="/profile" method="POST">
    <input type="text" name="artist" aria-label="Artist name" required>
    <input type="submit" value="Find Another Artist">
</form>

<form action="/" method="POST">
    <input type="submit" value="Return to Dashboard">
</form>
</body>
</html>

Loading

0 comments on commit ead281e

Please sign in to comment.