This repository has been archived by the owner on May 5, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.py
218 lines (175 loc) · 6.88 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
"""
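Tweet about tracks newly added to the ResidentArchive playlists on Spotify.

The Lambda entry point ``handle`` consumes DynamoDB stream records.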
"""
import os
import sys
# https://github.com/apex/apex/issues/639#issuecomment-455883587
file_path = os.path.dirname(__file__)
module_path = os.path.join(file_path, "env")
sys.path.append(module_path)
from spotipy.oauth2 import SpotifyClientCredentials
from pprint import pprint
import urllib.parse
from bs4 import BeautifulSoup
import requests
import musicbrainzngs
import spotipy
import re
import boto3
# Handle on the 'any_tracks' DynamoDB table (eu-west-1); mark_as_tweeted()
# writes the id of the posted tweet back to it.
dynamodb = boto3.resource("dynamodb", region_name='eu-west-1')
any_tracks = dynamodb.Table('any_tracks')
def stringified_page(url, timeout=10):
    """Download ``url`` and return the response body as text.

    A page that was only reached through a non-200 hop (i.e. a redirect) is
    treated as a failure.
    NOTE(review): presumably the target site redirects unknown pages instead
    of answering 404 -- confirm.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block the caller forever.

    Returns:
        The decoded body of the page.

    Raises:
        Exception: carrying the offending HTTP status code when the final
            response, or any intermediate hop, is not a 200.
        requests.exceptions.RequestException: on network errors or timeout.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(response.status_code)
    for hop in response.history:
        if hop.status_code != 200:
            # Report the hop's own status: the final response is a 200 here,
            # which would make the error message meaningless.
            raise Exception(hop.status_code)
    return response.text
def extract_track_info(page):
    """Return the link attached to the first "Twitter" text of an HTML page.

    Args:
        page: HTML document as a string.

    Returns:
        The ``href`` attribute of the element enclosing the first text node
        matching "Twitter", or ``None`` when fewer than two such text nodes
        exist (or the enclosing element has no ``href``).
    """
    document = BeautifulSoup(page, 'html.parser')
    matches = document.find_all(text=re.compile("Twitter"))
    # A lone match is ignored on purpose: at least two are required.
    # NOTE(review): presumably one match belongs to the site itself -- confirm.
    if len(matches) <= 1:
        return None
    return matches[0].parent.get('href')
def find_artist_ra(artist):
    """Look for an artist's Twitter link on their Resident Advisor DJ page.

    The page address is built from the artist name with spaces removed.

    Args:
        artist: Artist name.

    Returns:
        Whatever ``extract_track_info`` finds on the page (a link or
        ``None``), or ``None`` when the page cannot be fetched.
    """
    # Percent-encode the name so characters such as "/", "?" or "#" cannot
    # change the meaning of the URL.
    slug = urllib.parse.quote(artist.replace(" ", ""), safe="")
    try:
        page = stringified_page("https://www.residentadvisor.net/dj/" + slug)
    except Exception:
        # Best effort: an unreachable or missing page just means "not found".
        return None
    return extract_track_info(page)
def find_artist_musicbrainz(artist):
    """Look for an artist's Twitter link among their MusicBrainz URL relations.

    Only the first search result is considered.

    Args:
        artist: Artist name used as the MusicBrainz search query.

    Returns:
        The first related URL containing "twitter" (case-insensitive), or
        ``None`` when the search has no result or no such URL exists.
    """
    results = musicbrainzngs.search_artists(query=artist)
    if not results['artist-count'] > 0:
        return None
    best_match_id = results['artist-list'][0]["id"]
    details = musicbrainzngs.get_artist_by_id(best_match_id, includes=["url-rels"])['artist']
    # Scan every relation, including the case of a single one: an artist whose
    # only link is the Twitter profile must still be found.
    for link in details.get('url-relation-list', []):
        target = link['target']
        if re.search('twitter', target, re.IGNORECASE):
            return target
    return None
def find_twitter(artist):
    """Return the Twitter username of an artist, if one can be found.

    MusicBrainz is queried first; Resident Advisor is only consulted when
    MusicBrainz yields nothing.

    Args:
        artist: Artist name.

    Returns:
        The last path segment of the profile URL that was found, or ``None``
        when no profile URL was found.
    """
    profile_url = find_artist_musicbrainz(artist) or find_artist_ra(artist)
    if not profile_url:
        return None
    path = urllib.parse.urlparse(profile_url)[2]
    # Drop trailing slashes first, otherwise "https://twitter.com/name/"
    # would produce an empty username.
    return path.rstrip('/').rpartition('/')[2]
def get_genres(sp, album_id, artists):
    """Collect the genres of a release and of its artists.

    Args:
        sp: Client exposing ``album(id)`` and ``artist(id)``, each returning a
            mapping with a ``'genres'`` list.
        album_id: Identifier passed to ``sp.album``.
        artists: Iterable of mappings with an ``'id'`` key, one per artist.

    Returns:
        Genres of the album followed by those of each artist, with spaces and
        hyphens removed and duplicates dropped (first occurrence wins).
    """
    album_genres = sp.album(album_id)['genres']
    print("release genres:", album_genres)

    collected = list(album_genres)
    for artist in artists:
        collected.extend(sp.artist(artist['id'])['genres'])

    compacted = (genre.replace(" ", "").replace("-", "") for genre in collected)
    # De-duplicate after compaction so "deep house" and "deep-house" count as
    # one genre; callers that slice the list then never get the same entry twice.
    song_genres = list(dict.fromkeys(compacted))
    print("artist genres", song_genres)
    return song_genres
def remove_duplicates_insensitive(input_list):
    """Drop case-insensitive duplicates from a list of strings.

    The first spelling encountered is the one kept, and the relative order of
    the surviving items is unchanged.

    Args:
        input_list: Iterable of strings.

    Returns:
        A new list without case-insensitive repeats.
    """
    first_seen = {}
    for entry in input_list:
        # setdefault keeps the earliest spelling for each lower-cased key.
        first_seen.setdefault(entry.lower(), entry)
    return list(first_seen.values())
def find_artists_twitters(artists):
    """Replace artist names by their Twitter usernames where one is known.

    Args:
        artists: List of mappings with a ``'name'`` key.

    Returns:
        A ``(labels, found_one)`` tuple: ``labels`` holds, per artist, the
        Twitter username when ``find_twitter`` returned one and the plain name
        otherwise, without case-insensitive duplicates; ``found_one`` tells
        whether at least one username was found.
    """
    names = [entry['name'] for entry in artists]
    usernames = [find_twitter(name) for name in names]
    found_one = any(usernames)
    labels = [username or name for username, name in zip(usernames, names)]
    return remove_duplicates_insensitive(labels), found_one
def mark_as_tweeted(record, tweet_id):
    """Store the id of the posted tweet on the track's ``any_tracks`` item.

    Args:
        record: The ``'dynamodb'`` section of a stream record; its
            ``['Keys']['id']`` entry identifies the item.
        tweet_id: Identifier of the tweet to store in ``tweet_id``.
    """
    track_id = record['Keys']['id']
    if isinstance(track_id, dict):
        # Stream records carry typed attribute values such as {'N': '42'},
        # while the resource-level Table API expects native Python values.
        # A key attribute can never be a map, so a dict is always the typed form.
        from boto3.dynamodb.types import TypeDeserializer
        track_id = TypeDeserializer().deserialize(track_id)

    any_tracks.update_item(
        Key={
            'host': 'ra',
            'id': track_id,
        },
        UpdateExpression="set tweet_id = :tweet_id",
        ExpressionAttributeValues={':tweet_id': tweet_id},
    )
def tweet(track):
    """Post a tweet announcing a track.

    Twitter credentials are read from the ``TWITTER_*`` environment variables.

    Args:
        track: Mapping with the keys ``'name'``, ``'artists'`` (list of
            strings), ``'year'``, ``'genres'`` (list of strings, turned into
            hashtags) and ``'playlist_url'``.

    Returns:
        The result of ``PostUpdate``, or ``None`` when Twitter rejected the
        status as a duplicate (error code 187).

    Raises:
        twitter.error.TwitterError: for any other Twitter error.
    """
    import twitter

    api = twitter.Api(consumer_key=os.environ['TWITTER_CONSUMER_KEY'],
                      consumer_secret=os.environ['TWITTER_CONSUMER_SECRET'],
                      access_token_key=os.environ['TWITTER_ACCESS_TOKEN_KEY'],
                      access_token_secret=os.environ['TWITTER_ACCESS_TOKEN_SECRET'])

    genres = ""
    if track['genres']:
        genres = " #%s" % ' #'.join(track['genres'])

    txt = ('Track "%s" by %s just added to the ResidentArchive %s playlist on Spotify%s #electronicmusic %s'
           % (track['name'],
              ', '.join(track['artists']),
              track['year'],
              genres,
              track['playlist_url'],))
    print(txt)

    try:
        return api.PostUpdate(txt)
    except twitter.error.TwitterError as e:
        errors = e.message
        # Only a list of {'code': ...} entries can describe a duplicate (187).
        # Any other payload (plain string, dict, entry without a code) is
        # re-raised as is instead of failing with an unrelated TypeError or
        # KeyError while inspecting it.
        only_duplicates = isinstance(errors, list) and all(
            isinstance(err, dict) and err.get('code') == 187 for err in errors
        )
        if only_duplicates:
            return None
        raise
def tweet_record(spotify_track, year, playlist_id):
    """Tweet about a Spotify track if one of its artists is on Twitter.

    Args:
        spotify_track: Track reference passed to the Spotify client.
        year: Year shown in the tweet text.
        playlist_id: Spotify playlist id, appended to the playlist base URL.

    Returns:
        The result of ``tweet`` when at least one artist has a known Twitter
        username, otherwise ``None`` (nothing is posted).
    """
    musicbrainzngs.set_useragent("Resident Archive", "1.0", "https://residentarchive.com")
    credentials = SpotifyClientCredentials()
    client = spotipy.Spotify(client_credentials_manager=credentials)

    track = client.track(spotify_track)
    artists, found_one = find_artists_twitters(track['artists'])
    if not found_one:
        return None

    title = track['name']
    playlist_url = 'https://open.spotify.com/playlist/' + playlist_id
    # Only the first two genres are kept for the tweet.
    top_genres = get_genres(client, track['album']['id'], track['artists'])[:2]
    return tweet({
        'artists': artists,
        'name': title,
        'year': year,
        'playlist_url': playlist_url,
        'genres': top_genres,
    })
def handle(event, context):
    """Lambda handler: tweet the tracks found in DynamoDB stream records.

    A record is processed when it comes from DynamoDB, is not an INSERT, and
    its new image has a release year, a Spotify track and a Spotify playlist
    but no ``tweet_id`` yet. After a successful tweet the item is marked with
    the tweet id.

    Args:
        event: Lambda event; ignored unless it has a ``'Records'`` list.
        context: Lambda context (unused).

    Returns:
        None.
    """
    if 'Records' not in event:
        return

    required = ('release_date_year', 'spotify_track', 'spotify_playlist')
    for record in event['Records']:
        if record['eventSource'] != "aws:dynamodb" \
                or record['eventName'] == "INSERT":
            continue

        # Records without a new image (e.g. REMOVE events) have nothing to
        # tweet about; skip them instead of failing on the missing key.
        image = record['dynamodb'].get('NewImage')
        if not image:
            continue

        # Skip items that were already tweeted or are not fully populated.
        if 'tweet_id' in image or not all(key in image for key in required):
            continue

        year = image['release_date_year']['N']
        spotify_track = image['spotify_track']['S']
        spotify_playlist = image['spotify_playlist']['S']
        resp = tweet_record(spotify_track, year, spotify_playlist)
        if resp:
            mark_as_tweeted(record['dynamodb'], resp.id)
if __name__ == "__main__":
    # Manual run: feed the handler one sample MODIFY record shaped like a
    # DynamoDB stream event and print what it returns.
    sample_keys = {
        u'host': {u'S': u'ra'},
        u'id': {u'N': u'956790'},
    }
    sample_image = {
        u'spotify_track': {u'S': u'spotify:track:2xG4qpmeaQvLFt4AuFbKEu'},
        u'spotify_playlist': {u'S': u'1VHpfwF7HNqZavzg7EIBVM'},
        u'release_date_year': {u'N': u'2007'},
        u'first_charted_year': {u'N': u'2006'},
    }
    sample_record = {
        u'dynamodb': {
            u'Keys': sample_keys,
            u'NewImage': sample_image,
            u'ApproximateCreationDateTime': 1558178610.0,
            u'StreamViewType': u'NEW_AND_OLD_IMAGES',
        },
        u'awsRegion': u'eu-west-1',
        u'eventName': u'MODIFY',
        u'eventSourceARN': u'arn:aws:dynamodb:eu-west-1:705440408593:table/any_tracks/stream/2019-05-06T10:02:12.102',
        u'eventSource': u'aws:dynamodb',
    }
    print(handle({u'Records': [sample_record]}, {}))