-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtask12.py
38 lines (32 loc) · 1.16 KB
/
task12.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import os,json,requests,time,random,task1
from bs4 import BeautifulSoup
from pprint import pprint
# Directory holding one "<movie id>_cast.json" cache file per scraped movie.
_CAST_CACHE_DIR = "/home/navgurukul/Desktop/rAju/data/castData/"


def _base_url(url):
    """Return *url* without query string, fragment, or trailing slash."""
    return url.split("?", 1)[0].split("#", 1)[0].rstrip("/")


def movie_Cast(moviesLst, *, cache_dir=_CAST_CACHE_DIR, timeout=30):
    """Collect the cast list of every movie in *moviesLst*, with on-disk caching.

    For each movie the ID is taken from the last path segment of its
    ``"link"`` value.  If ``<cache_dir>/<movie id>_cast.json`` exists, the
    cast is loaded from it; otherwise the movie's ``fullcredits`` page is
    downloaded, parsed, and the result is written to that file.

    Args:
        moviesLst: Iterable of dicts, each with a ``"link"`` key holding the
            movie's title-page URL
            (e.g. ``https://www.imdb.com/title/tt0066763/``).
        cache_dir: Directory of the per-movie JSON cache files.  It is
            created on first write if it does not exist.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        Dict mapping movie ID to a list of ``{"imdb_id": ..., "name": ...}``
        dicts, one per cast entry found.  A movie whose page has no
        ``cast_list`` table maps to an empty list, and that result is not
        cached so a later run can retry it.

    Raises:
        KeyError: If a movie dict has no ``"link"`` key.
        requests.HTTPError: If the credits page returns an error status.
    """
    finalDict = {}
    for movieDic in moviesLst:
        baseLink = _base_url(movieDic["link"])
        # Last path segment instead of a fixed-width slice, so IDs of any
        # length and links without a trailing slash are handled.
        movieId = baseLink.rsplit("/", 1)[-1]
        cachePath = os.path.join(cache_dir, movieId + "_cast.json")

        if os.path.exists(cachePath):
            with open(cachePath, encoding="utf-8") as file:
                finalDict[movieId] = json.load(file)
            continue

        # Random 1-5 second pause before every download.
        time.sleep(random.randint(1, 5))
        # e.g. https://www.imdb.com/title/tt0066763/fullcredits?ref_=tt_cl_sm#cast
        creditsUrl = baseLink + "/fullcredits?ref_=tt_cl_sm#cast"
        response = requests.get(creditsUrl, timeout=timeout)
        # Fail loudly on 4xx/5xx rather than parsing an error page.
        response.raise_for_status()

        page0 = BeautifulSoup(response.text, "html.parser")
        castLst = page0.find("table", class_="cast_list")
        if castLst is None:
            finalDict[movieId] = []
            continue

        finalLst = []
        # NOTE(review): class_="" is presumably meant to select the name
        # cells (the <td> elements without a class) - confirm against the
        # live markup.
        for td in castLst.find_all("td", class_=""):
            anchor = td.a
            if anchor is None or not anchor.get("href"):
                # Cell without a link: no ID to record.
                continue
            finalLst.append({
                "imdb_id": _base_url(anchor["href"]).rsplit("/", 1)[-1],
                "name": td.text.strip(),
            })

        os.makedirs(cache_dir, exist_ok=True)
        with open(cachePath, "w", encoding="utf-8") as file:
            json.dump(finalLst, file)
        finalDict[movieId] = finalLst
    return finalDict
# Example usage (uncomment to run):
# moviesLst=task1.top_250movies()
# pprint(movie_Cast(moviesLst))