-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathvideo_download.py
79 lines (56 loc) · 2.34 KB
/
video_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import yt_dlp
import os
from bs4 import BeautifulSoup
from requests_html import HTMLSession
# Listing page that is rendered and scraped for episode tiles (<a class="tile__link">).
news_base_url = 'https://vod.tvp.pl/informacje-i-publicystyka,205/wiadomosci-odcinki,273726'
# Site root prepended to each tile's href to form a full URL
# (presumably the hrefs are site-relative paths -- TODO confirm).
base_url = 'https://vod.tvp.pl'
def load_page(url):
    """Fetch *url*, render it with requests-html, and return the parsed HTML.

    The page is rendered through requests-html instead of Selenium because
    Selenium is slow.

    Args:
        url: Address of the page to load.

    Returns:
        A ``BeautifulSoup`` tree (``html.parser`` backend) built from the
        rendered page's HTML.
    """
    # Use the session as a context manager so it is closed even when the
    # request or the render step raises; the original never closed it.
    with HTMLSession() as session:
        response = session.get(url)
        response.html.render(timeout=10, sleep=0.5)
        return BeautifulSoup(response.html.html, 'html.parser')
def get_latest_tvp_news_url():
    """Return the full URL of the first episode tile on the news listing page.

    The first ``<a class="tile__link">`` on the page is used; presumably the
    listing is ordered newest-first (verify against the live page).

    Returns:
        ``base_url`` concatenated with the tile's ``href``.

    Raises:
        IndexError: If the rendered page contains no ``tile__link`` anchor
            (e.g. the markup changed or the page failed to render).
    """
    page = load_page(news_base_url)
    first_tile = page.find('a', {'class': 'tile__link'})
    if first_tile is None:
        # Same exception type as indexing an empty result list, but with a
        # message that says what actually went wrong.
        raise IndexError(f"no 'tile__link' anchors found on {news_base_url}")
    return base_url + first_tile.get('href')
def get_all_tvp_news_urls(broadcast_time='19:30'):
    """Return the full URLs of the episode tiles on the news listing page.

    Args:
        broadcast_time: Only tiles whose ``aria-label`` contains this text are
            kept. The default ``'19:30'`` keeps the original filter; pass
            ``None`` to return every tile on the page.

    Returns:
        A flat list of URL strings (``base_url`` + each tile's ``href``), in
        page order. The list is empty when nothing matches.
    """
    page = load_page(news_base_url)
    tiles = page.find_all('a', {'class': 'tile__link'})
    if broadcast_time is not None:
        # A tile without an aria-label is treated as non-matching instead of
        # raising TypeError on `in None`.
        tiles = [
            tile for tile in tiles
            if broadcast_time in (tile.get('aria-label') or '')
        ]
    # Return the list itself; the original wrapped it in a second list.
    return [base_url + tile.get('href') for tile in tiles]
def download_all_videos():
    """Download every video returned by ``get_all_tvp_news_urls()``.

    Files are written to ``<current working directory>/data`` and named from
    the yt-dlp output template ``%(title)s.%(ext)s``. The format selector
    requests a video stream of at most 480 px height, falling back to ``best``.
    """
    output_path = os.path.join(os.path.abspath(os.getcwd()), 'data', '%(title)s.%(ext)s')
    ydl_opts = {
        'format': 'bestvideo[height<=480]/best',
        'outtmpl': output_path,
    }

    # Collect a flat list of URL strings. Each entry may be a single URL or a
    # list of URLs, so both shapes are accepted.
    urls = []
    for entry in get_all_tvp_news_urls():
        if isinstance(entry, str):
            urls.append(entry)
        else:
            urls.extend(entry)

    if not urls:
        return

    # One downloader for the whole batch instead of a new one per URL.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download(urls)
def download_video():
    """Download the video returned by ``get_latest_tvp_news_url()``.

    The file is written to ``<current working directory>/data`` and named from
    the yt-dlp output template ``%(title)s.%(ext)s``. The format selector
    requests a video stream of at most 480 px height, falling back to ``best``.
    """
    output_path = os.path.join(os.path.abspath(os.getcwd()), 'data', '%(title)s.%(ext)s')
    ydl_opts = {
        'format': 'bestvideo[height<=480]/best',
        'outtmpl': output_path,
    }
    latest_news_url = get_latest_tvp_news_url()
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        # YoutubeDL.download() takes a list of URLs, not a bare string.
        ydl.download([latest_news_url])
def download_mp3(url):
    """Download the best available audio stream for *url*.

    The file is written to ``<current working directory>/data`` and named from
    the yt-dlp output template ``%(title)s.%(ext)s``. No post-processing is
    configured, so the file keeps whatever extension the source provides; it
    is not transcoded to MP3.

    Args:
        url: A single URL string, or an iterable of URL strings.
    """
    output_path = os.path.join(os.path.abspath(os.getcwd()), 'data', '%(title)s.%(ext)s')
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': output_path,
    }
    # YoutubeDL.download() takes a list of URLs; wrap a lone string and pass
    # any other iterable through as a list.
    urls = [url] if isinstance(url, str) else list(url)
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download(urls)
if __name__ == "__main__":
    import sys

    # Audio URLs come from the command line rather than being hard-coded in
    # the script, so the module carries no embedded third-party link.
    audio_urls = sys.argv[1:]
    if not audio_urls:
        sys.exit(f"usage: {sys.argv[0]} URL [URL ...]")
    for audio_url in audio_urls:
        download_mp3(audio_url)