forked from schollz/web-archiver
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.py
39 lines (35 loc) · 1.24 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
import shutil
import subprocess
from datetime import datetime

from selenium import webdriver
# Working directory at start-up. The code below changes into per-site and
# per-repo folders with os.chdir and uses this value to change back.
CURRENT_DIRECTORY = os.getcwd()
def archiveSite(site):
    """Mirror ``site`` with wget and save a screenshot of its front page.

    The snapshot is written to ``./sites/<site>/<YYYY-MM-DD>/`` relative to
    the current working directory. wget is run inside that dated folder; the
    contents of the ``<site>`` sub-directory it produces are moved up into
    the dated folder and the sub-directory is removed. Firefox is then opened
    on ``http://<site>`` and captured as ``screenshot.png`` in the same folder.

    Args:
        site: Site address without a scheme; it is passed to wget as-is and
            prefixed with ``http://`` for the browser.

    Side effects:
        The process working directory is set to ``CURRENT_DIRECTORY`` when
        the function exits, whether it succeeds or raises.
    """
    dateString = datetime.now().strftime('%Y-%m-%d')
    folder = os.path.join('.', 'sites', site, dateString)
    # makedirs also creates the intermediate sites/<site> level.
    if not os.path.isdir(folder):
        os.makedirs(folder)

    os.chdir(folder)
    try:
        # Argument list rather than a shell string: `site` must never be
        # interpreted by a shell. The exit status is ignored (best effort),
        # as it was with os.system.
        subprocess.call([
            'wget',
            '--recursive',
            '--no-clobber',
            '--page-requisites',
            '--html-extension',
            '--convert-links',
            '--restrict-file-names=windows',
            site,
        ])

        # Flatten the download directory into the dated folder.
        if os.path.isdir(site):
            for entry in os.listdir(site):
                if os.path.isdir(entry):
                    # A directory of that name is already archived here
                    # (e.g. a second run on the same day); keep it.
                    continue
                shutil.move(os.path.join(site, entry), entry)
            shutil.rmtree(site, ignore_errors=True)

        browser = webdriver.Firefox()
        try:
            browser.get('http://' + site)
            browser.save_screenshot('screenshot.png')
        finally:
            # Always close the browser so a failed page load does not leave
            # a Firefox process behind.
            browser.quit()
    finally:
        # Restore the working directory even on failure; later relative
        # paths depend on it.
        os.chdir(CURRENT_DIRECTORY)
# The string literal below is inert (it is never executed). It holds a
# disabled loop that would call archiveSite on each stripped line of the
# 'sites' file.
'''
with open('sites','r') as f:
for site in f:
archiveSite(site.strip())
'''
# For every repository name listed (one per line) in the 'repos' file,
# download https://github.com/schollz/<repo>/archive/master.zip with wget
# into github/<repo>/.
with open('repos', 'r') as f:
    for repo in f:
        repo = repo.strip()
        if not repo:
            # A blank line would otherwise create 'github/' and request
            # .../schollz//archive/master.zip.
            continue

        target = os.path.join('github', repo)
        # makedirs creates the parent 'github' directory as needed.
        if not os.path.isdir(target):
            os.makedirs(target)

        # Run wget inside the target folder via cwd= instead of os.chdir, so
        # the process working directory never changes. The argument list
        # keeps the repo name away from a shell.
        subprocess.call(
            ['wget', 'https://github.com/schollz/' + repo + '/archive/master.zip'],
            cwd=target,
        )