Skip to content

Commit

Permalink
Clean up unused portions of the code and also
Browse files Browse the repository at this point in the history
  • Loading branch information
AdamTuhacek committed Oct 17, 2023
1 parent 3e35e98 commit d2e80a9
Showing 1 changed file with 17 additions and 33 deletions.
50 changes: 17 additions & 33 deletions printers_scraper.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,19 @@
import requests

import urllib.request

from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import platform
from selenium.webdriver.chrome.service import Service

import json

url = 'https://itssc.rpi.edu/hc/en-us/articles/360005151451-RCS-Public-Printers-Sorted-by-Location'

# a = requests.get(url)
# print(a.text)

# header= {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
# 'AppleWebKit/537.11 (KHTML, like Gecko) '
# 'Chrome/23.0.1271.64 Safari/537.11',
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
# 'Accept-Encoding': 'none',
# 'Accept-Language': 'en-US,en;q=0.8',
# 'Connection': 'keep-alive'}

# req = urllib.request.Request(url, headers=header)

# fp = urllib.request.urlopen(url)
# mybytes = fp.read()

# mystr = mybytes.decode("utf8")
# fp.close()

# print(mystr)

#Method 2: Use selenium

options = webdriver.ChromeOptions()
#options.add_argument("--headless")
options.add_argument("--headless")

# Note: Running this program requires downloading the most recent chromedriver version.
# On Windows, visit https://chromedriver.chromium.org/downloads to download it, and put it in the same directory as this program.
# On Linux, use: sudo apt-get install chromium-chromedriver
# Not sure about Mac, but the approach is probably similar to Windows.

# This is the default path on Linux, but specifying it probably isn't necessary.
service = Service(executable_path = r'/usr/bin/chromedriver')
if platform.system() == 'Windows':
driver = webdriver.Chrome(options=options)
Expand Down Expand Up @@ -67,16 +41,26 @@
duplex = False

printer_info = contents[i].split('</td>')

# The building name is the text of the first <td> cell of the row.
building = printer_info[0].split('<td>')[-1]
# str.replace returns a new string (str is immutable), so the result must be
# assigned back; calling it as a bare statement silently discards the change.
building = building.replace(' ', '_')

room = printer_info[1].split('<td>')[-1]

if printer_info[3].split('<td>')[-1] != '&nbsp;':
color = True

printer_id = printer_info[2].split('<td>')[-1]

paper_type = printer_info[4].split('<td>')[-1]
paper_type = paper_type.replace('\u2033', '')
paper_type = paper_type.replace('\u00d7', 'x')

if printer_info[5].split('<td>')[-1] != '&nbsp;':
duplex = True

dpi = printer_info[6].split('<td>')[-1]

dpi = dpi.split()[0]

if building not in printerdict:
printerdict[building] = dict()
Expand Down

0 comments on commit d2e80a9

Please sign in to comment.