This repository has been archived by the owner on Jan 8, 2021. It is now read-only.

Commit e0a7fbc: Various improvements

benjaminrsherman committed Nov 1, 2020
1 parent c9cace2
Showing 3 changed files with 52 additions and 40 deletions.
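In short: the scrape workflow gains a step that cancels superseded runs, and both scrapers (catalog_scraper and prerequisites_scraper) trade sequential await loops for asyncio.gather so independent years and terms are fetched concurrently.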
.github/workflows/scrape.yml (5 additions, 0 deletions)

@@ -13,6 +13,11 @@ jobs:
     name: Scrapes schools per year
     runs-on: ubuntu-latest
     steps:
+      - name: Cancel Previous Runs
+        uses: styfle/cancel-workflow-action@<version>
+        with:
+          access_token: ${{ github.token }}
+
       - name: Checkout scrapers
         uses: actions/checkout@v2
         with:
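A note on the new step: styfle/cancel-workflow-action cancels any earlier, still-running runs of the same workflow before the new run proceeds, so overlapping scheduled scrapes don't race each other while writing under data/. The `<version>` placeholder stands in for whatever release tag the commit pinned.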
catalog_scraper/main.py (24 additions, 20 deletions)

@@ -157,30 +157,34 @@ async def get_years() -> List[Tuple[str, str, str]]:
 
 
 async def parse_year(s, year_data):
-    year, courses_url, schools_url = year_data
-
-    if sys.argv[1] == "catalog":
-        data = {}
-        while True:
-            if courses_url is None:
-                break
-            courses_url = await scrapePage(s, courses_url, data)
-    else:
-        data = await get_schools(s, schools_url)
-        data = list(map(lambda x: {"name": x[0], "depts": x[1]}, data.items()))
-
-    years = year.split("-")
-    for directory in (f"{years[0]}09", f"{years[1]}01", f"{years[1]}05"):
-        directory = "data/" + directory
-        os.makedirs(directory, exist_ok=True)
-        with open(f"{directory}/{sys.argv[1]}.json", "w") as outfile:
-            json.dump(data, outfile, sort_keys=False, indent=2)
+    try:
+        year, courses_url, schools_url = year_data
+
+        if sys.argv[1] == "catalog":
+            data = {}
+            while True:
+                if courses_url is None:
+                    break
+                courses_url = await scrapePage(s, courses_url, data)
+        else:
+            data = await get_schools(s, schools_url)
+            data = list(map(lambda x: {"name": x[0], "depts": x[1]}, data.items()))
+
+        years = year.split("-")
+        for directory in (f"{years[0]}09", f"{years[1]}01", f"{years[1]}05"):
+            directory = "data/" + directory
+            os.makedirs(directory, exist_ok=True)
+            with open(f"{directory}/{sys.argv[1]}.json", "w") as outfile:
+                json.dump(data, outfile, sort_keys=False, indent=2)
+    except Exception as e:
+        print(year_data)
+        print(e)
+        raise e
 
 
 async def parse_years(years_data):
     async with aiohttp.ClientSession() as s:
-        for year_data in years_data:
-            await parse_year(s, year_data)
+        await asyncio.gather(*(parse_year(s, year_data) for year_data in years_data))
 
 
 years = asyncio.run(get_years())
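The substantive change is in parse_years: a sequential await loop becomes a single asyncio.gather call, so every year is scraped concurrently over the shared aiohttp session. The try/except added to parse_year supports that: when one of many gathered tasks fails, printing year_data before re-raising shows which input failed, context a bare traceback from inside gather would not give. A minimal sketch of the loop-to-gather pattern (fetch, sequential, and concurrent are illustrative placeholders, not this repo's code):

```python
import asyncio

async def fetch(item):
    # Stand-in for per-item work such as parse_year(s, year_data).
    await asyncio.sleep(0.1)
    return item * 2

async def sequential(items):
    # Old pattern: each await must finish before the next one starts.
    return [await fetch(item) for item in items]

async def concurrent(items):
    # New pattern: all coroutines are scheduled at once; gather returns
    # results in input order and propagates the first exception raised.
    return await asyncio.gather(*(fetch(item) for item in items))

print(asyncio.run(concurrent([1, 2, 3])))  # -> [2, 4, 6]
```

With I/O-bound work like HTTP requests, the concurrent version takes roughly as long as the slowest single item rather than the sum of all of them.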
prerequisites_scraper/main.py (23 additions, 20 deletions)

@@ -195,28 +195,31 @@ async def get_prereq_string(s, term, crn):
     return data
 
 
+async def parse_term(s, term):
+    prerequisites = {}
+
+    crns = []
+    with open(f"data/{term}/courses.json") as json_file:
+        courses = json.load(json_file)
+        for department in courses:
+            for course in department["courses"]:
+                for section in course["sections"]:
+                    crns.append(section["crn"])
+
+    for crn in tqdm(crns, desc=term):
+        try:
+            prerequisites[crn] = await get_prereq_string(s, term, crn)
+        except Exception as e:
+            print(f"CRN: {crn} - {e}")
+            prerequisites[crn] = {}
+
+    with open(f"data/{term}/prerequisites.json", "w") as outfile:
+        json.dump(prerequisites, outfile, indent=4)
+
+
 async def main():
     async with aiohttp.ClientSession() as s:
-        for term in os.listdir("data"):
-            prerequisites = {}
-
-            crns = []
-            with open(f"data/{term}/courses.json") as json_file:
-                courses = json.load(json_file)
-                for department in courses:
-                    for course in department["courses"]:
-                        for section in course["sections"]:
-                            crns.append(section["crn"])
-
-            for crn in tqdm(crns, desc=term):
-                try:
-                    prerequisites[crn] = await get_prereq_string(s, term, crn)
-                except Exception as e:
-                    print(f"CRN: {crn} - {e}")
-                    prerequisites[crn] = {}
-
-            with open(f"data/{term}/prerequisites.json", "w") as outfile:
-                json.dump(prerequisites, outfile, indent=4)
+        await asyncio.gather(*(parse_term(s, term) for term in os.listdir("data")))
 
 
 if __name__ == "__main__":
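The same refactor as in catalog_scraper: the per-term body moves out of main into parse_term, which is what lets main fan the terms out through asyncio.gather on one shared session. Each term still fetches its CRNs one at a time, but the terms now run in parallel with one another. If the number of term directories grows, the number of in-flight terms can be capped with a semaphore; a sketch of that idea under assumed names (scrape_term, limit, and the term strings are illustrative, not from this commit):

```python
import asyncio

async def scrape_term(term):
    # Stand-in for parse_term(s, term): one unit of per-term work.
    await asyncio.sleep(0.1)
    return term

async def main(terms, limit=4):
    sem = asyncio.Semaphore(limit)  # at most `limit` terms in flight at once

    async def bounded(term):
        async with sem:  # hold a slot while this term runs, free it after
            return await scrape_term(term)

    # gather still returns results in input order
    return await asyncio.gather(*(bounded(t) for t in terms))

print(asyncio.run(main(["202009", "202101", "202105"])))
```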
