Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Prereqs, Instructors, Loc, and GEs #53

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
__pycache__/
*.py[cod]
*$py.class
.DS_Store

# C extensions
*.so
Expand Down
38 changes: 30 additions & 8 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
from scipy import spatial
import os
import json
from recommend_GloVe.recommend_GloVe_average import recommend
from functools import wraps
import time
from recommend_tfidf.recommend_tfidf import recommend
# from recommend_GloVe.recommend_GloVe_average import recommend


with open("./static/courses.json", "r") as courses_file:
Expand All @@ -16,23 +19,42 @@

app = Flask(__name__)

def timer(request):
    """Decorate a callable so that its wall-clock run time is printed.

    Args:
        request: The callable to wrap. Note that inside this decorator the
            name refers to the wrapped function, not to the ``request``
            object the view functions read form data from.

    Returns:
        A wrapper with the same metadata (via ``functools.wraps``) that
        forwards all arguments, returns the wrapped callable's result and
        prints the elapsed time in seconds.
    """
    @wraps(request)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        try:
            return request(*args, **kwargs)
        finally:
            # Report the timing even when the wrapped callable raises, so
            # slow failing requests are visible in the log as well.
            run = time.perf_counter() - start
            print(f"Time to find a recommendation: {run} sec")
    return wrapper

@app.route('/')
def index():
    """Serve the landing page."""
    return render_template('index.html')

@app.route('/rec', methods=['POST'])
@timer
def getvalue():
    """Handle the recommendation form and render the results page.

    Reads the free-text ``search`` field and the filter checkboxes from the
    submitted form, asks ``recommend`` for matching courses and renders
    ``result.html``. A checkbox counts as enabled when its name is present
    in the form. Any failure is printed and the landing page is re-rendered
    with an error message.
    """
    try:
        user_input = request.form['search']
        # One boolean per filter checkbox; the names match recommend()'s
        # keyword arguments.
        filter_names = ('upperlvl', 'cmc', 'pomona', 'hmc', 'scripps', 'pitzer', 'other')
        filters = {name: name in request.form for name in filter_names}
        recommendations = recommend(user_input, **filters)
        return render_template('result.html', tables=recommendations, course=user_input)
    except Exception as e:
        # Top-level request boundary: log the cause and show a friendly error.
        print(e)
        error = "Invalid Description. Please Try Again"
        return render_template('index.html', error=error)

@app.route('/search', methods=['POST'])
@timer
def search():
term = request.form['q']
print ('term: ', term)
Expand Down
3,475 changes: 3,475 additions & 0 deletions course/courses.csv

Large diffs are not rendered by default.

93 changes: 93 additions & 0 deletions course/courses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import requests, os, csv
from dotenv import load_dotenv
import pandas as pd

# Build a lookup of every course area returned by the course-areas endpoint:
# area code -> area description.

courseareas = requests.get('http://jicsweb.pomona.edu/api/courseareas')
coursedict = {area['Code']: area['Description'] for area in courseareas.json()}


# Requests the courses of one course area for the hard-coded "2023;FA" term
# and returns the decoded JSON.

def reqCourseInfo(code):
    """Fetch the course listing for one course area.

    Args:
        code: Course-area code appended to the Courses endpoint URL.

    Returns:
        The decoded JSON body of the response, or ``None`` when the body is
        not valid JSON (treated as "no courses offered in this area").
    """
    load_dotenv()
    # API_KEY is read from the environment (populated from .env by load_dotenv).
    payload = {"api_key": os.environ.get("API_KEY", None)}
    r = requests.get("https://jicsweb.pomona.edu/api/Courses/2023;FA/" + code, params=payload)
    try:
        return r.json()
    except ValueError:
        # JSON decoding errors are ValueError subclasses; anything else
        # (e.g. KeyboardInterrupt) should not be swallowed here.
        print(f"No courses offered in course area {code} this semester")
        return None


# Fetch every course area exactly once and keep the areas that returned data.
# The listings are cached so the CSV loop below does not have to request each
# area a second time.
courses_by_area = {}
for code in coursedict:
    listing = reqCourseInfo(code)
    if listing is not None:
        courses_by_area[code] = listing

# Area code -> area description for every area with courses this semester.
valid = {code: coursedict[code] for code in courses_by_area}


def _extract_prerequisites(description):
    """Return the text after the prerequisite marker, or 'None' if absent."""
    # "Prerequisite:" is checked first, then "Prerequisites:".
    for marker in ("Prerequisite:", "Prerequisites:"):
        start = description.find(marker)
        if start != -1:
            return description[start + len(marker):]
    return 'None'


def _instructor_name(instructor):
    """Return the instructor's name, mapping the ', taff' placeholder to 'Staff'."""
    # NOTE(review): ', taff' is presumably how the API reports "Staff" - confirm.
    name = instructor['Name']
    return 'Staff' if name == ', taff' else name


def _collapse(values):
    """Return the sole element of a one-item list, otherwise the list itself."""
    return values[0] if len(values) == 1 else values


def _course_row(area_name, course):
    """Build one CSV row (matching ``header``) from a course record."""
    description = course['Description']
    schedules = course['Schedules']
    return [
        area_name,
        course['CourseCode'],
        course['Name'],
        description,
        _collapse([_instructor_name(person) for person in course['Instructors']]),
        _collapse([slot['Campus'] for slot in schedules]),
        _collapse([slot['MeetTime'] for slot in schedules]),
        _collapse([slot['Weekdays'] for slot in schedules]),
        _extract_prerequisites(description),
    ]


# Writing data to CSV

header = ['Course Area', 'CourseCode', 'Name', 'Course Description', 'Faculty', 'Campus', 'MeetTime', 'Weekdays', 'Prerequisites']

with open('courses.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    for current, listing in courses_by_area.items():
        for course in listing:
            try:
                data = _course_row(valid[current], course)
            except (KeyError, TypeError, AttributeError):
                # Skip incomplete records instead of writing a stale row
                # left over from the previous course.
                print("Insufficient information on course")
                continue
            writer.writerow(data)

# Collapse rows that share a CourseCode (one row per course area the course
# is listed under) into a single row whose 'Course Area(s)' column lists
# every area, then rewrite courses.csv in place.
df = pd.read_csv('courses.csv')
duplicates = df.loc[df.duplicated(subset=['CourseCode'], keep=False), :]
print("duplicates=", duplicates)

# transform() fills the column for every row, so courses listed under a single
# area keep that area instead of ending up with an empty 'Course Area(s)'.
df['Course Area(s)'] = df.groupby('CourseCode')['Course Area'].transform(
    lambda areas: ', '.join(areas.dropna().astype(str))
)
df = df.drop_duplicates(subset=['CourseCode'], keep='first')

df = df.drop('Course Area', axis=1)
df.to_csv('courses.csv', index=False)
31 changes: 31 additions & 0 deletions recommend_tfidf/generate_tfidf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# this file only needs to be run once to generate the tfidf vectors and save them to a pickle file

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
import re

data = pd.read_csv('course/courses.csv')
data = data.dropna(subset=['CourseCode', 'Course Description'])
# Drop rows whose description contains no word characters at all.
# .copy() so the 'combined' column below is added to an independent frame
# rather than to a slice of the original.
data = data[~data['Course Description'].str.contains(r'^[^\w]*$', flags=re.IGNORECASE)].copy()

# Combine description, course code, name, faculty, campus and prerequisites
# into a single text entry per course; this is what gets vectorized.
data['combined'] = data.apply(
    lambda row: ' '.join([
        str(row['Course Description']),
        str(row['CourseCode']),
        str(row['Name']),
        str(row['Faculty']),
        str(row['Campus']),
        str(row['Prerequisites']),
    ]),
    axis=1,
)
course_descriptions = data["combined"].tolist()
course_titles = data["CourseCode"].tolist()

tfidfvectorizer = TfidfVectorizer(analyzer='word', stop_words= 'english', ngram_range=(2, 5))
vectors = tfidfvectorizer.fit_transform(course_descriptions)

# Densify once and reuse; the row count comes from the matrix shape so the
# sanity check does not need a second dense copy.
dense_vectors = vectors.toarray()

print("Length of course_titles:", len(course_titles))
print("Length of vectors:", vectors.shape[0])

with open("recommend_tfidf/vectorizer.pkl", "wb") as f:
    pickle.dump(tfidfvectorizer, f)

# One row per course: its code, its TF-IDF vector and the details shown on
# the results page.
course_data = pd.DataFrame({"course_title": course_titles, "vector": list(dense_vectors), "Name": data["Name"], "Description": data["Course Description"], "Instructor": data["Faculty"], "Location": data["Campus"], "Prerequisites": data["Prerequisites"], "GE(s) Satisfied": data["Course Area(s)"]})
print("course_data =", course_data)

course_data.to_pickle("recommend_tfidf/course_data.pkl")
78 changes: 78 additions & 0 deletions recommend_tfidf/recommend_tfidf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import pickle
from scipy.spatial.distance import cosine
import numpy as np
import pandas as pd
import re

# Fitted TF-IDF vectorizer written by generate_tfidf.py.
with open("recommend_tfidf/vectorizer.pkl", "rb") as vectorizer_file:
    loaded_vectorizer = pickle.load(vectorizer_file)

# Per-course table (code, vector and display details) written by
# generate_tfidf.py.
with open("recommend_tfidf/course_data.pkl", "rb") as course_data_file:
    loaded_course_data = pd.read_pickle(course_data_file)

# Stack the per-course vectors into a single array, one row per course.
loaded_course_vectors = np.array(list(loaded_course_data["vector"]))

def similarity(vec1, vec2):
    '''similarity(vec1, vec2) -> cosine similarity of vectors

    Returns 0.0 when either vector is all zeros: the cosine is undefined
    there and would otherwise come back as NaN.
    '''
    if not np.any(vec1) or not np.any(vec2):
        return 0.0
    return 1 - cosine(vec1, vec2)

# Campus code: the word characters immediately before a hyphen in a course code.
_COLLEGE_RE = re.compile(r"(\w+)-")
# Course level: the first run of digits in a course code.
_LEVEL_RE = re.compile(r"\d+")


def _permitted_colleges(cmc, pomona, hmc, scripps, pitzer, other):
    """Return the set of campus codes enabled by the given filter flags."""
    permitted = set()
    if cmc:
        permitted.add("CM")
    if pomona:
        permitted.add("PO")
    if hmc:
        permitted.add("HM")
    if scripps:
        permitted.add("SC")
    if pitzer:
        permitted.add("PZ")
    if other:
        permitted.update(("JP", "KS", "JM", "CH", "PPO", "AF"))
    return permitted


def recommend(user_input, rec_num=10, upperlvl=False, cmc=True, pomona=True, hmc=True, scripps=True, pitzer=True, other=True):
    """Return the courses most similar to a free-text query.

    Args:
        user_input: Query text; vectorized with the loaded TF-IDF vectorizer.
        rec_num: Maximum number of recommendations to return.
        upperlvl: When true, skip courses whose first number in the course
            code is below 100.
        cmc, pomona, hmc, scripps, pitzer, other: Campus filters; a course is
            kept only if the campus code parsed from its course code is
            enabled.

    Returns:
        A list of ``(course_code, similarity_score, details)`` tuples in
        descending similarity order, where ``details`` is a dict with the
        keys ``title``, ``description``, ``prerequisites``, ``instructor``,
        ``location`` and ``ges``. Courses with no similarity to the query are
        never returned, so the list may be shorter than ``rec_num`` (and is
        empty when the query shares no terms with the vocabulary).
    """
    query_vector = loaded_vectorizer.transform([user_input]).toarray().ravel()

    # Cosine similarity of the query against every course in one shot.
    # Rows (or a query) with zero norm get similarity 0 instead of NaN.
    dots = loaded_course_vectors @ query_vector
    norms = np.linalg.norm(loaded_course_vectors, axis=1) * np.linalg.norm(query_vector)
    similarities = np.divide(dots, norms, out=np.zeros(dots.shape, dtype=float), where=norms > 0)

    permitted = _permitted_colleges(cmc, pomona, hmc, scripps, pitzer, other)

    recommendations = []
    # Walk the courses from most to least similar.
    for index in np.argsort(similarities)[::-1]:
        if len(recommendations) >= rec_num:
            break
        similarity_score = similarities[index]
        if similarity_score <= 0:
            # Everything from here on is unrelated to the query.
            break

        row = loaded_course_data.iloc[index]
        course_code = row["course_title"]

        college = _COLLEGE_RE.search(course_code)
        if college is None or college.group(1) not in permitted:
            continue
        if upperlvl:
            level = _LEVEL_RE.search(course_code)
            if level is None or int(level.group(0)) < 100:
                continue

        course_details = {
            "title": row["Name"],
            "description": row["Description"],
            "prerequisites": row["Prerequisites"],
            "instructor": row["Instructor"],
            "location": row["Location"],
            "ges": row["GE(s) Satisfied"],
        }
        recommendations.append((course_code, similarity_score, course_details))

    return recommendations

#table = recommend("machine")
#for row in table:
# print(row)
Binary file added recommend_tfidf/vectorizer.pkl
Binary file not shown.
4 changes: 2 additions & 2 deletions templates/head.html
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
source : function(request, response) {
$.ajax({
type: "POST",
url : "https://www.p-recs.com/search",
url : "http://127.0.0.1:5000/search",
dataType : "json",
cache: false,
data : {
Expand All @@ -33,4 +33,4 @@
minLength : 1
});
});
</script>
</script>
2 changes: 1 addition & 1 deletion templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
</div>
</div>
</body>
</html>
</html>
27 changes: 25 additions & 2 deletions templates/result.html
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

<div class="results">
<h3>Recs for {{ course }}</h3>

{% for table in tables %}
<div class="course-rec">
<button type="button" class="collapsible button">
Expand All @@ -33,7 +32,31 @@ <h3>Recs for {{ course }}</h3>
Prerequisites
</p>
<p>{% if table[2].prerequisites %}{{ table[2].prerequisites }}{% else %}None{% endif %}</p>
</div>
<p class="uppercase-title">
Instructors
</p>
{% if table[2].instructor %}
<p>{{ table[2].instructor }}</p>
{% else %}
<p>None</p>
{% endif %}
<p class="uppercase-title">
Location
</p>
{% if table[2].location %}
<p>{{ table[2].location }}</p>
{% else %}
<p>None</p>
{% endif %}
<p class="uppercase-title">
GE(s) Satisfied
</p>
{% if table[2].ges %}
<p>{{ table[2].ges }}</p>
{% else %}
<p>None</p>
{% endif %}

</div>
{% endfor %}
</div>
Expand Down
16 changes: 13 additions & 3 deletions templates/search.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,22 @@ <h1><a class="left-col" href="/">p-recs</a></h1>
<input type="text" name="search" id="searchBox" placeholder="Input course name or ID">
<!-- submit = button!; value = what shows up on the button-->
<div class="checkboxes">
<input type="checkbox" name="lowlvl" value="1"/> Remove lower level courses
<input type="checkbox" name="upperlvl" value="1"/> Remove lower level courses
<br>
<input type="checkbox" name="dept_filter" value="1"/> Remove same department courses
<input type="checkbox" checked name="cmc" value="1"/> Include CMC courses
<br>
<input type="checkbox" checked name="pomona" value="1"/> Include Pomona courses
<br>
<input type="checkbox" checked name="hmc" value="1"/> Include Harvey Mudd courses
<br>
<input type="checkbox" checked name="pitzer" value="1"/> Include Pitzer courses
<br>
<input type="checkbox" checked name="scripps" value="1"/> Include Scripps courses
<br>
<input type="checkbox" checked name="other" value="1"/> Include Other courses
</div>
<input type="submit" value="Get recs">
</form>
{% if error %}
<p class="error"><strong>Error:</strong> {{ error }}
{% endif %}
{% endif %}