-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.rb
70 lines (52 loc) · 2.13 KB
/
scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
require 'nokogiri'
require 'open-uri'
# Scrapes the course listing served at oyc.yale.edu and caches a dump of it
# in a local file.
#
# Each course is a Hash with the keys :professor, :number, :link,
# :department, :department_link and :title; get_sessions adds :sessions
# (an Array of {title:, link:} hashes) and :time.
#
# The cache file holds the Array#to_s rendering of the course array, and
# Scraper.courses returns that raw text as a String (not a parsed structure).
class Scraper
  # Host that the relative links found in the course table are resolved against.
  BASE_URL = 'http://oyc.yale.edu'.freeze
  # Path, relative to the working directory, of the cached dump.
  COURSES_FILE = 'all_courses'.freeze

  # Reads the cached dump from COURSES_FILE.
  #
  # @return [String, nil] the raw file contents, or nil when no cache file
  #   exists yet (e.g. before the first call to Scraper.update)
  def self.read_cache
    return nil unless File.exist?(COURSES_FILE)

    File.open(COURSES_FILE, 'rb') { |io| io.read }
  end
  private_class_method :read_cache

  # Downloads +url+ and parses it as HTML.
  #
  # URI#open (from open-uri) is used instead of Kernel#open, which no longer
  # accepts URLs on Ruby 3.0+; the block form closes the response stream.
  #
  # @param url [String] absolute http(s) URL
  # @return [Nokogiri::HTML::Document]
  def self.fetch_document(url)
    URI.parse(url).open { |io| Nokogiri::HTML(io) }
  end
  private_class_method :fetch_document

  # Loaded once when the class is defined. A missing cache file yields nil
  # rather than an exception, so Scraper.update can still be called to
  # create it.
  @@courses = read_cache

  # Scrapes the course table into an array of course hashes.
  #
  # Rows that do not contain a course-number link are skipped. A missing
  # department link yields nil for :department and :department_link.
  #
  # @return [Array<Hash>] one hash per course row
  def self.courses_to_hasharray
    doc = fetch_document(BASE_URL + '/courses')
    courses = []
    # The first <tr> is the header row (<th> cells), so it is discarded.
    doc.css('tr').drop(1).each do |row|
      number_links = row.css('td.views-field-field-course-number-value').css('a')
      course_link = number_links[0]
      next unless course_link

      # The department name is taken from the second link in the cell.
      department = row.css('td.views-field-field-course-department-nid').css('a')[1]

      courses.push(
        # Non-bang strip: strip! returns nil when there is nothing to strip.
        professor: row.css('td.views-field-field-course-professors-name-value').text.strip,
        number: number_links.text,
        link: course_link['href'],
        department: department && department.text,
        department_link: department && department['href'],
        title: row.css('td.views-field-title').text.strip
      )
    end
    courses
  end

  # Fetches every course's page and attaches its session list.
  #
  # Mutates each hash in +hasharray+ in place, setting :sessions and :time.
  # Rows without a session-title link are skipped.
  #
  # @param hasharray [Array<Hash>] course hashes, each with a :link path
  # @return [Array<Hash>] the same array that was passed in
  def self.get_sessions(hasharray)
    hasharray.each do |course_hash|
      doc = fetch_document(BASE_URL + course_hash[:link] + '#sessions')
      sessions = []
      doc.css('tbody').css('tr').each do |row|
        anchor = row.css('td.views-field-field-session-display-title-value').css('a')[0]
        next unless anchor

        sessions.push(title: anchor.text, link: anchor['href'])
      end
      course_hash[:sessions] = sessions
      # Time in hours: (1hr video + 1hr study) per lecture.
      course_hash[:time] = sessions.length * 2
    end
    hasharray
  end

  # Scrapes all courses together with their sessions.
  #
  # @return [Array<Hash>] see courses_to_hasharray and get_sessions
  def self.coursearray
    get_sessions(courses_to_hasharray)
  end

  # TODO: have a :changed bool value in the session that says whether the
  #   schedule needs to be updated. Change it when you post from either
  #   shed or avail, then re-run the algo/schedule function.
  # TODO: later, make courses droppable by starting part way in.
  # TODO: make a :my_courses array of indexes on post.

  # Re-scrapes the site, rewrites the cache file and reloads it.
  #
  # The scrape is completed before the file is opened for writing, so a
  # failed scrape leaves the previous cache intact instead of truncating it.
  #
  # @return [nil]
  def self.update
    dump = coursearray.to_s
    File.open(COURSES_FILE, 'w') { |file| file.write(dump) }
    @@courses = read_cache
    # Diagnostic output: prints the reloaded dump and its class.
    puts @@courses
    puts @@courses.class
  end

  # @return [String, nil] the raw cached dump, or nil if none has been loaded
  def self.courses
    @@courses
  end
end