Skip to content

Commit 29b7172

Browse files
committed
1.0
0 parents  commit 29b7172

11 files changed

+829
-0
lines changed

.gitignore

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
config.py
2+
.idea/
3+
captcha/
4+
mentor/
5+
crack/
6+
homepage/htmls
7+
homepage/*.xlsx
8+
gui_get_captcha/

homepage/jiaowu.py

+353
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,353 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
"""
4+
@author: SSSimon Yang
5+
6+
@file: jiaowu.py
7+
@time: 2019/8/25 11:30
8+
@desc:
9+
"""
10+
import os
11+
import re
12+
import xml.etree.ElementTree as ElementTree
13+
from itertools import product
14+
from urllib.parse import urljoin
15+
16+
import openpyxl
17+
import psycopg2
18+
from lxml import etree
19+
20+
import config
21+
from main.login import login
22+
23+
# Maps the Chinese weekday names used in the timetable text to 1 (Monday) .. 7 (Sunday).
day_of_week_to_number = {
    weekday: index
    for index, weekday in enumerate(
        ("星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"),
        start=1,
    )
}
32+
33+
34+
def get_all_courses():
    """Scrape every page of the course list and insert the rows into ``course``.

    Logs in with the credentials from ``config``, requests the course query
    page once to count the options of the pager ``<select>``, then fetches
    each page and inserts one row per table row (conflicting rows are
    skipped by the ``on conflict do nothing`` clause).

    Uses the module-level cursor ``c`` and connection ``conn``; the
    connection is committed and closed before returning.
    """
    term = "2019-2020学年上学期"
    html = "http://jwc.swjtu.edu.cn/vatuu/CourseAction"
    params = {
        "setAction": "queryCourseList",
        "viewType": "",
        "orderType": "teachId",
        "orderValue": "asc",
        "selectAction": "QueryAll",
        "key1": "",
        "key2": "",
        "key3": "",
        "key4": "",
        "selectTermId": "92",
        "selectTermName": "2019-2020第1学期",
        "courseType": "all",
        "selectTableType": "ThisTerm",
        "jumpPage": "1",
    }
    s = login(user_id=config.ly_user_id, user_password=config.ly_user_password)

    def first_text(node, path):
        # First XPath hit, stripped; raises IndexError when the cell is missing.
        return node.xpath(path)[0].strip()

    def stripped_texts(node, path):
        # Every XPath hit, stripped, in document order.
        return [text.strip() for text in node.xpath(path)]

    def course_page(page_number):
        # Fetch one result page and insert all of its course rows.
        params["jumpPage"] = page_number
        response = s.get(html, params=params)
        document = etree.HTML(response.text)
        rows = []
        for tr in document.xpath("//table[@class='c-tb']/tbody/tr"):
            number = first_text(tr, "td[2]/a/font/text()")
            code = first_text(tr, "td[3]/a/text()")
            name = first_text(tr, "td[4]/a/text()")
            credit = first_text(tr, "td[6]/a/text()")
            course_type = first_text(tr, "td[7]/a/text()")
            teacher_name = "+".join(stripped_texts(tr, "td[9]/a/text()"))

            # The cell alternates time and location text nodes; blank nodes
            # are dropped, so an odd count means the layout changed.
            time_location_list = [text for text in stripped_texts(tr, "td[11]/text()") if text]
            assert len(time_location_list) % 2 == 0, f"the length of {time_location_list} is out of control"
            time_location = "+".join(time_location_list)

            classes = "+".join(stripped_texts(tr, "td[12]/a/text()"))

            # Status cell reads "<enrolled>/<capacity>".
            number_of_people, class_capacity = first_text(tr, "td[13]/text()").split("/")
            campus = first_text(tr, "td[14]/a/text()")

            # With four <p> entries the name-list link is the second one,
            # otherwise it is the first.
            if len(tr.xpath("td[15]/p")) == 4:
                link_path = "td[15]/p[2]/a/@href"
            else:
                link_path = "td[15]/p[1]/a/@href"
            name_list_url = urljoin(html, tr.xpath(link_path)[0])

            rows.append((
                number, code, name, credit, course_type, teacher_name,
                time_location, classes, number_of_people, class_capacity,
                campus, name_list_url, term,
            ))
        c.executemany(
            "INSERT INTO course values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) on conflict do nothing",
            rows,
        )
        print(f"{page_number}{len(rows)} and {c.statusmessage}")

    # The pager <select> has one <option> per result page.
    first_response = s.get(html, params=params)
    first_document = etree.HTML(first_response.text)
    page_total = len(first_document.xpath("//select[@class='btn btn-page']/option"))

    for page_index in range(1, page_total + 1):
        course_page(page_index)
    conn.commit()
    conn.close()
100+
101+
102+
def get_all_names():
    """Download the student name list of every course into ``student_course``.

    Reads ``name_list_url``, ``number`` and ``term`` for every row of the
    ``course`` table, fetches each name-list page with a logged-in session
    and inserts one row per student (conflicting rows are skipped).

    Uses the module-level cursor ``c`` and connection ``conn``.  Work is
    committed every 100 courses and once more at the end, after which the
    connection is closed.
    """
    s = login(user_id=config.ly_user_id, user_password=config.ly_user_password)

    def name_single(url, course, term):
        # Fetch one name-list page and insert its students for ``course``.
        single_res = s.get(url)
        single_page = etree.HTML(single_res.text)
        lists = single_page.xpath("//table[@id='table2']/tr")
        if not lists:
            return
        rows = []
        # The first <tr> is the table header.
        for tr in lists[1:]:
            student_id = tr.xpath("td[2]/text()")[0].strip()
            student_name = tr.xpath("td[3]/text()")[0].strip()
            student_gender = tr.xpath("td[4]/text()")[0].strip()
            student_class = tr.xpath("td[5]/text()")[0].strip()
            rows.append((course, student_id, student_name, student_class, student_gender, term))
        c.executemany("INSERT INTO student_course values (%s,%s,%s,%s,%s,%s) on conflict do nothing ", rows)
        print(f"课程{course} {len(rows)} and {c.statusmessage}")

    c.execute("SELECT name_list_url,number,term FROM course order by number ")
    results = c.fetchall()
    for count, line in enumerate(results, start=1):
        name_single(*line)
        print(count, line[1])
        # Fixed: the original ``num + 1 % 100 == 0`` parsed as
        # ``num + (1 % 100) == 0`` and therefore never committed mid-run.
        if count % 100 == 0:
            conn.commit()
    conn.commit()
    conn.close()
131+
132+
133+
def single_student_course_time_location(student_id):
    """Return the parsed timetable entries of one student.

    Queries ``course`` joined with ``student_course`` through the
    module-level cursor ``c`` and splits each course's ``time_location``
    string (``time+location[+time+location...]``) into separate entries.

    :param student_id: the student's id; converted to ``str`` for the query.
    :return: a list of ``[course, weeks, day_of_week, time_of_day, location]``
        lists.  When the time text carries no weekday, ``day_of_week`` and
        ``time_of_day`` are both ``0``; otherwise ``day_of_week`` is 1-7 and
        ``time_of_day`` is the list of period numbers covered.
    :raises AssertionError: if a time text has neither 1 nor 3
        space-separated parts.
    :raises KeyError: if the weekday text is not in ``day_of_week_to_number``.
    """
    # Parameterised query instead of f-string interpolation, so the id can
    # neither break nor inject into the statement.
    c.execute(
        """select c.name, c.time_location,sc.student_name
        from course c,student_course sc
        where c.number = sc.course and sc.student_id = %s""",
        (str(student_id),),
    )
    courses = c.fetchall()

    results = []
    for name, time_location, _student_name in courses:
        # Courses without any time/location are skipped.  The original
        # ``pass`` fell through and crashed on ``temp[1]``.
        if not time_location:
            continue
        parts = time_location.split("+")
        # Parts alternate time, location; walk every complete pair instead
        # of only the first one or two.
        for k in range(0, len(parts) - 1, 2):
            time, location = parts[k], parts[k + 1]
            fields = time.split(" ")
            if len(fields) == 1:
                weeks = fields[0]
                day_of_week = 0  # use zero to imply unknown day_of_week
                time_of_day = 0
            else:
                assert len(fields) == 3, f"the length of {fields} is out of control"
                weeks = fields[0]
                day_of_week = day_of_week_to_number[fields[1]]
                # "3-4节" -> [3, 4]; a single period such as "3节" -> [3].
                nums = [int(n) for n in fields[2].strip("节").split("-")]
                time_of_day = list(range(nums[0], nums[-1] + 1))
            results.append([name, weeks, day_of_week, time_of_day, location])
    return results
164+
165+
166+
def single_student_write_to_excel(ws, results):
    """Write one student's timetable entries into worksheet ``ws``.

    :param ws: object exposing ``cell(row=..., column=...)`` whose result has
        a writable ``value`` attribute.
    :param results: iterable of
        ``(course, weeks, day_of_week, time_of_days, location)`` entries.

    Entries with ``day_of_week == 0`` are listed one per row in column 2
    starting at row 15.  Every other entry is written to the cell at row
    ``period + 1``, column ``day_of_week + 1`` for each of its periods.
    """
    next_unknown_row = 15
    for course, weeks, day_of_week, time_of_days, location in results:
        if day_of_week != 0:
            for period in time_of_days:
                target = ws.cell(row=period + 1, column=day_of_week + 1)
                target.value = " ".join([course, weeks, location])
            continue
        target = ws.cell(row=next_unknown_row, column=2)
        target.value = " ".join([course, weeks, "时间未知", location])
        next_unknown_row += 1
176+
177+
178+
def get_time_to_excel(student_ids, student_names, job_name, detailed=False):
    """Build the free/busy statistics workbook for a group of students.

    Loads ``模板.xlsx`` from the working directory, fills the three summary
    sheets and saves the result as ``{job_name}.xlsx``:

    * ``空闲人数`` - per slot, ``len(student_ids)`` minus the busy count;
    * ``非空闲人数`` - per slot, the busy count;
    * ``非空闲名单`` - one row per slot with busy students: weekday, period
      and the space-joined names.

    Slots cover weekdays 1-5 and periods 1-13.  Entries whose weekday is 0
    (unknown) are left out of the statistics.

    :param student_ids: ids passed to ``single_student_course_time_location``.
    :param student_names: names paired with ``student_ids`` by position.
    :param job_name: output file name without the ``.xlsx`` extension.
    :param detailed: when true, also add one sheet per student (a copy of
        the ``模板`` sheet, titled with the student's name).
    """
    people_of_all = len(student_ids)
    # NOTE(review): keyed by name, so two students sharing a name collapse
    # into one entry while ``people_of_all`` still counts both.
    results = {
        student_name: single_student_course_time_location(student_id)
        for student_id, student_name in zip(student_ids, student_names)
    }

    # Busy (weekday, period) slots per student, as sets for O(1) membership.
    times_for_people = {
        student_name: {
            (day_of_week, period)
            for _course, _weeks, day_of_week, periods, _location in result
            if day_of_week != 0
            for period in periods
        }
        for student_name, result in results.items()
    }

    slots = list(product(range(1, 6), range(1, 14)))
    times_for_time = {
        slot: [people for people, times in times_for_people.items() if slot in times]
        for slot in slots
    }

    wb = openpyxl.load_workbook("模板.xlsx")
    ws1 = wb["空闲人数"]
    ws2 = wb["非空闲人数"]

    for day, period in slots:
        busy = len(times_for_time[(day, period)])
        ws1.cell(column=day + 1, row=period + 1).value = people_of_all - busy
        ws2.cell(column=day + 1, row=period + 1).value = busy
    ws1.cell(row=15, column=1).value = f"共有 {people_of_all} 人"
    ws2.cell(row=15, column=1).value = f"共有 {people_of_all} 人"

    ws3 = wb["非空闲名单"]
    line = 1  # row 1 is left untouched; data starts on row 2
    for time, value in times_for_time.items():
        if value:
            line += 1
            ws3.cell(line, 1).value = time[0]
            ws3.cell(line, 2).value = time[1]
            ws3.cell(line, 3).value = " ".join(value)

    # Fixed: the original returned here when ``detailed`` was false, before
    # the workbook was saved, so the default call produced no file at all.
    if detailed:
        for student_name, result in results.items():
            ws = wb.copy_worksheet(wb["模板"])
            ws.title = student_name
            single_student_write_to_excel(ws, result)

    wb.active = wb.worksheets[0]
    wb.remove(wb["模板"])
    wb.save(f"{job_name}.xlsx")
227+
228+
229+
def check(student_ids, student_names):
    """Verify that every (id, name) pair exists in ``student_course``.

    Uses the module-level cursor ``c``.

    :param student_ids: sequence of student ids.
    :param student_names: sequence of names, paired with the ids by position.
    :return: ``True`` when every pair is found; ``False`` when the input is
        empty, nothing matches, or some pair is missing (the first missing
        pair is printed).
    :raises AssertionError: if the two sequences differ in length.
    """
    assert len(student_ids) == len(student_names)
    student_ids = tuple(student_ids)
    # An empty tuple would render as the invalid SQL ``in ()``.
    if not student_ids:
        return False
    # Let psycopg2 adapt the tuple into an IN list.  Interpolating the Python
    # tuple repr was injectable and broke for one id (``('x',)``).
    c.execute(
        """select distinct sc.student_id,sc.student_name
        from student_course sc
        where student_id in %s""",
        (student_ids,),
    )
    ids_and_names = set(c.fetchall())
    if not ids_and_names:
        return False
    for pair in zip(student_ids, student_names):
        if pair not in ids_and_names:
            print(*pair)
            return False
    return True
243+
244+
245+
def process_excel(path):
    """Read student names and ids from the first sheet of a workbook.

    Rows are read starting at row 2; each row must unpack into exactly two
    cells, the name first and the id second.  Both values are converted
    with ``str``.

    :param path: path of the workbook to load.
    :return: ``(student_ids, student_names)`` as two parallel lists.
    :raises ValueError: if a row does not have exactly two cells.
    """
    workbook = openpyxl.load_workbook(path)
    sheet = workbook[workbook.sheetnames[0]]
    ids, names = [], []
    for name_cell, id_cell in sheet.iter_rows(2):
        names.append(str(name_cell.value))
        ids.append(str(id_cell.value))
    return ids, names
254+
255+
256+
def main_excel(path, job_name="", detailed=False):
    """Produce the free-time workbook for the students listed in an Excel file.

    :param path: workbook read by ``process_excel``.
    :param job_name: output name; when empty, the input file's base name
        (without extension) followed by ``空闲时间统计`` is used.
    :param detailed: forwarded to ``get_time_to_excel``.
    :return: ``0`` on success, ``1`` when the workbook layout is wrong,
        ``2`` when ``check`` rejects the ids and names.
    """
    try:
        ids, names = process_excel(path)
    except ValueError:
        print("excel格式错误")
        return 1

    if not check(ids, names):
        print("student ids and student names are not correspond")
        return 2

    if not job_name:
        stem = os.path.splitext(os.path.basename(path))[0]
        job_name = stem + "空闲时间统计"

    get_time_to_excel(ids, names, job_name=job_name, detailed=detailed)
    return 0
270+
271+
272+
def main_ids_and_names(ids_and_names, job_name='', detailed=False):
    """Produce the free-time workbook from in-memory ids and names.

    :param ids_and_names: a pair ``(student_ids, student_names)``.
    :param job_name: output name; defaults to ``空闲时间统计`` when empty.
    :param detailed: forwarded to ``get_time_to_excel``.
    :return: ``0`` on success, ``2`` when ``check`` rejects the ids and names.
    """
    ids, names = ids_and_names
    if not check(ids, names):
        print("student ids and student names are not correspond")
        return 2
    output_name = job_name or "空闲时间统计"
    get_time_to_excel(ids, names, job_name=output_name, detailed=detailed)
    return 0
281+
282+
283+
def get_all_classes():
    """Fetch class records for a college and insert them into ``class``.

    Logs in, reads the college names and codes from the AjaxXML endpoint,
    posts a class query per selected college and inserts the values of
    columns 2-7 of every result row.  Uses the module-level cursor ``c``
    and connection ``conn``.
    """
    s = login(user_id=config.ly_user_id, user_password=config.ly_user_password)
    college_res = s.get("http://jwc.swjtu.edu.cn/vatuu/AjaxXML?selectType=CollegeInfo&selectValue=allCollege")
    college_xml = ElementTree.fromstring(college_res.text)
    college_names = [node.text for node in college_xml.iter("college_name")]
    college_codes = [node.text for node in college_xml.iter("college_code")]
    colleges = list(zip(college_names, college_codes))

    url = "http://jwc.swjtu.edu.cn/vatuu/PublicInfoQueryAction"
    data = {
        "setAction": "queryClass",
        "collegeCode": "16",
        "btn1": "执行查询",
    }
    # NOTE(review): only the third college is processed ([2:3]); this looks
    # like a debugging leftover - confirm before relying on "all" classes.
    for college_name, college_code in colleges[2:3]:
        print(college_name)
        data["collegeCode"] = college_code
        class_res = s.post(url, data=data)
        class_page = etree.HTML(class_res.text)
        # The first two <tr> elements are skipped as non-data rows.
        trs = class_page.xpath("//table[@class='table_gray']/tr")[2:]
        if len(trs) <= 1:
            continue
        rows = []
        for tr in trs:
            rows.append((
                tr.xpath("td[2]/text()")[0].strip(),  # class code
                tr.xpath("td[3]/text()")[0].strip(),  # class name
                tr.xpath("td[4]/text()")[0].strip(),  # college name
                tr.xpath("td[5]/text()")[0].strip(),  # major name
                tr.xpath("td[6]/text()")[0].strip(),  # grade
                tr.xpath("td[7]/text()")[0].strip(),  # class num
            ))
        c.executemany("INSERT INTO class values (%s,%s,%s,%s,%s,%s) ", rows)
    conn.commit()
313+
314+
315+
def students():
    """Populate ``student`` from the distinct students in ``student_course``.

    Runs in two passes over the module-level cursor ``c``, committing after
    each:

    1. students whose class is present in ``class`` get major, college and
       grade from that table;
    2. students whose class is not present get the strings ``"null"`` for
       major and college and ``"0"`` for grade.

    Each inserted row also carries the leading ``(20)?dd`` digits of the
    student id and a ``"true"``/``"false"`` string telling whether the class
    name contains a bracketed part.
    """

    def id_prefix(student_id):
        # Leading two digits of the id, optionally preceded by "20".
        return re.match(r"(20)?\d{2}", student_id).group()

    def bracket_flag(student_class):
        # "true" when the class name contains "[...]", otherwise "false".
        return "true" if re.search(r".*\[.*\].*", student_class) else "false"

    c.execute("""select distinct student_id,student_name,student_class,student_gender,major_name,college_name,grade
    from student_course,class
    where student_class = class_name""")
    matched = []
    for record in c:
        matched.append((*record[0:6], id_prefix(record[0]), str(record[-1]), bracket_flag(record[2])))
    c.executemany("INSERT INTO student values (%s,%s,%s,%s,%s,%s,%s,%s,%s)", matched)
    conn.commit()

    c.execute("""select distinct student_id,student_name,student_class,student_gender
    from student_course
    where student_class not in (select class_name from class)""")
    unmatched = []
    for record in c:
        unmatched.append((*record, "null", "null", id_prefix(record[0]), "0", bracket_flag(record[2])))
    # enrol_grade change manually
    c.executemany("INSERT INTO student values (%s,%s,%s,%s,%s,%s,%s,%s,%s)", unmatched)
    conn.commit()
334+
335+
336+
if __name__ == '__main__':
    # ``conn`` and ``c`` are the module-level connection and cursor that the
    # functions above use; they exist only when the file is run as a script.
    db_settings = {
        "dbname": config.dbname,
        "user": config.user,
        "password": config.password,
        "host": config.host,
        "port": config.port,
    }
    conn = psycopg2.connect(**db_settings)
    c = conn.cursor()

    # Uncomment the step(s) to run.
    # get_all_courses()
    # get_all_classes()
    # get_all_names()
    # students()

    # main_excel(path="***.xlsx", job_name="***", detailed=True)

    # names = ['***', '***']
    # ids = ['**********',
    #        '**********']
    # main_ids_and_names((ids, names), detailed=True, job_name="temp")

    conn.close()

0 commit comments

Comments
 (0)