|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +""" |
| 4 | +@author: SSSimon Yang |
| 5 | + |
| 6 | +@file: jiaowu.py |
| 7 | +@time: 2019/8/25 11:30 |
| 8 | +@desc: |
| 9 | +""" |
| 10 | +import os |
| 11 | +import re |
| 12 | +import xml.etree.ElementTree as ElementTree |
| 13 | +from itertools import product |
| 14 | +from urllib.parse import urljoin |
| 15 | + |
| 16 | +import openpyxl |
| 17 | +import psycopg2 |
| 18 | +from lxml import etree |
| 19 | + |
| 20 | +import config |
| 21 | +from main.login import login |
| 22 | + |
# Maps the Chinese weekday labels that appear in the scraped timetable text
# to weekday numbers, Monday ("星期一") = 1 through Sunday ("星期日") = 7.
day_of_week_to_number = {
    label: index
    for index, label in enumerate(
        ("星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"),
        start=1,
    )
}
| 32 | + |
| 33 | + |
def get_all_courses():
    """Scrape every page of the course list and store the rows in ``course``.

    Logs in with the credentials from ``config``, reads the number of result
    pages from the page selector of the first response, then fetches each
    page in turn and inserts one row per course through the module-level
    cursor ``c`` (conflicting rows are skipped via ``on conflict do
    nothing``).  The module-level connection ``conn`` is committed and closed
    once all pages have been processed.

    Raises:
        AssertionError: if a course's time/location cell does not contain an
            even number of non-empty text fragments.
    """
    term = "2019-2020学年上学期"
    html = "http://jwc.swjtu.edu.cn/vatuu/CourseAction"
    params = {"setAction": "queryCourseList",
              "viewType": "",
              "orderType": "teachId",
              "orderValue": "asc",
              "selectAction": "QueryAll",
              "key1": "",
              "key2": "",
              "key3": "",
              "key4": "",
              "selectTermId": "92",
              "selectTermName": "2019-2020第1学期",
              "courseType": "all",
              "selectTableType": "ThisTerm",
              "jumpPage": "1"}
    s = login(user_id=config.ly_user_id, user_password=config.ly_user_password)

    def first_text(node, path):
        """Return the stripped first text node matched by *path* under *node*."""
        return node.xpath(path)[0].strip()

    def stripped_texts(node, path):
        """Return every text node matched by *path* under *node*, stripped."""
        return [text.strip() for text in node.xpath(path)]

    def course_page(page_number):
        """Fetch result page *page_number* and insert all of its course rows."""
        params["jumpPage"] = page_number
        single_res = s.get(html, params=params)
        single_page = etree.HTML(single_res.text)
        rows = []
        for tr in single_page.xpath("//table[@class='c-tb']/tbody/tr"):
            number = first_text(tr, "td[2]/a/font/text()")
            code = first_text(tr, "td[3]/a/text()")
            name = first_text(tr, "td[4]/a/text()")
            credit = first_text(tr, "td[6]/a/text()")
            course_type = first_text(tr, "td[7]/a/text()")
            teacher_name = "+".join(stripped_texts(tr, "td[9]/a/text()"))

            # Blank fragments are dropped; downstream code splits the joined
            # string back into (time, location) pairs, so the count must be even.
            time_location_list = [
                text for text in stripped_texts(tr, "td[11]/text()") if text
            ]
            if len(time_location_list) % 2 != 0:
                # Explicit raise instead of ``assert`` so the check survives
                # ``python -O``; the exception type is unchanged.
                raise AssertionError(
                    f"the length of {time_location_list} is out of control")
            time_location = "+".join(time_location_list)

            classes = "+".join(stripped_texts(tr, "td[12]/a/text()"))
            status = first_text(tr, "td[13]/text()")
            number_of_people, class_capacity = status.split("/")
            campus = first_text(tr, "td[14]/a/text()")

            # The last cell holds the link to the course's name list: it is
            # the second <p> when the cell has four <p> entries, else the first.
            link_index = 2 if len(tr.xpath("td[15]/p")) == 4 else 1
            name_list_url = urljoin(
                html, tr.xpath(f"td[15]/p[{link_index}]/a/@href")[0])

            rows.append(
                (number, code, name, credit, course_type, teacher_name,
                 time_location, classes, number_of_people, class_capacity,
                 campus, name_list_url, term))
        c.executemany(
            "INSERT INTO course values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) on conflict do nothing",
            rows)

        print(f"{page_number} 页 {len(rows)} and {c.statusmessage}")

    # The page selector has one <option> per result page.
    res = s.get(html, params=params)
    page = etree.HTML(res.text)
    page_total = len(page.xpath("//select[@class='btn btn-page']/option"))

    for page_num in range(1, page_total + 1):
        course_page(page_num)
    conn.commit()
    conn.close()
| 100 | + |
| 101 | + |
def get_all_names():
    """Download the name list of every stored course into ``student_course``.

    Reads ``name_list_url``, ``number`` and ``term`` for each row of the
    ``course`` table, fetches the name-list page and inserts one row per
    listed student through the module-level cursor ``c``.  Work is committed
    every 100 courses and once more at the end, after which the module-level
    connection ``conn`` is closed.
    """
    s = login(user_id=config.ly_user_id, user_password=config.ly_user_password)

    def name_single(url, course, term):
        """Insert the students listed at *url* for course number *course*."""
        single_res = s.get(url)
        single_page = etree.HTML(single_res.text)
        lists = single_page.xpath("//table[@id='table2']/tr")
        if not lists:
            return
        rows = []
        # The first <tr> is skipped (it is not a student row).
        for tr in lists[1:]:
            student_id = tr.xpath("td[2]/text()")[0].strip()
            student_name = tr.xpath("td[3]/text()")[0].strip()
            student_gender = tr.xpath("td[4]/text()")[0].strip()
            student_class = tr.xpath("td[5]/text()")[0].strip()
            rows.append((course, student_id, student_name, student_class,
                         student_gender, term))
        c.executemany(
            "INSERT INTO student_course values (%s,%s,%s,%s,%s,%s) on conflict do nothing ",
            rows)
        print(f"课程{course} {len(rows)} and {c.statusmessage}")

    c.execute("SELECT name_list_url,number,term FROM course order by number ")
    results = c.fetchall()
    for count, line in enumerate(results, start=1):
        name_single(*line)
        print(count, line[1])
        # Fixed precedence: the original ``num + 1 % 100 == 0`` parsed as
        # ``num + (1 % 100) == 0`` and therefore never committed in the loop.
        if count % 100 == 0:
            conn.commit()
    conn.commit()
    conn.close()
| 131 | + |
| 132 | + |
def single_student_course_time_location(student_id):
    """Return the parsed timetable entries of one student.

    Looks up every course the student is enrolled in (joining ``course`` and
    ``student_course`` through the module-level cursor ``c``) and splits each
    course's ``time_location`` string, which is a ``+``-joined sequence of
    alternating time and location fragments, into individual entries.

    Args:
        student_id: the student's id as stored in ``student_course``.

    Returns:
        A list of ``[course_name, weeks, day_of_week, time_of_day, location]``
        lists.  When the time fragment has no weekday/period part,
        ``day_of_week`` and ``time_of_day`` are both ``0``; otherwise
        ``day_of_week`` is the number from ``day_of_week_to_number`` and
        ``time_of_day`` is the list of period numbers covered.

    Raises:
        AssertionError: if a time fragment splits into neither one nor three
            space-separated parts.
        KeyError: if the weekday label is not in ``day_of_week_to_number``.
    """
    # Parameterised instead of f-string interpolation: the id originates from
    # caller-supplied data (e.g. a spreadsheet) and must not be spliced into SQL.
    c.execute(
        """select c.name, c.time_location,sc.student_name
           from course c,student_course sc
           where c.number = sc.course and sc.student_id = %s""",
        (student_id,))
    courses = c.fetchall()

    results = []
    for course_name, time_location, _student_name in courses:
        if not time_location:
            # Course without any scheduled time/location: nothing to report.
            # (The original used ``pass`` here and then crashed on temp[1].)
            continue
        fragments = time_location.split("+")
        # Fragments alternate time, location, time, location, ...; walk every
        # pair rather than only the first one or two.
        for time_text, location in zip(fragments[0::2], fragments[1::2]):
            parts = time_text.split(" ")
            weeks = parts[0]
            if len(parts) == 1:
                day_of_week = 0  # use zero to imply unknown day_of_week
                time_of_day = 0
            else:
                if len(parts) != 3:
                    # Explicit raise so the check is not stripped by ``-O``.
                    raise AssertionError(
                        f"the length of {parts} is out of control")
                day_of_week = day_of_week_to_number[parts[1]]
                nums = [int(n) for n in parts[2].strip("节").split("-")]
                # ``nums[-1]`` also covers a single-period slot such as "3节".
                time_of_day = list(range(nums[0], nums[-1] + 1))
            results.append(
                [course_name, weeks, day_of_week, time_of_day, location])
    return results
| 164 | + |
| 165 | + |
def single_student_write_to_excel(ws, results):
    """Write one student's timetable entries into worksheet *ws*.

    Args:
        ws: worksheet-like object exposing ``cell(row=..., column=...)``.
        results: iterable of ``[course, weeks, day_of_week, time_of_days,
            location]`` entries.

    Entries with ``day_of_week == 0`` are listed one per row in column 2,
    starting at row 15, with the marker text "时间未知".  Every other entry is
    written once per period into row ``period + 1``, column
    ``day_of_week + 1``.
    """
    unknown_rows_used = 0
    for course, weeks, day_of_week, time_of_days, location in results:
        if day_of_week != 0:
            for period in time_of_days:
                text = " ".join([course, weeks, location])
                ws.cell(row=period + 1, column=day_of_week + 1).value = text
            continue

        text = " ".join([course, weeks, "时间未知", location])
        ws.cell(row=15 + unknown_rows_used, column=2).value = text
        unknown_rows_used += 1
| 176 | + |
| 177 | + |
def get_time_to_excel(student_ids, student_names, job_name, detailed=False):
    """Build the free-time statistics workbook for a group of students.

    For each weekday 1-5 and period 1-13 the workbook (loaded from
    "模板.xlsx") receives: the number of free students (sheet "空闲人数"), the
    number of busy students (sheet "非空闲人数") and the names of the busy
    students (sheet "非空闲名单").  The result is saved as
    ``f"{job_name}.xlsx"``.

    Args:
        student_ids: student ids, parallel to *student_names*.
        student_names: student names; used as dictionary keys and, when
            *detailed* is true, as sheet titles.
        job_name: base name of the output file (without extension).
        detailed: when true, additionally add one timetable sheet per student
            copied from the "模板" sheet.

    Note:
        The original returned before saving when *detailed* was false, so the
        summary workbook was never written; it is now saved in both modes.
        NOTE(review): timetables are keyed by student name, so two students
        with the same name share one entry - confirm whether that can occur.
    """
    people_of_all = len(student_ids)
    results = {}
    for student_id, student_name in zip(student_ids, student_names):
        results[student_name] = single_student_course_time_location(student_id)

    # Per student: the set of (day_of_week, period) slots that are occupied.
    # Entries with day_of_week == 0 have no known slot and are ignored here.
    times_for_people = {}
    for student_name, entries in results.items():
        busy = set()
        for entry in entries:
            day_of_week, periods = entry[2], entry[3]
            if day_of_week == 0:
                continue
            busy.update((day_of_week, period) for period in periods)
        times_for_people[student_name] = busy

    slots = list(product(range(1, 6), range(1, 14)))
    times_for_time = {
        slot: [name for name, busy in times_for_people.items() if slot in busy]
        for slot in slots
    }

    wb = openpyxl.load_workbook("模板.xlsx")
    ws1 = wb["空闲人数"]
    ws2 = wb["非空闲人数"]

    for day, period in slots:
        busy_count = len(times_for_time[(day, period)])
        ws1.cell(column=day + 1, row=period + 1).value = people_of_all - busy_count
        ws2.cell(column=day + 1, row=period + 1).value = busy_count
    ws1.cell(row=15, column=1).value = f"共有 {people_of_all} 人"
    ws2.cell(row=15, column=1).value = f"共有 {people_of_all} 人"

    # One row per slot that has at least one busy student, starting at row 2.
    ws3 = wb["非空闲名单"]
    line = 1
    for slot, names in times_for_time.items():
        if names:
            line += 1
            ws3.cell(line, 1).value = slot[0]
            ws3.cell(line, 2).value = slot[1]
            ws3.cell(line, 3).value = " ".join(names)

    if detailed:
        template = wb["模板"]
        for student_name, entries in results.items():
            ws = wb.copy_worksheet(template)
            ws.title = student_name
            single_student_write_to_excel(ws, entries)

    wb.active = wb.worksheets[0]
    wb.remove(wb["模板"])
    wb.save(f"{job_name}.xlsx")
| 227 | + |
| 228 | + |
def check(student_ids, student_names):
    """Return whether every (id, name) pair exists in ``student_course``.

    Args:
        student_ids: sequence of student ids.
        student_names: sequence of student names, parallel to *student_ids*.

    Returns:
        ``True`` when every ``(student_id, student_name)`` pair is present in
        the ``student_course`` table; ``False`` when no id is given, when none
        of the ids is found, or when any pair is missing (the first missing
        pair is printed).

    Raises:
        AssertionError: if the two sequences differ in length.
    """
    if len(student_ids) != len(student_names):
        # Explicit raise so the check is not stripped by ``python -O``.
        raise AssertionError("student_ids and student_names differ in length")
    student_ids = tuple(student_ids)
    if not student_ids:
        # Nothing to look up; an empty IN list would also be invalid SQL.
        return False

    # psycopg2 adapts a Python tuple to an SQL ``IN`` list, which avoids both
    # the broken ``('x',)`` rendering for a single id and SQL injection.
    c.execute(
        """select distinct sc.student_id,sc.student_name
           from student_course sc
           where student_id in %s""",
        (student_ids,))
    known_pairs = set(c.fetchall())
    if not known_pairs:
        return False
    for pair in zip(student_ids, student_names):
        if pair not in known_pairs:
            print(*pair)
            return False
    return True
| 243 | + |
| 244 | + |
def process_excel(path):
    """Read student names and ids from the first sheet of an Excel file.

    The first row is skipped.  From the second row on, each row must have
    exactly two cells: the student name followed by the student id.

    Args:
        path: path of the workbook to load.

    Returns:
        A ``(student_ids, student_names)`` tuple of two parallel lists of
        strings.

    Raises:
        ValueError: if a row does not consist of exactly two cells (raised by
            the tuple unpacking).
    """
    wb = openpyxl.load_workbook(path)
    ws = wb[wb.sheetnames[0]]
    student_ids = []
    student_names = []
    for name_cell, id_cell in ws.iter_rows(min_row=2):
        if name_cell.value is None and id_cell.value is None:
            # Skip completely blank rows; they would otherwise be recorded as
            # the literal strings "None"/"None".
            continue
        student_names.append(str(name_cell.value))
        student_ids.append(str(id_cell.value))
    return student_ids, student_names
| 254 | + |
| 255 | + |
def main_excel(path, job_name="", detailed=False):
    """Generate the free-time workbook for the students listed in an Excel file.

    Args:
        path: path of the input workbook (read with ``process_excel``).
        job_name: base name of the output file; when empty it is derived from
            the input file name plus the suffix "空闲时间统计".
        detailed: forwarded to ``get_time_to_excel``.

    Returns:
        ``0`` on success, ``1`` when reading the workbook raised
        ``ValueError``, ``2`` when ``check`` rejected the ids/names.
    """
    try:
        ids, names = process_excel(path)
    except ValueError:
        print("excel格式错误")
        return 1

    if check(ids, names):
        if not job_name:
            stem = os.path.splitext(os.path.basename(path))[0]
            job_name = stem + "空闲时间统计"
        get_time_to_excel(ids, names, job_name=job_name, detailed=detailed)
        return 0

    print("student ids and student names are not correspond")
    return 2
| 270 | + |
| 271 | + |
def main_ids_and_names(ids_and_names, job_name='', detailed=False):
    """Generate the free-time workbook from explicit id and name sequences.

    Args:
        ids_and_names: a pair ``(ids, names)`` of parallel sequences.
        job_name: base name of the output file; defaults to "空闲时间统计"
            when empty.
        detailed: forwarded to ``get_time_to_excel``.

    Returns:
        ``0`` on success, ``2`` when ``check`` rejected the ids/names.
    """
    ids, names = ids_and_names
    if check(ids, names):
        output_name = job_name if job_name else "空闲时间统计"
        get_time_to_excel(ids, names, job_name=output_name, detailed=detailed)
        return 0

    print("student ids and student names are not correspond")
    return 2
| 281 | + |
| 282 | + |
def get_all_classes():
    """Scrape class information per college and insert it into ``class``.

    Logs in, downloads the college list as XML, then posts a class query for
    the selected colleges and inserts columns 2-7 of every result row through
    the module-level cursor ``c``; the module-level connection ``conn`` is
    committed afterwards.
    """
    session = login(user_id=config.ly_user_id, user_password=config.ly_user_password)
    college_response = session.get(
        "http://jwc.swjtu.edu.cn/vatuu/AjaxXML?selectType=CollegeInfo&selectValue=allCollege")
    root = ElementTree.fromstring(college_response.text)
    names = [node.text for node in root.iter("college_name")]
    codes = [node.text for node in root.iter("college_code")]

    query_url = "http://jwc.swjtu.edu.cn/vatuu/PublicInfoQueryAction"
    form = {
        "setAction": "queryClass",
        "collegeCode": "16",
        "btn1": "执行查询"
    }

    colleges = list(zip(names, codes))
    # NOTE(review): only the third college is processed ([2:3]); this looks
    # like a leftover from a manual/partial run - confirm before widening.
    for college_name, college_code in colleges[2:3]:
        print(college_name)
        form["collegeCode"] = college_code
        response = session.post(query_url, data=form)
        document = etree.HTML(response.text)
        # The first two <tr> elements are skipped (not class rows).
        table_rows = document.xpath("//table[@class='table_gray']/tr")[2:]
        if len(table_rows) <= 1:
            continue
        # Columns 2..7: class code, class name, college name, major name,
        # grade, class number (order matches the INSERT below).
        columns = [
            [tr.xpath(f"td[{index}]/text()")[0].strip() for tr in table_rows]
            for index in range(2, 8)
        ]
        c.executemany("INSERT INTO class values (%s,%s,%s,%s,%s,%s) ",
                      zip(*columns))
    conn.commit()
| 313 | + |
| 314 | + |
def students():
    """Populate the ``student`` table from ``student_course`` and ``class``.

    Two passes, each committed separately:

    1. Students whose ``student_class`` matches a ``class_name`` get their
       major, college and grade from the ``class`` table.
    2. Students whose class is not in ``class`` are inserted with the literal
       strings "null" for major and college and "0" for the grade.

    In both passes the seventh column is the leading ``(20)?\\d{2}`` match of
    the student id, and the last column is "true" when the class name
    contains a bracketed part (``[...]``), otherwise "false".
    """
    insert_sql = "INSERT INTO student values (%s,%s,%s,%s,%s,%s,%s,%s,%s)"

    def id_prefix(student_id):
        """Return the leading digits of the id matched by the enrolment pattern."""
        return re.match(r"(20)?\d{2}", student_id).group()

    def bracket_flag(class_name):
        """Return "true" if the class name contains ``[...]``, else "false"."""
        if re.search(r".*\[.*\].*", class_name):
            return "true"
        return "false"

    c.execute("""select distinct student_id,student_name,student_class,student_gender,major_name,college_name,grade
                 from student_course,class
                 where student_class = class_name""")
    matched = []
    for row in c:
        matched.append(
            (*row[0:6], id_prefix(row[0]), str(row[-1]), bracket_flag(row[2])))
    c.executemany(insert_sql, matched)
    conn.commit()

    c.execute("""select distinct student_id,student_name,student_class,student_gender
                 from student_course
                 where student_class not in (select class_name from class)""")
    unmatched = []
    for row in c:
        unmatched.append(
            (*row, "null", "null", id_prefix(row[0]), "0", bracket_flag(row[2])))
    # enrol_grade is changed manually afterwards
    c.executemany(insert_sql, unmatched)

    conn.commit()
| 334 | + |
| 335 | + |
if __name__ == '__main__':
    # ``conn`` and ``c`` are module-level names: every function above uses
    # them as globals, so they must be created before any job is run.
    conn = psycopg2.connect(
        dbname=config.dbname,
        user=config.user,
        password=config.password,
        host=config.host,
        port=config.port,
    )
    c = conn.cursor()

    # Scraping jobs - uncomment the one(s) to run:
    # get_all_courses()
    # get_all_classes()
    # get_all_names()
    # students()

    # Report from an Excel sheet of names/ids:
    # main_excel(path="***.xlsx", job_name="***", detailed=True)

    # Report from explicit lists:
    # names = ['***', '***']
    # ids = ['**********',
    #        '**********']
    # main_ids_and_names((ids, names), detailed=True, job_name="temp")

    conn.close()
0 commit comments