-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_extract.py
96 lines (76 loc) · 2.16 KB
/
data_extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from regularexp import *
from tocsv import to_csv
def grade_line_splitter(s):
    """Split a comma-separated grade row into two-element entries.

    Each comma-delimited piece of ``s`` is turned into
    ``[text_before_para(piece), text_between_para(piece)]``. Both helpers
    come from ``regularexp``; presumably they return the text before and
    inside the parentheses of the piece -- confirm against that module.

    Returns the entries as a list, in the order they appear in ``s``.
    """
    return [
        [text_before_para(piece), text_between_para(piece)]
        for piece in s.split(',')
    ]
def extractor(src):
    """Parse a result text file and hand the grouped records to ``to_csv``.

    The file at ``src`` is read line by line (each line stripped). Line
    index 4 is taken as the college name and line index 6 as the exam name.
    Every remaining non-empty line is classified with the ``regularexp``
    predicates, tried in this order:

    * ``is_dept``        -- starts a new department group
    * ``is_reg_num``     -- starts a new student under the current department
    * ``is_course_code`` -- ignored
    * ``is_grade_line``  -- grade row; a row may be wrapped over two
      consecutive grade lines, in which case the two halves are joined

    Grade rows are matched to students purely by position: the n-th grade
    row belongs to the n-th registration number.

    The result passed to ``to_csv`` is ``dept_list``, whose entries have the
    shape ``[dept_line, [[reg_num, entries], ...]]`` where ``entries`` is the
    output of ``grade_line_splitter`` for that student's grade row.

    Args:
        src: Path of the text file to parse.

    Raises:
        IndexError: If the file has fewer than 7 lines, or if there are
            fewer grade rows than registration numbers.
    """
    with open(src) as f:
        content = [raw.strip() for raw in f]

    # name of the college from content[4], exam name from content[6]
    college_name = content[4]
    exam_name = content[6]

    content = [text.replace("[Full Time]", "") for text in content]

    grade_lines = []
    deptname = []  # stays an empty list until the first department line
    dept_list = []
    student = []
    # First half of a grade row that may still be continued by the next
    # grade line; None when nothing is buffered.
    pending = None

    for line in content:
        if not line:
            # A blank line means the buffered row was complete on one line.
            if pending is not None:
                grade_lines.append(pending)
                pending = None
            continue

        if is_dept(line):
            deptname = line
            dept_list.append([deptname])
        elif is_reg_num(line):
            student.append([deptname, line])
        elif is_course_code(line):
            continue
        elif is_grade_line(line):
            if pending is None:
                pending = line
            else:
                # Second half of a wrapped row: drop the last character of
                # the first half (presumably a trailing separator -- confirm
                # against the input format) and join the halves with a comma.
                grade_lines.append(pending[:-1] + ',' + line)
                pending = None

    # A row that is the very last line of the file has no blank line after
    # it; flush it here so the final student does not lose their grades.
    if pending is not None:
        grade_lines.append(pending)

    grade_lines = [row.replace(" ", "") for row in grade_lines]

    # Attach the n-th grade row to the n-th student (IndexError if short).
    for position, record in enumerate(student):
        record.append([grade_line_splitter(grade_lines[position])])

    # Group students under the department line they were listed after.
    for dept in dept_list:
        dept.append([
            [reg_num] + grades
            for owner, reg_num, grades in student
            if dept[0] == owner
        ])

    college_name = college_name.replace(",", " ").replace("Exam Centre: ", "")
    save_name = "./csv/" + exam_name + ".csv"
    to_csv(dept_list, college_name, save_name)
if __name__ == '__main__':
    # extractor() accepts only the source path; the output location is built
    # inside it ("./csv/<exam name>.csv"), so no destination is passed here.
    extractor("./4.txt")