Skip to content

Commit eadc4e6

Browse files
committed
Migrate to Python3
1 parent 72aab00 commit eadc4e6

File tree

28 files changed

+222
-1020
lines changed

28 files changed

+222
-1020
lines changed

filters/AlignedProportion/AlignedProportion.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ def initialize(self, source_language, target_language, extra_args):
6969

7070
f.close()
7171
if self.model_exist:
72-
print "Loaded stats from the model file."
72+
print("Loaded stats from the model file.")
7373

7474
if extra_args['emit scores'] == True:
7575
self.num_of_scans = 1
@@ -89,9 +89,9 @@ def finalize(self):
8989
self.trg_var = (self.trg_sum_sq - (self.trg_sum * self.trg_sum) / self.n) / (self.n - 1)
9090
self.trg_var = math.sqrt(self.trg_var)
9191

92-
print "Aligned Proportion:"
93-
print "source mean & deviation:", self.src_mean, "\t", self.src_var
94-
print "target mean & deviation:", self.trg_mean, "\t", self.trg_var
92+
print("Aligned Proportion:")
93+
print("source mean & deviation: {}\t{}".format(self.src_mean, self.src_var))
94+
print("target mean & deviation: {}\t{}".format(self.trg_mean, self.trg_var))
9595

9696
f = open(self.model_filename, 'a')
9797
lang_pair = self.src_language + self.trg_language

filters/AlignedSequenceLength/AlignedSequenceLength.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ def initialize(self, source_language, target_language, extra_args):
7070

7171
f.close()
7272
if self.model_exist:
73-
print "Loaded stats from the model file."
73+
print("Loaded stats from the model file.")
7474

7575
if extra_args['emit scores'] == True:
7676
self.num_of_scans = 1

filters/BigramAlignedProportion/BigramAlignedProportion.py

+4-5
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ def initialize(self, source_language, target_language, extra_args):
6969

7070
f.close()
7171
if self.model_exist:
72-
print "Loaded stats from the model file."
72+
print("Loaded stats from the model file.")
7373

7474
if extra_args['emit scores'] == True:
7575
self.num_of_scans = 1
@@ -89,9 +89,9 @@ def finalize(self):
8989
self.trg_var = (self.trg_sum_sq - (self.trg_sum * self.trg_sum) / self.n) / (self.n - 1)
9090
self.trg_var = math.sqrt(self.trg_var)
9191

92-
print "Bigram Aligned Proportion:"
93-
print "source mean & deviation:", self.src_mean, "\t", self.src_var
94-
print "target mean & deviation:", self.trg_mean, "\t", self.trg_var
92+
print("Bigram Aligned Proportion:")
93+
print("source mean & deviation: {}\t{}".format(self.src_mean, self.src_var))
94+
print("target mean & deviation: {}\t{}".format(self.trg_mean, self.trg_var))
9595

9696
f = open(self.model_filename, 'a')
9797
lang_pair = self.src_language + self.trg_language
@@ -214,6 +214,5 @@ def decide(self, tu):
214214
trg_ratio = abs(trg_ratio - self.trg_mean)
215215

216216
if src_ratio > self.var_mult * self.src_var or trg_ratio > self.var_mult * self.trg_var:
217-
# if src_ratio < self.s_thresh or trg_ratio < self.t_thresh:
218217
return 'reject'
219218
return 'accept'

filters/FirstUnalignedWord/FirstUnalignedWord.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ def initialize(self, source_language, target_language, extra_args):
6868

6969
f.close()
7070
if self.model_exist:
71-
print "Loaded stats from the model file."
71+
print("Loaded stats from the model file.")
7272

7373
if extra_args['emit scores'] == True:
7474
self.num_of_scans = 1
@@ -183,6 +183,5 @@ def decide(self, tu):
183183
first_trg = abs(first_trg - self.trg_mean)
184184

185185
if first_src > self.var_mult * self.src_var or first_trg > self.var_mult * self.trg_var:
186-
# if first_src > self.s_thresh or first_trg > self.t_thresh:
187186
return 'reject'
188187
return 'accept'

filters/Lang_Identifier/Lang_Identifier.py

-48
This file was deleted.

filters/Lang_Identifier/langid.py

-614
This file was deleted.

filters/LastUnalignedWord/LastUnalignedWord.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ def initialize(self, source_language, target_language, extra_args):
6969

7070
f.close()
7171
if self.model_exist:
72-
print "Loaded stats from the model file."
72+
print("Loaded stats from the model file.")
7373

7474
if extra_args['emit scores'] == True:
7575
self.num_of_scans = 1

filters/LengthRatio/LengthRatio.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ def initialize(self, source_language, target_language, extra_args):
5555

5656
f.close()
5757
if self.model_exist:
58-
print "Loaded stats from the model file."
58+
print("Loaded stats from the model file.")
5959

6060
if extra_args['emit scores'] == True:
6161
self.num_of_scans = 1

filters/LengthStats/LengthStats.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,10 @@ def finalize(self):
3030
self.src_mean = self.src_sum / self.n
3131
self.trg_mean = self.trg_sum / self.n
3232

33-
print 'src length mean:', self.src_mean
34-
print 'trg length mean:', self.trg_mean
35-
print 'src word mean:', self.src_wsum / self.n
36-
print 'trg word mean:', self.trg_wsum / self.n
33+
print('src length mean:', self.src_mean)
34+
print('trg length mean:', self.trg_mean)
35+
print('src word mean:', self.src_wsum / self.n)
36+
print('trg word mean:', self.trg_wsum / self.n)
3737

3838
def process_tu(self, tu, num_of_finished_scans):
3939
self.src_wsum += len(tu.src_tokens)

filters/LongestAlignedSequence/LongestAlignedSequence.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ def initialize(self, source_language, target_language, extra_args):
6969

7070
f.close()
7171
if self.model_exist:
72-
print "Loaded stats from the model file."
72+
print("Loaded stats from the model file.")
7373

7474
if extra_args['emit scores'] == True:
7575
self.num_of_scans = 1

filters/LongestUnalignedSequence/LongestUnalignedSequence.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ def initialize(self, source_language, target_language, extra_args):
6868

6969
f.close()
7070
if self.model_exist:
71-
print "Loaded stats from the model file."
71+
print("Loaded stats from the model file.")
7272

7373
if extra_args['emit scores'] == True:
7474
self.num_of_scans = 1

filters/NumberOfUnalignedSequences/NumberOfUnalignedSequences.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ def initialize(self, source_language, target_language, extra_args):
6868

6969
f.close()
7070
if self.model_exist:
71-
print "Loaded stats from the model file."
71+
print("Loaded stats from the model file.")
7272

7373
if extra_args['emit scores'] == True:
7474
self.num_of_scans = 1
@@ -97,9 +97,9 @@ def finalize(self):
9797

9898
f.close()
9999

100-
print "Number Of Unaligned Sequences:"
101-
print "source mean & deviation:", self.src_mean, "\t", self.src_var
102-
print "target mean & deviation:", self.trg_mean, "\t", self.trg_var
100+
print("Number Of Unaligned Sequences:")
101+
print("source mean & deviation: {}\t{}".format(self.src_mean, self.src_var))
102+
print("target mean & deviation: {}\t{}".format(self.trg_mean, self.trg_var))
103103

104104
self.s_thresh = np.percentile(self.src_scores, self.var_mult)
105105
self.t_thresh = np.percentile(self.trg_scores, self.var_mult)

filters/ReverseLengthRatio/ReverseLengthRatio.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ def initialize(self, source_language, target_language, extra_args):
5656

5757
f.close()
5858
if self.model_exist:
59-
print "Loaded stats from the model file."
59+
print("Loaded stats from the model file.")
6060

6161
if extra_args['emit scores'] == True:
6262
self.num_of_scans = 1

filters/ReverseWordRatio/ReverseWordRatio.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ def initialize(self, source_language, target_language, extra_args):
5656

5757
f.close()
5858
if self.model_exist:
59-
print "Loaded stats from the model file."
59+
print("Loaded stats from the model file.")
6060

6161
if extra_args['emit scores'] == True:
6262
self.num_of_scans = 1

filters/TagFinder/TagFinder.py

+9-89
Original file line number | Diff line number | Diff line change
@@ -63,72 +63,7 @@ def finalize(self):
6363
pass
6464

6565
def process_tu(self, tu, num_of_finished_scans):
66-
minus_points = 0
67-
68-
# - Dates ----------------------------------------------------------------
69-
src_dates = len(self.date_re.findall(tu.src_phrase))
70-
trg_dates = len(self.date_re.findall(tu.trg_phrase))
71-
if src_dates != trg_dates:
72-
minus_points += 1
73-
# print "date"
74-
75-
tu.src_phrase = self.date_re.sub("", tu.src_phrase)
76-
tu.trg_phrase = self.date_re.sub("", tu.trg_phrase)
77-
78-
# - Numbers --------------------------------------------------------------
79-
src_nums = len(self.num_re.findall(tu.src_phrase))
80-
trg_nums = len(self.num_re.findall(tu.trg_phrase))
81-
if src_nums != trg_nums:
82-
minus_points += 1
83-
# print "num"
84-
# print tu.src_phrase
85-
# print tu.trg_phrase
86-
87-
# - Reference tags -------------------------------------------------------
88-
src_ref = len(self.ref_re.findall(tu.src_phrase))
89-
trg_ref = len(self.ref_re.findall(tu.trg_phrase))
90-
if src_ref != trg_ref:
91-
minus_points += 1
92-
# print "ref"
93-
94-
tu.src_phrase = self.ref_re.sub("", tu.src_phrase)
95-
tu.trg_phrase = self.ref_re.sub("", tu.trg_phrase)
96-
97-
# - XML tags -------------------------------------------------------------
98-
src_xml_tag = len(self.xml_re.findall(tu.src_phrase))
99-
trg_xml_tag = len(self.xml_re.findall(tu.trg_phrase))
100-
if src_xml_tag != trg_xml_tag:
101-
minus_points += 1
102-
# print "xml"
103-
104-
# - Emails ---------------------------------------------------------------
105-
src_emails = len(self.email_re.findall(tu.src_phrase))
106-
trg_emails = len(self.email_re.findall(tu.trg_phrase))
107-
if src_emails != trg_emails:
108-
minus_points += 1
109-
# print "email"
110-
111-
# - URLs -----------------------------------------------------------------
112-
src_urls = len(self.url_re.findall(tu.src_phrase))
113-
trg_urls = len(self.url_re.findall(tu.trg_phrase))
114-
if src_urls != trg_urls:
115-
minus_points += 1
116-
# print "url"
117-
118-
# - Image tags -----------------------------------------------------------
119-
src_img_tag = len(self.image_re.findall(tu.src_phrase))
120-
trg_img_tag = len(self.image_re.findall(tu.trg_phrase))
121-
if src_img_tag != trg_img_tag:
122-
minus_points += 1
123-
# print "img"
124-
125-
# - Category tags --------------------------------------------------------
126-
src_cat_tag = len(self.category_re.findall(tu.src_phrase))
127-
trg_cat_tag = len(self.category_re.findall(tu.trg_phrase))
128-
if src_cat_tag != trg_cat_tag:
129-
minus_points += 1
130-
# print "cat"
131-
66+
minus_points = self.find_mismatches(tu)
13267
if minus_points > 1:
13368
return [0]
13469
return [1]
@@ -137,14 +72,19 @@ def do_after_a_full_scan(self, num_of_finished_scans):
13772
pass
13873

13974
def decide(self, tu):
140-
minus_points = 0
75+
minus_points = self.find_mismatches(tu)
14176

77+
if minus_points > 1:
78+
return 'reject'
79+
return 'accept'
80+
81+
def find_mismatches(self, tu):
82+
minus_points = 0
14283
# - Dates ----------------------------------------------------------------
14384
src_dates = len(self.date_re.findall(tu.src_phrase))
14485
trg_dates = len(self.date_re.findall(tu.trg_phrase))
14586
if src_dates != trg_dates:
14687
minus_points += 1
147-
# print "date"
14888

14989
tu.src_phrase = self.date_re.sub("", tu.src_phrase)
15090
tu.trg_phrase = self.date_re.sub("", tu.trg_phrase)
@@ -154,16 +94,12 @@ def decide(self, tu):
15494
trg_nums = len(self.num_re.findall(tu.trg_phrase))
15595
if src_nums != trg_nums:
15696
minus_points += 1
157-
# print "num"
158-
# print tu.src_phrase
159-
# print tu.trg_phrase
16097

16198
# - Reference tags -------------------------------------------------------
16299
src_ref = len(self.ref_re.findall(tu.src_phrase))
163100
trg_ref = len(self.ref_re.findall(tu.trg_phrase))
164101
if src_ref != trg_ref:
165102
minus_points += 1
166-
# print "ref"
167103

168104
tu.src_phrase = self.ref_re.sub("", tu.src_phrase)
169105
tu.trg_phrase = self.ref_re.sub("", tu.trg_phrase)
@@ -173,46 +109,30 @@ def decide(self, tu):
173109
trg_xml_tag = len(self.xml_re.findall(tu.trg_phrase))
174110
if src_xml_tag != trg_xml_tag:
175111
minus_points += 1
176-
# print "xml"
177112

178113
# - Emails ---------------------------------------------------------------
179114
src_emails = len(self.email_re.findall(tu.src_phrase))
180115
trg_emails = len(self.email_re.findall(tu.trg_phrase))
181116
if src_emails != trg_emails:
182117
minus_points += 1
183-
# print "email"
184118

185119
# - URLs -----------------------------------------------------------------
186120
src_urls = len(self.url_re.findall(tu.src_phrase))
187121
trg_urls = len(self.url_re.findall(tu.trg_phrase))
188122
if src_urls != trg_urls:
189123
minus_points += 1
190-
# print "url"
191124

192125
# - Image tags -----------------------------------------------------------
193126
src_img_tag = len(self.image_re.findall(tu.src_phrase))
194127
trg_img_tag = len(self.image_re.findall(tu.trg_phrase))
195128
if src_img_tag != trg_img_tag:
196129
minus_points += 1
197-
# print "img"
198130

199131
# - Category tags --------------------------------------------------------
200132
src_cat_tag = len(self.category_re.findall(tu.src_phrase))
201133
trg_cat_tag = len(self.category_re.findall(tu.trg_phrase))
202134
if src_cat_tag != trg_cat_tag:
203135
minus_points += 1
204-
# print "cat"
205136

206-
if src_cat_tag > 0:
207-
print "category tag -> edit them"
208-
print src_cat_tag
209-
if trg_cat_tag > 0:
210-
print "category tag -> edit them"
211-
print trg_cat_tag
212-
213-
# ------------------------------------------------------------------------
214-
# ------------------------------------------------------------------------
137+
return minus_points
215138

216-
if minus_points > 1:
217-
return 'reject'
218-
return 'accept'

filters/UnalignedSequenceLength/UnalignedSequenceLength.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ def initialize(self, source_language, target_language, extra_args):
6969

7070
f.close()
7171
if self.model_exist:
72-
print "Loaded stats from the model file."
72+
print("Loaded stats from the model file.")
7373

7474
if extra_args['emit scores'] == True:
7575
self.num_of_scans = 1

0 commit comments

Comments
 (0)