-
Notifications
You must be signed in to change notification settings - Fork 96
/
easylist_pac.py
2276 lines (2089 loc) · 92.4 KB
/
easylist_pac.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
__author__ = 'stsmith'
# easylist_pac: Convert EasyList Tracker and Adblocking rules to an efficient Proxy Auto Configuration file
# Copyright (C) 2017-2020 by Steven T. Smith <steve dot t dot smith at gmail dot com>, GPL
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import argparse as ap, copy, datetime, functools as fnt, numpy as np, os, re, sys, time, urllib.request, warnings
# Optional machine-learning dependencies. machine_learning_flag records whether
# they could all be imported; prioritize_rules() only runs the logistic
# regression when it is True.
try:
    machine_learning_flag = True
    import multiprocessing as mp, scipy.sparse as sps
    from sklearn.linear_model import LogisticRegression
    from sklearn.preprocessing import StandardScaler
except ImportError as e:
    # Report the missing package but keep going
    machine_learning_flag = False
    print(e)
    warnings.warn("Install scikit-learn for more accurate EasyList rule selection.")
# Optional plotting support. plot_flag records whether matplotlib could be
# imported; it is consulted before plotting in debug mode.
try:
    plot_flag = True
    import matplotlib as mpl, matplotlib.pyplot as plt
    # Legible plot style defaults
    # http://matplotlib.org/api/matplotlib_configuration_api.html
    # http://matplotlib.org/users/customizing.html
    mpl.rcParams['figure.figsize'] = (10.0, 5.0)
    mpl.rc('font', **{'family': 'sans-serif', 'weight': 'bold', 'size': 14})
    mpl.rc('axes', **{'titlesize': 20, 'titleweight': 'bold', 'labelsize': 16, 'labelweight': 'bold'})
    mpl.rc('legend', **{'fontsize': 14})
    mpl.rc('figure', **{'titlesize': 16, 'titleweight': 'bold'})
    mpl.rc('lines', **{'linewidth': 2.5, 'markersize': 18, 'markeredgewidth': 0})
    mpl.rc('mathtext',
           **{'fontset': 'custom', 'rm': 'sans:bold', 'bf': 'sans:bold', 'it': 'sans:italic', 'sf': 'sans:bold',
              'default': 'it'})
    # plt.rc('text',usetex=False) # [default] usetex should be False
    # NOTE(review): newer matplotlib releases appear to expect a plain string for
    # this rcParam rather than a list -- confirm against the installed version.
    mpl.rcParams['text.latex.preamble'] = [r'\\usepackage{amsmath,sfmath} \\boldmath']
except ImportError as e:
    # Report the missing package but keep going without plots
    plot_flag = False
    print(e)
    warnings.warn("Install matplotlib to plot rule priorities.")
class EasyListPAC:
'''Create a Proxy Auto Configuration file from EasyList rule sets.'''
def __init__(self):
    """Run the whole pipeline: arguments, downloads, rule ranking, PAC output.

    In debug mode the ranked rules are printed with their signal strengths
    (and the bad-rule signal is plotted when matplotlib is available);
    no PAC file is written in that case.
    """
    self.parseArgs()
    self.easylists_download_latest()
    self.parse_and_filter_rule_files()
    self.prioritize_rules()
    if not self.my_extra_rules_off:
        self.easylist_append_rules(my_extra_rules)

    # normal run: translate the rules and write proxy.pac
    if not self.debug:
        self.parse_easylist_rules()
        self.create_pac_file()
        return

    # debug run: just report the rules and their strengths
    row_format = '{: 5d}:\t{}\t\t[{:2.1f}]'

    def listing(rules, strengths):
        return '\n'.join(row_format.format(i, r, s) for (i, (r, s)) in enumerate(zip(rules, strengths)))

    print("Good rules and strengths:\n" + listing(self.good_rules, self.good_signal))
    print("\nBad rules and strengths:\n" + listing(self.bad_rules, self.bad_signal))
    if plot_flag:
        plt.plot(np.arange(len(self.bad_signal)), self.bad_signal, '.')
        plt.xlabel('Rule index')
        plt.ylabel('Bad rule distance (logit)')
        plt.show()
def parseArgs(self):
    """Parse the command line once and copy the options onto the instance.

    Negative values for the numeric limits (-rb, -rg, -th, -tr, -*) are
    stored as None so that they act as "no limit" in [:None] slices.

    Returns:
        The argparse namespace, which is also stored as self.args.
    """
    # blackhole specification in arguments
    # best choice is the LAN IP address of the http://hostname/proxy.pac web server or a dedicated blackhole server, e.g. 192.168.0.2:8119
    parser = ap.ArgumentParser()
    parser.add_argument('-b', '--blackhole', help="Blackhole IP:port", type=str, default='127.0.0.1:8119')
    parser.add_argument('-d', '--download-dir', help="Download directory", type=str, default='~/Downloads')
    parser.add_argument('-g', '--debug', help="Debug: Just print rules", action='store_true')
    parser.add_argument('-moff', '--my_extra_rules_turnoff_flag', help="Turn off adding my extra rules", default=False, action='store_true')
    parser.add_argument('-p', '--proxy', help="Proxy host:port", type=str, default='')
    parser.add_argument('-P', '--PAC-original', help="Original proxy.pac file", type=str, default='proxy.pac.orig')
    parser.add_argument('-rb', '--bad-rule-max', help="Maximum number of bad rules (-1 for unlimited)", type=int,
                        default=19999)
    parser.add_argument('-rg', '--good-rule-max', help="Maximum number of good rules (-1 for unlimited)",
                        type=int, default=1099)
    parser.add_argument('-th', '--truncate_hash', help="Truncate hash object length to maximum number", type=int,
                        default=3999)
    parser.add_argument('-tr', '--truncate_regex', help="Truncate regex rules to maximum number", type=int,
                        default=499)
    parser.add_argument('-w', '--sliding-window', help="Sliding window training and test (slow)", action='store_true')
    parser.add_argument('-x', '--Extra_EasyList_URLs', help="Extra EasyList URLs", type=str, nargs='+', default=[])
    parser.add_argument('-*', '--wildcard-limit', help="Limit the number of wildcards", type=int, default=999)
    parser.add_argument('-@@', '--exceptions_include_flag', help="Include exception rules", action='store_true')
    # parse exactly once; the same namespace is kept on the instance and returned
    args = parser.parse_args()
    self.args = args
    self.blackhole_ip_port = args.blackhole
    self.easylist_dir = os.path.expanduser(args.download_dir)
    self.debug = args.debug
    self.my_extra_rules_off = args.my_extra_rules_turnoff_flag
    self.proxy_host_port = args.proxy
    self.orig_pac_file = os.path.join(self.easylist_dir, args.PAC_original)
    # n.b. negative limits are set to no limits using [:None] slicing trick
    self.good_rule_max = args.good_rule_max if args.good_rule_max >= 0 else None
    self.bad_rule_max = args.bad_rule_max if args.bad_rule_max >= 0 else None
    self.truncate_hash_max = args.truncate_hash if args.truncate_hash >= 0 else None
    self.truncate_alternatives_max = args.truncate_regex if args.truncate_regex >= 0 else None
    self.sliding_window = args.sliding_window
    self.exceptions_include_flag = args.exceptions_include_flag
    self.wildcard_named_group_limit = args.wildcard_limit if args.wildcard_limit >= 0 else None
    self.extra_easylist_urls = args.Extra_EasyList_URLs
    return self.args
def easylists_download_latest(self):
    """Refresh the local copies of the EasyList files and record their paths.

    Each URL in self.download_list (the four built-in lists plus
    self.extra_easylist_urls) is requested. The body is written to
    self.easylist_dir when the remote Last-Modified time is newer than the
    local file, or when the local file is missing or empty. Every local
    path is appended to self.file_list whether or not it was refreshed.
    """
    easylist_url = 'https://easylist.to/easylist/easylist.txt'
    easyprivacy_url = 'https://easylist.to/easylist/easyprivacy.txt'
    fanboy_annoyance_url = 'https://easylist.to/easylist/fanboy-annoyance.txt'
    fanboy_antifacebook = 'https://raw.githubusercontent.com/ryanbr/fanboy-adblock/master/fanboy-antifacebook.txt'
    self.download_list = [fanboy_antifacebook, fanboy_annoyance_url, easyprivacy_url, easylist_url] + self.extra_easylist_urls
    self.file_list = []
    for url in self.download_list:
        fname = os.path.basename(url)
        fname_full = os.path.join(self.easylist_dir, fname)
        have_file = os.path.isfile(fname_full)
        file_utc = file_to_utc(fname_full) if have_file else 0.
        request = urllib.request.Request(url, headers={'User-Agent': user_agent})
        # the context manager closes the HTTP response even if the download fails
        with urllib.request.urlopen(request) as resp:
            url_utc = last_modified_to_utc(last_modified_resp(resp))
            # have_file is checked first so getsize() is never called on a missing file
            if (url_utc > file_utc) or (not have_file) or (os.path.getsize(fname_full) == 0):  # download the newer file
                with open(fname_full, mode='w', encoding='utf-8') as out_file:
                    out_file.write(resp.read().decode('utf-8'))
        self.file_list.append(fname_full)
def parse_and_filter_rule_files(self):
    """Reset the good/bad rule, option and include-flag lists, then load every file in self.file_list."""
    # each attribute gets its own fresh list
    for attr in ('good_rules', 'bad_rules',
                 'good_opts', 'bad_opts',
                 'good_rules_include_flag', 'bad_rules_include_flag'):
        setattr(self, attr, [])
    for path in self.file_list:
        with open(path, 'r', encoding='utf-8') as rule_file:
            self.easylist_append_rules(rule_file)
def easylist_append_rules(self, fd):
    """Feed every line of fd (any iterable of strings) to easylist_append_one_rule().

    Trailing whitespace is stripped from each line first. Lines rejected with
    RuleIgnored are skipped; the reason is printed only in debug mode.
    """
    for raw_line in fd:
        stripped = raw_line.rstrip()
        try:
            self.easylist_append_one_rule(stripped)
        except self.RuleIgnored as reason:
            if self.debug:
                print(reason, flush=True)
class RuleIgnored(Exception):
    """Raised by easylist_append_one_rule() for a line that is not added as an included rule."""
def easylist_append_one_rule(self, line):
    """Classify one EasyList line and append it to the good or bad rule lists.

    The exception marker and any options are stripped from the line before it
    is stored via append_rule(). Only a line that survives every check below is
    stored with its include flag set to True. Some rejected lines are still
    stored, with the flag set to False, before RuleIgnored is raised.

    Raises:
        RuleIgnored: for every line that is not stored as an included rule.
    """
    # NOTE(review): both variables are locals re-initialised on every call, so the
    # "ignore rules following comment" state cannot carry over from one line to the
    # next; the `if ignore_rules_flag:` tests below are never true as written.
    ignore_rules_flag = False
    ignored_rules_count = 0
    line_orig = line  # not read again in this method
    # configuration lines and selector rules should already be filtered out
    if re_test(configuration_re, line) or re_test(selector_re, line): raise self.RuleIgnored("Rule '{}' not added.".format(line))
    exception_flag = exception_filter(line) # block default; pass if True
    line = exception_re.sub(r'\1', line)
    option_exception_re = not3dimppuposgh_option_exception_re # ignore these options by default
    # delete all easylist options **prior** to regex and selector cases
    # ignore domain limits for now
    opts = '' # default: no options in the rule
    if re_test(option_re, line):
        opts = option_re.sub(r'\2', line)
        # domain-specific and other option exceptions: ignore
        # too many rules (>~ 10k) bog down the browser; make reasonable exclusions here
        line = option_re.sub(r'\1', line) # delete all the options and continue
    # ignore these cases
    # comment case: ignore
    if re_test(comment_re, line):
        if re_test(commentname_sections_ignore_re, line):
            ignored_rules_comment_start = comment_re.sub('', line)
            if not ignore_rules_flag:
                ignored_rules_count = 0
                ignore_rules_flag = True
                print('Ignore rules following comment ', end='', flush=True)
            print('"{}"… '.format(ignored_rules_comment_start), end='', flush=True)
        else:
            if ignore_rules_flag: print('\n {:d} rules ignored.'.format(ignored_rules_count), flush=True)
            ignored_rules_count = 0
            ignore_rules_flag = False
        # comments are never stored as rules
        raise self.RuleIgnored("Rule '{}' not added.".format(line))
    if ignore_rules_flag:
        ignored_rules_count += 1
        self.append_rule(exception_flag, line, opts, False)
        raise self.RuleIgnored("Rule '{}' not added.".format(line))
    # blank url case: ignore
    if re_test(httpempty_re, line): raise self.RuleIgnored("Rule '{}' not added.".format(line))
    # blank line case: ignore
    if not bool(line): raise self.RuleIgnored("Rule '{}' not added.".format(line))
    # block default or pass exception
    if exception_flag:
        option_exception_re = not3dimppuposgh_option_exception_re # ignore these options within exceptions
        if not self.exceptions_include_flag:
            # stored with include flag False, then reported as ignored
            self.append_rule(exception_flag, line, opts, False)
            raise self.RuleIgnored("Rule '{}' not added.".format(line))
    # specific options: ignore
    if re_test(option_exception_re, opts):
        # stored with include flag False, then reported as ignored
        self.append_rule(exception_flag, line, opts, False)
        raise self.RuleIgnored("Rule '{}' not added.".format(line))
    # add all remaining rules
    self.append_rule(exception_flag, line, opts, True)
def append_rule(self,exception_flag,rule, opts, include_rule_flag):
    """Store a rule, its tokenized options and its include flag.

    Exception rules go to the good_* lists, all others to the bad_* lists.
    An empty rule is dropped.
    """
    if not rule:
        return  # last chance to reject blank lines -- shouldn't happen
    side = 'good' if exception_flag else 'bad'
    getattr(self, side + '_rules').append(rule)
    getattr(self, side + '_opts').append(option_tokenizer(opts))
    getattr(self, side + '_rules_include_flag').append(include_rule_flag)
def good_class_test(self,rule,opts=''):
    """Return True when the rule does not match badregex_regex_filters_re (opts is not used)."""
    return badregex_regex_filters_re.search(rule) is None
def bad_class_test(self,rule,opts=''):
    """Return True when the rule is a bad rule of interest.

    A rule qualifies when it matches the bad regex filters, or when its
    options are non-empty, match thrdp_im_pup_os_option_re and do not match
    not3dimppupos_option_exception_re (e.g. non-domain specific popups or images).
    """
    if badregex_regex_filters_re.search(rule) is not None:
        return True
    if not opts:
        return False
    if thrdp_im_pup_os_option_re.search(opts) is None:
        return False
    return not3dimppupos_option_exception_re.search(opts) is None
def prioritize_rules(self):
    """Rank the included good and bad rules by signal strength and keep the strongest.

    The initial signal is the 0/1 result of good_class_test()/bad_class_test()
    on the rules whose include flag is set. When the machine-learning
    packages are available, logreg_priorities() replaces it with logistic
    regression scores. Outside debug mode each rule limit is clipped to the
    number of rules with a strictly positive signal. The rules are then
    ordered by decreasing signal and truncated, and the hardcoded
    include_these_good_rules / include_these_bad_rules are appended when
    not already present.
    """
    # use bootstrap regex preferences
    # https://github.com/seatgeek/fuzzywuzzy would be great here if there were such a thing for regex
    # n.b. builtin int replaces the np.int alias, which was removed in NumPy 1.24
    self.good_signal = np.array([self.good_class_test(x,opts) for (x,opts,f) in zip(self.good_rules,self.good_opts,self.good_rules_include_flag) if f], dtype=int)
    self.bad_signal = np.array([self.bad_class_test(x,opts) for (x,opts,f) in zip(self.bad_rules,self.bad_opts,self.bad_rules_include_flag) if f], dtype=int)
    self.good_columns = np.array([i for (i,f) in enumerate(self.good_rules_include_flag) if f],dtype=int)
    self.bad_columns = np.array([i for (i,f) in enumerate(self.bad_rules_include_flag) if f],dtype=int)
    # Logistic Regression for more accurate rule priorities
    if machine_learning_flag:
        print("Performing logistic regression on rule sets. This will take a few minutes…",end='',flush=True)
        self.logreg_priorities()
        print(" done.", flush=True)
    # truncate to positive signal strengths
    if not self.debug:
        good_positive = np.count_nonzero(self.good_signal > 0)
        bad_positive = np.count_nonzero(self.bad_signal > 0)
        # a None limit means "unlimited"; it becomes the count of positive signals
        self.good_rule_max = min(self.good_rule_max, good_positive) \
            if isinstance(self.good_rule_max, (int, np.integer)) else good_positive
        self.bad_rule_max = min(self.bad_rule_max, bad_positive) \
            if isinstance(self.bad_rule_max, (int, np.integer)) else bad_positive

    def strongest_first(signal, limit):
        """Indices of signal in decreasing order (ties keep their original order), cut to limit."""
        order = [e[0] for e in sorted(enumerate(signal), key=lambda e: e[1], reverse=True)]
        return np.array(order, dtype=int)[:limit]

    # prioritize and limit the rules
    good_pridx = strongest_first(self.good_signal, self.good_rule_max)
    self.good_columns = self.good_columns[good_pridx]
    self.good_signal = self.good_signal[good_pridx]
    self.good_rules = [self.good_rules[k] for k in self.good_columns]
    bad_pridx = strongest_first(self.bad_signal, self.bad_rule_max)
    self.bad_columns = self.bad_columns[bad_pridx]
    self.bad_signal = self.bad_signal[bad_pridx]
    self.bad_rules = [self.bad_rules[k] for k in self.bad_columns]
    # include hardcoded rules
    for rule in include_these_good_rules:
        if rule not in self.good_rules: self.good_rules.append(rule)
    for rule in include_these_bad_rules:
        if rule not in self.bad_rules: self.bad_rules.append(rule)
    # rules are now ordered
    self.good_columns = np.arange(0,len(self.good_rules),dtype=self.good_columns.dtype)
    self.bad_columns = np.arange(0,len(self.bad_rules),dtype=self.bad_columns.dtype)
    return
def logreg_priorities(self):
    """Rule prioritization using logistic regression on bootstrap preferences.

    Builds a feature-vector matrix for all good rules and all bad rules,
    whatever their include flags. Each matrix is scaled without centring,
    labelled with the regex-based class tests, and used to fit and score via
    logreg_test_in_training(). The optional sliding-window pass runs when
    self.sliding_window is set.
    """
    self.good_fv_json = {}
    self.good_column_hash = {}
    for col, (rule,opts) in enumerate(zip(self.good_rules,self.good_opts)):
        feature_vector_append_column(rule, opts, col, self.good_fv_json)
        self.good_column_hash[rule] = col
    self.bad_fv_json = {}
    self.bad_column_hash = {}
    for col, (rule,opts) in enumerate(zip(self.bad_rules,self.bad_opts)):
        feature_vector_append_column(rule, opts, col, self.bad_fv_json)
        self.bad_column_hash[rule] = col
    self.good_fv_mat, self.good_row_hash = fv_to_mat(self.good_fv_json, self.good_rules)
    self.bad_fv_mat, self.bad_row_hash = fv_to_mat(self.bad_fv_json, self.bad_rules)
    # n.b. builtin float/int replace the np.float/np.int aliases, which were removed in NumPy 1.24
    self.good_X_all = StandardScaler(with_mean=False).fit_transform(self.good_fv_mat.astype(float))
    self.good_y_all = np.array([self.good_class_test(x,opts) for (x,opts) in zip(self.good_rules, self.good_opts)], dtype=int)
    self.bad_X_all = StandardScaler(with_mean=False).fit_transform(self.bad_fv_mat.astype(float))
    self.bad_y_all = np.array([self.bad_class_test(x,opts) for (x,opts) in zip(self.bad_rules, self.bad_opts)], dtype=int)
    self.logit_fit_method_sample_weights()
    # inverse regularization signal; smaller values give more sparseness, less model rigidity
    self.C = 1.e1
    self.logreg_test_in_training()
    if self.sliding_window: self.logreg_sliding_window()
    return
def debug_feature_vector(self,rule_substring=r'google.com/pagead'):
    """Print the nonzero feature-vector entries of the first bad rule containing rule_substring.

    When no bad rule contains the substring (including when there are no bad
    rules at all), a short message is printed and nothing else happens. This
    replaces the earlier behaviour of dumping the last rule, or failing on an
    empty rule list.
    """
    col = next((j for j, rule in enumerate(self.bad_rules) if rule.find(rule_substring) >= 0), None)
    if col is None:
        print("No bad rule contains '{}'.".format(rule_substring))
        return
    print(self.bad_rules[col])
    _, rows = self.bad_fv_mat[col,:].nonzero() # fv_mat is transposed
    print(rows)
    for row in rows:
        print('Row {:d}: {}:: {:g}'.format(row, self.bad_row_hash[int(row)], self.bad_fv_mat[col, row]))
def logit_fit_method_sample_weights(self):
    """Build the per-sample weight vectors handed to LogisticRegression.fit().

    Good rules all weigh 1. Each bad rule starts at 1 and gains 1/len(rule)
    (slight disadvantage for longer rules), one unit per match of every
    regex in high_weight_regex, and one more unit when its options match
    thrdp_im_pup_os_option_re.
    """
    self.good_w_all = np.ones(len(self.good_y_all))
    self.bad_w_all = np.ones(len(self.bad_y_all))
    for idx, bad_rule in enumerate(self.bad_rules):
        # accumulate in the same order as the in-place additions would
        weight = self.bad_w_all[idx]
        weight += 1/max(1,len(bad_rule))
        for pattern in high_weight_regex:
            weight += len(pattern.findall(bad_rule))
        weight += bool(thrdp_im_pup_os_option_re.search(self.bad_opts[idx]))
        self.bad_w_all[idx] = weight
def logreg_test_in_training(self):
    """Fast, initial method: score test vectors that are part of the training data.

    Fits one L2-penalised liblinear LogisticRegression per rule class on all
    rules with their sample weights. The decision-function values of the
    rows selected by good_columns / bad_columns are stored as good_signal /
    bad_signal. A class with no selected rows is neither fitted nor rescored.
    """
    model_options = dict(C=self.C, penalty='l2', solver='liblinear', tol=0.01)
    self.good_fv_logreg = LogisticRegression(**model_options)
    self.bad_fv_logreg = LogisticRegression(**model_options)
    good_x_test = self.good_X_all[self.good_columns]
    bad_x_test = self.bad_X_all[self.bad_columns]
    if good_x_test.shape[0] > 0:
        self.good_fv_logreg.fit(self.good_X_all, self.good_y_all, sample_weight=self.good_w_all)
        self.good_signal = self.good_fv_logreg.decision_function(good_x_test)
    if bad_x_test.shape[0] > 0:
        self.bad_fv_logreg.fit(self.bad_X_all, self.bad_y_all, sample_weight=self.bad_w_all)
        self.bad_signal = self.bad_fv_logreg.decision_function(bad_x_test)
def logreg_sliding_window(self):
    """Bootstrap the signal strengths by removing each test vector from the training set.

    For every selected rule column a model is refitted on all rows except
    that rule's row, and the held-out row's decision-function value becomes
    the rule's signal. The work is split into blocks of columns, each handled
    by its own multiprocessing.Process; the results come back through a Queue.
    """
    # pre-prioritize using test-in-target values and limit the rules
    # NOTE(review): parseArgs() stores None for a negative (unlimited) rule limit;
    # 1.4*None would raise TypeError here -- confirm the limits are ints on this path.
    if not self.debug:
        good_preidx = np.array([e[0] for e in sorted(enumerate(self.good_signal),key=lambda e: e[1],reverse=True)],dtype=int)[:int(np.ceil(1.4*self.good_rule_max))]
        self.good_columns = self.good_columns[good_preidx]
        bad_preidx = np.array([e[0] for e in sorted(enumerate(self.bad_signal),key=lambda e: e[1],reverse=True)],dtype=int)[:int(np.ceil(1.4*self.bad_rule_max))]
        self.bad_columns = self.bad_columns[bad_preidx]
    # multithreaded loop for speed
    use_blocked_not_sklearn_mp = True # it's a lot faster to block it yourself
    if use_blocked_not_sklearn_mp:
        # init w/ target-in-training results
        good_fv_logreg = copy.deepcopy(self.good_fv_logreg)
        good_fv_logreg.penalty = 'l2'
        good_fv_logreg.solver = 'sag'
        good_fv_logreg.warm_start = True
        good_fv_logreg.n_jobs = 1 # achieve parallelism via block processing
        bad_fv_logreg = copy.deepcopy(self.bad_fv_logreg)
        bad_fv_logreg.penalty = 'l2'
        bad_fv_logreg.solver = 'sag'
        bad_fv_logreg.warm_start = True
        bad_fv_logreg.n_jobs = 1 # achieve parallelism via block processing
        if False: # debug mp: turn off multiprocessing with a monkeypatch
            class NotAMultiProcess(mp.Process):
                def start(self): self.run()
                def join(self): pass
            mp.Process = NotAMultiProcess
        # this is probably efficient with Linux's copy-on-write fork(); unsure about BSD/macOS
        # must refactor to use shared Array() [along with warm_start coeff's] to ensure
        # see https://stackoverflow.com/questions/5549190/is-shared-readonly-data-copied-to-different-processes-for-python-multiprocessing/
        # distribute training and tests across multiprocessors
        # NOTE(review): training_op is a nested function used as a Process target;
        # presumably this relies on the fork start method -- confirm on spawn platforms.
        def training_op(queue, X_all, y_all, w_all, fv_logreg, columns, column_block):
            """Training and test operation put into a mp.Queue.
            columns[column_block] and signal[column_block] are the rule columns and corresponding signal strengths
            """
            res = np.zeros(len(column_block))
            for k in range(len(column_block)):
                # after the negation, mask is True on every row except the held-out one
                mask = np.zeros(len(y_all), dtype=bool)
                mask[columns[column_block[k]]] = True
                mask = np.logical_not(mask)
                x_test = X_all[np.logical_not(mask)]  # the single held-out row
                X = X_all[mask]
                y = y_all[mask]
                w = w_all[mask]
                fv_logreg.fit(X, y, sample_weight=w)
                res[k] = fv_logreg.decision_function(x_test)[0]
            queue.put((column_block,res)) # signal[column_block] = res
            return
        num_threads = mp.cpu_count()
        # good
        q = mp.Queue()
        jobs = []
        self.good_signal = np.zeros(len(self.good_columns))
        # NOTE(review): block_length is 0 when there are fewer columns than CPUs; the
        # loop below is then skipped and the signal stays all zeros.
        block_length = len(self.good_columns) // num_threads
        column_block = np.arange(0, block_length)
        while len(column_block) > 0:
            # drop indices past the end; the last block may come out shorter or empty
            column_block = column_block[np.where(column_block < len(self.good_columns))]
            fv_logreg = copy.deepcopy(good_fv_logreg) # each process gets its own .coeff_'s
            column_block_copy = np.copy(column_block) # each process gets its own block of columns
            p = mp.Process(target=training_op, args=(q, self.good_X_all, self.good_y_all, self.good_w_all, fv_logreg, self.good_columns, column_block_copy))
            p.start()
            jobs.append(p)
            column_block += len(column_block)  # advance to the next block of indices
        # process the results in the queue
        for i in range(len(jobs)):
            column_block, res = q.get()
            self.good_signal[column_block] = res
        # join all jobs and wait for them to complete
        for p in jobs: p.join()
        # bad
        q = mp.Queue()
        jobs = []
        self.bad_signal = np.zeros(len(self.bad_columns))
        block_length = len(self.bad_columns) // num_threads
        column_block = np.arange(0, block_length)
        while len(column_block) > 0:
            column_block = column_block[np.where(column_block < len(self.bad_columns))]
            fv_logreg = copy.deepcopy(bad_fv_logreg) # each process gets its own .coeff_'s
            column_block_copy = np.copy(column_block) # each process gets its own block of columns
            p = mp.Process(target=training_op, args=(q, self.bad_X_all, self.bad_y_all, self.bad_w_all, fv_logreg, self.bad_columns, column_block_copy))
            p.start()
            jobs.append(p)
            column_block += len(column_block)
        # process the results in the queue
        for i in range(len(jobs)):
            column_block, res = q.get()
            self.bad_signal[column_block] = res
        # join all jobs and wait for them to complete
        for p in jobs: p.join()
    else: # if use_blocked_not_sklearn_mp:
        # not reached while use_blocked_not_sklearn_mp is hard-coded to True above
        def training_op(X_all, y_all, w_all, fv_logreg, columns, signal):
            """Training and test operations reusing results with multiprocessing."""
            res = np.zeros(len(signal))
            for k in range(len(res)):
                mask = np.zeros(len(y_all), dtype=bool)
                mask[columns[k]] = True
                mask = np.logical_not(mask)
                x_test = X_all[np.logical_not(mask)]
                X = X_all[mask]
                y = y_all[mask]
                w = w_all[mask]
                fv_logreg.fit(X, y, sample_weight=w)
                res[k] = fv_logreg.decision_function(x_test)[0]
            signal[:] = res  # written in place into the caller's array
            return
        # good
        training_op(self.good_X_all, self.good_y_all, self.good_w_all, self.good_fv_logreg, self.good_columns, self.good_signal)
        # bad
        training_op(self.bad_X_all, self.bad_y_all, self.bad_w_all, self.bad_fv_logreg, self.bad_columns, self.bad_signal)
    return
def parse_easylist_rules(self):
    """Translate every good rule, then every bad rule, into the JavaScript variable lists."""
    for rule_set in (self.good_rules, self.bad_rules):
        for rule in rule_set:
            self.easylist_to_javascript_vars(rule)
    ordered_unique_all_js_var_lists()
def easylist_to_javascript_vars(self,rule,ignore_huge_url_regex_rule_list=False):
    """Classify one EasyList rule and append it to the matching global rule list.

    The rule is stripped of its exception marker and options, then routed to
    one of the good_*/bad_* lists (url regex, domain-anchored host exact/regex,
    hostpath exact/regex, domain-anchored regex, or url parts). Comments,
    blank rules, excluded options and rules rejected by regex_ignore_test()
    are dropped. Exception rules are dropped unless
    self.exceptions_include_flag is set.

    Args:
        rule: the EasyList rule text.
        ignore_huge_url_regex_rule_list: when true, nothing is appended to the
            url-parts lists in the final catch-all case.
    """
    rule = rule.rstrip()
    rule_orig = rule  # not read again in this method
    exception_flag = exception_filter(rule) # block default; pass if True
    rule = exception_re.sub(r'\1', rule)
    option_exception_re = not3dimppuposgh_option_exception_re # ignore these options by default
    opts = '' # default: no options in the rule
    if re_test(option_re, rule):
        opts = option_re.sub(r'\2', rule)
        # domain-specific and other option exceptions: ignore
        # too many rules (>~ 10k) bog down the browser; make reasonable exclusions here
        rule = option_re.sub(r'\1', rule) # delete all the options and continue
    # ignore these cases
    # comment case: ignore
    if re_test(comment_re, rule): return
    # block default or pass exception
    if exception_flag:
        option_exception_re = not3dimppuposgh_option_exception_re # ignore these options within exceptions
        if not self.exceptions_include_flag: return
    # specific options: ignore
    if re_test(option_exception_re, opts): return
    # blank url case: ignore
    if re_test(httpempty_re, rule): return
    # blank line case: ignore
    if not rule: return
    # treat each of the these cases separately, here and in Javascript
    # regex case
    if re_test(regex_re, rule):
        if regex_ignore_test(rule): return
        rule = regex_re.sub(r'\1', rule)
        if exception_flag:
            good_url_regex.append(rule)
        else:
            if not re_test(badregex_regex_filters_re,
                           rule): return # limit bad regex's to those in the filter
            bad_url_regex.append(rule)
        return
    # now that regex's are handled, delete unnecessary wildcards, e.g. /.../*
    rule = wildcard_begend_re.sub(r'\1', rule)
    # domain anchors, || or '|http://a.b' -> domain anchor 'a.b' for regex efficiency in JS
    if re_test(domain_anch_re, rule) or re_test(scheme_anchor_re, rule):
        # strip off initial || or |scheme://
        if re_test(domain_anch_re, rule):
            rule = domain_anch_re.sub(r'\1', rule)
        elif re_test(scheme_anchor_re, rule):
            rule = scheme_anchor_re.sub("", rule)
        # host subcase
        if re_test(da_hostonly_re, rule):
            rule = da_hostonly_re.sub(r'\1', rule)
            if not re_test(wild_anch_sep_exc_re, rule): # exact subsubcase
                # NOTE(review): unlike the regex sub-cases, this filter runs before the
                # exception_flag test, so it also drops exception (good) rules -- confirm intent.
                if not re_test(badregex_regex_filters_re, rule):
                    return # limit bad regex's to those in the filter
                if exception_flag:
                    good_da_host_exact.append(rule)
                else:
                    bad_da_host_exact.append(rule)
                return
            else: # regex subsubcase
                if regex_ignore_test(rule): return
                if exception_flag:
                    good_da_host_regex.append(rule)
                else:
                    if not re_test(badregex_regex_filters_re,
                                   rule): return # limit bad regex's to those in the filter
                    bad_da_host_regex.append(rule)
                return
        # hostpath subcase
        if re_test(da_hostpath_re, rule):
            rule = da_hostpath_re.sub(r'\1', rule)
            if not re_test(wild_sep_exc_noanch_re, rule) and re_test(pathend_re, rule): # exact subsubcase
                rule = re.sub(r'\|$', '', rule) # strip EOL anchors
                # NOTE(review): as in the host exact sub-case, exception rules are filtered here too.
                if not re_test(badregex_regex_filters_re, rule):
                    return # limit bad regex's to those in the filter
                if exception_flag:
                    good_da_hostpath_exact.append(rule)
                else:
                    bad_da_hostpath_exact.append(rule)
                return
            else: # regex subsubcase
                if regex_ignore_test(rule): return
                # ignore option rules for some regex rules
                if re_test(alloption_exception_re, opts): return
                if exception_flag:
                    good_da_hostpath_regex.append(rule)
                else:
                    if not re_test(badregex_regex_filters_re,
                                   rule): return # limit bad regex's to those in the filter
                    bad_da_hostpath_regex.append(rule)
                return
        # hostpathquery default case
        if True:
            # if re_test(re.compile(r'^go\.'),rule):
            # pass
            if regex_ignore_test(rule): return
            # no bad-regex filter is applied in this default case
            if exception_flag:
                good_da_regex.append(rule)
            else:
                bad_da_regex.append(rule)
            return
    # all other non-regex patterns
    if True:
        if regex_ignore_test(rule): return
        if not ignore_huge_url_regex_rule_list:
            if re_test(alloption_exception_re, opts): return
            if exception_flag:
                good_url_parts.append(rule)
            else:
                if not re_test(badregex_regex_filters_re,
                               rule): return # limit bad regex's to those in the filter
                bad_url_parts.append(rule)
            return # superfluous return
def create_pac_file(self):
    """Assemble the PAC text, report the size of each rule list, and write proxy.pac.

    The text is the preamble, the strategy notes as // comments, one
    JavaScript initializer per rule list, and the postamble. It is stored
    in self.proxy_pac and written to proxy.pac in self.easylist_dir.
    """
    self.proxy_pac_init()
    sections = [
        self.proxy_pac_preamble,
        "\n".join(["// " + text for text in self.easylist_strategy.split("\n")]),
        self.js_init_object('good_da_host_exact'),
        self.js_init_regexp('good_da_host_regex', True),
        self.js_init_object('good_da_hostpath_exact'),
        self.js_init_regexp('good_da_hostpath_regex', True),
        self.js_init_regexp('good_da_regex', True),
        self.js_init_object('good_da_host_exceptions_exact'),
        self.js_init_object('bad_da_host_exact'),
        self.js_init_regexp('bad_da_host_regex', True),
        self.js_init_object('bad_da_hostpath_exact'),
        self.js_init_regexp('bad_da_hostpath_regex', True),
        self.js_init_regexp('bad_da_regex', True),
        self.js_init_regexp('good_url_parts'),
        self.js_init_regexp('bad_url_parts'),
        self.js_init_regexp('good_url_regex', regex_flag=True),
        self.js_init_regexp('bad_url_regex', regex_flag=True),
        self.proxy_pac_postamble,
    ]
    self.proxy_pac = ''.join(sections)

    # report how many rules ended up in each global list
    js_var_names = (
        'good_da_host_exact',
        'good_da_host_regex',
        'good_da_hostpath_exact',
        'good_da_hostpath_regex',
        'good_da_regex',
        'good_da_host_exceptions_exact',
        'bad_da_host_exact',
        'bad_da_host_regex',
        'bad_da_hostpath_exact',
        'bad_da_hostpath_regex',
        'bad_da_regex',
        'good_url_parts',
        'bad_url_parts',
        'good_url_regex',
        'bad_url_regex',
    )
    module_vars = globals()
    for name in js_var_names:
        print("{}: {:d} rules".format(name, len(module_vars[name])), flush=True)

    pac_path = os.path.join(self.easylist_dir, 'proxy.pac')
    with open(pac_path, 'w', encoding='utf-8') as pac_file:
        pac_file.write(self.proxy_pac)
def proxy_pac_init(self):
self.pac_proxy = 'PROXY {}'.format(self.proxy_host_port) if self.proxy_host_port else 'DIRECT'
# define a default, user-supplied FindProxyForURL function
self.default_FindProxyForURL_function = '''\
function FindProxyForURL(url, host)
{
if (
isPlainHostName(host) ||
shExpMatch(host, "10.*") ||
shExpMatch(host, "172.16.*") ||
shExpMatch(host, "192.168.*") ||
shExpMatch(host, "127.*") ||
dnsDomainIs(host, ".local") || dnsDomainIs(host, ".LOCAL")
)
return "DIRECT";
else if (
/*
Proxy bypass hostnames
*/
/*
Fix iOS 13 PAC file issue with Mail.app
See: https://forums.developer.apple.com/thread/121928
*/
// Apple
(host == "imap.mail.me.com") || (host == "smtp.mail.me.com") ||
dnsDomainIs(host, "imap.mail.me.com") || dnsDomainIs(host, "smtp.mail.me.com") ||
(host == "p03-imap.mail.me.com") || (host == "p03-smtp.mail.me.com") ||
dnsDomainIs(host, "p03-imap.mail.me.com") || dnsDomainIs(host, "p03-smtp.mail.me.com") ||
(host == "p66-imap.mail.me.com") || (host == "p66-smtp.mail.me.com") ||
dnsDomainIs(host, "p66-imap.mail.me.com") || dnsDomainIs(host, "p66-smtp.mail.me.com") ||
// Google
(host == "imap.gmail.com") || (host == "smtp.gmail.com") ||
dnsDomainIs(host, "imap.gmail.com") || dnsDomainIs(host, "smtp.gmail.com") ||
// Yahoo
(host == "imap.mail.yahoo.com") || (host == "smtp.mail.yahoo.com") ||
dnsDomainIs(host, "imap.mail.yahoo.com") || dnsDomainIs(host, "smtp.mail.yahoo.com") ||
// Comcast
(host == "imap.comcast.net") || (host == "smtp.comcast.net") ||
dnsDomainIs(host, "imap.comcast.net") || dnsDomainIs(host, "smtp.comcast.net") ||
// Apple Enterprise Network Domains; https://support.apple.com/en-us/HT210060
(host == "albert.apple.com") || dnsDomainIs(host, "albert.apple.com") ||
(host == "captive.apple.com") || dnsDomainIs(host, "captive.apple.com") ||
(host == "gs.apple.com") || dnsDomainIs(host, "gs.apple.com") ||
(host == "humb.apple.com") || dnsDomainIs(host, "humb.apple.com") ||
(host == "static.ips.apple.com") || dnsDomainIs(host, "static.ips.apple.com") ||
(host == "tbsc.apple.com") || dnsDomainIs(host, "tbsc.apple.com") ||
(host == "time-ios.apple.com") || dnsDomainIs(host, "time-ios.apple.com") ||
(host == "time.apple.com") || dnsDomainIs(host, "time.apple.com") ||
(host == "time-macos.apple.com") || dnsDomainIs(host, "time-macos.apple.com") ||
dnsDomainIs(host, ".push.apple.com") ||
(host == "gdmf.apple.com") || dnsDomainIs(host, "gdmf.apple.com") ||
(host == "deviceenrollment.apple.com") || dnsDomainIs(host, "deviceenrollment.apple.com") ||
(host == "deviceservices-external.apple.com") || dnsDomainIs(host, "deviceservices-external.apple.com") ||
(host == "identity.apple.com") || dnsDomainIs(host, "identity.apple.com") ||
(host == "iprofiles.apple.com") || dnsDomainIs(host, "iprofiles.apple.com") ||
(host == "mdmenrollment.apple.com") || dnsDomainIs(host, "mdmenrollment.apple.com") ||
(host == "setup.icloud.com") || dnsDomainIs(host, "setup.icloud.com") ||
(host == "appldnld.apple.com") || dnsDomainIs(host, "appldnld.apple.com") ||
(host == "gg.apple.com") || dnsDomainIs(host, "gg.apple.com") ||
(host == "gnf-mdn.apple.com") || dnsDomainIs(host, "gnf-mdn.apple.com") ||
(host == "gnf-mr.apple.com") || dnsDomainIs(host, "gnf-mr.apple.com") ||
(host == "gs.apple.com") || dnsDomainIs(host, "gs.apple.com") ||
(host == "ig.apple.com") || dnsDomainIs(host, "ig.apple.com") ||
(host == "mesu.apple.com") || dnsDomainIs(host, "mesu.apple.com") ||
(host == "oscdn.apple.com") || dnsDomainIs(host, "oscdn.apple.com") ||
(host == "osrecovery.apple.com") || dnsDomainIs(host, "osrecovery.apple.com") ||
(host == "skl.apple.com") || dnsDomainIs(host, "skl.apple.com") ||
(host == "swcdn.apple.com") || dnsDomainIs(host, "swcdn.apple.com") ||
(host == "swdist.apple.com") || dnsDomainIs(host, "swdist.apple.com") ||
(host == "swdownload.apple.com") || dnsDomainIs(host, "swdownload.apple.com") ||
(host == "swpost.apple.com") || dnsDomainIs(host, "swpost.apple.com") ||
(host == "swscan.apple.com") || dnsDomainIs(host, "swscan.apple.com") ||
(host == "updates-http.cdn-apple.com") || dnsDomainIs(host, "updates-http.cdn-apple.com") ||
(host == "updates.cdn-apple.com") || dnsDomainIs(host, "updates.cdn-apple.com") ||
(host == "xp.apple.com") || dnsDomainIs(host, "xp.apple.com") ||
dnsDomainIs(host, ".itunes.apple.com") ||
dnsDomainIs(host, ".apps.apple.com") ||
dnsDomainIs(host, ".mzstatic.com") ||
(host == "ppq.apple.com") || dnsDomainIs(host, "ppq.apple.com") ||
(host == "lcdn-registration.apple.com") || dnsDomainIs(host, "lcdn-registration.apple.com") ||
(host == "crl.apple.com") || dnsDomainIs(host, "crl.apple.com") ||
(host == "crl.entrust.net") || dnsDomainIs(host, "crl.entrust.net") ||
(host == "crl3.digicert.com") || dnsDomainIs(host, "crl3.digicert.com") ||
(host == "crl4.digicert.com") || dnsDomainIs(host, "crl4.digicert.com") ||
(host == "ocsp.apple.com") || dnsDomainIs(host, "ocsp.apple.com") ||
(host == "ocsp.digicert.com") || dnsDomainIs(host, "ocsp.digicert.com") ||
(host == "ocsp.entrust.net") || dnsDomainIs(host, "ocsp.entrust.net") ||
(host == "ocsp.verisign.net") || dnsDomainIs(host, "ocsp.verisign.net") ||
// Zoom
dnsDomainIs(host, ".zoom.us")
)
return "PROXY localhost:3128";
else
return "PROXY localhost:3128";
}
'''
if os.path.isfile(self.orig_pac_file):
with open(self.orig_pac_file, 'r', encoding='utf-8') as fd:
self.original_FindProxyForURL_function = fd.read()
else:
self.original_FindProxyForURL_function = self.default_FindProxyForURL_function
# change last 'return "PROXY ..."' to 'return EasyListFindProxyForURL(url, host)'
def re_sub_last(pattern, repl, string, **kwargs):
    '''Replace only the last match of `pattern` in `string` with `repl`.

    Parameters:
        pattern: regular expression (string or compiled pattern); it may
            contain any number of capturing groups.
        repl: replacement template (backreferences such as \\1 are
            expanded against the last match) or a callable that takes the
            match object and returns the replacement text.
        string: the text to search.
        **kwargs: forwarded to re.finditer (in practice: flags=...).

    Returns:
        `string` with its last match replaced, or `string` unchanged when
        the pattern does not match at all.
    '''
    # Locate the last match directly rather than re.split-ing on a wrapped
    # pattern: splitting on a pattern that has its own capturing groups
    # interleaves the group captures with the pieces, which drops or
    # duplicates text when the pieces are joined back together.
    last_match = None
    for last_match in re.finditer(pattern, string, **kwargs):
        pass
    if last_match is None:
        return string
    # Expand the replacement against the match in its original context, so
    # backreferences, look-around and anchors behave as they do in re.sub.
    if callable(repl):
        replacement = repl(last_match)
    else:
        replacement = last_match.expand(repl)
    return string[:last_match.start()] + replacement + string[last_match.end():]
self.original_FindProxyForURL_function = re_sub_last(r'return[\s]+"PROXY[^"]+"', 'return EasyListFindProxyForURL(url, host)',
self.original_FindProxyForURL_function)
# proxy.pac preamble
self.calling_command = ' '.join([os.path.basename(sys.argv[0])] + sys.argv[1:])
self.proxy_pac_preamble = '''\
// PAC (Proxy Auto Configuration) Filter from EasyList rules
//
// Copyright (C) 2017 by Steven T. Smith <steve dot t dot smith at gmail dot com>, GPL
// https://github.com/essandess/easylist-pac-privoxy/
//
// PAC file created on {}
// Created with command: {}
//
// http://www.gnu.org/licenses/lgpl.txt
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// If you normally use a proxy, replace "DIRECT" below with
// "PROXY MACHINE:PORT"
// where MACHINE is the IP address or host name of your proxy
// server and PORT is the port number of your proxy server.
//
// Influenced in part by code from King of the PAC from http://securemecca.com/pac.html
// Define the blackhole proxy for blocked adware and trackware
var normal = "DIRECT";
var proxy = "{}"; // e.g. 127.0.0.1:3128
// var blackhole_ip_port = "127.0.0.1:8119"; // ngnix-hosted blackhole
// var blackhole_ip_port = "8.8.8.8:53"; // GOOG DNS blackhole; do not use: no longer works with iOS 11—causes long waits on some sites
var blackhole_ip_port = "{}"; // on iOS a working blackhole requires return code 200;
// e.g. use the adblock2privoxy nginx server as a blackhole
var blackhole = "PROXY " + blackhole_ip_port;
// The hostnames must be consistent with EasyList format.
// These special RegExp characters will be escaped below: [.?+@]
// This EasyList wildcard will be transformed to an efficient RegExp: *
//
// EasyList format references:
// https://adblockplus.org/filters
// https://adblockplus.org/filter-cheatsheet
// Create object hashes or compile efficient NFA's from all filters
// Various alternate filtering and regex approaches were timed using node and at jsperf.com
// Too many rules (>~ 10k) bog down the browser; make reasonable exclusions here:
'''.format(time.strftime("%a, %d %b %Y %X GMT", time.gmtime()),self.calling_command,self.pac_proxy,self.blackhole_ip_port)
self.proxy_pac_postamble = '''
// Add any good networks here. Format is network folowed by a comma and
// optional white space, and then the netmask.
// LAN, loopback, Apple (direct and Akamai e.g. e4805.a.akamaiedge.net), Microsoft (updates and services)
// Apple Enterprise Network; https://support.apple.com/en-us/HT210060
var GoodNetworks_Array = [ "10.0.0.0, 255.0.0.0",
"172.16.0.0, 255.240.0.0",
"17.248.128.0, 255.255.192.0",
"17.250.64.0, 255.255.192.0",
"17.248.192.0, 255.255.224.0",
"192.168.0.0, 255.255.0.0",
"127.0.0.0, 255.0.0.0",
"17.0.0.0, 255.0.0.0",
"23.2.8.68, 255.255.255.255",
"23.2.145.78, 255.255.255.255",
"23.39.179.17, 255.255.255.255",
"23.63.98.0, 255.255.254.0",
"104.70.71.223, 255.255.255.255",
"104.73.77.224, 255.255.255.255",
"104.96.184.235, 255.255.255.255",
"104.96.188.194, 255.255.255.255",
"65.52.0.0, 255.255.252.0" ];
// Apple iAd, Microsoft telemetry
var GoodNetworks_Exceptions_Array = [ "17.172.28.11, 255.255.255.255",
"134.170.30.202, 255.255.255.255",
"137.116.81.24, 255.255.255.255",
"157.56.106.189, 255.255.255.255",
"184.86.53.99, 255.255.255.255",
"2.22.61.43, 255.255.255.255",
"2.22.61.66, 255.255.255.255",
"204.79.197.200, 255.255.255.255",
"23.218.212.69, 255.255.255.255",
"65.39.117.230, 255.255.255.255",
"65.52.108.33, 255.255.255.255",
"65.55.108.23, 255.255.255.255",
"64.4.54.254, 255.255.255.255" ];
// Akamai: 23.64.0.0/14, 23.0.0.0/12, 23.32.0.0/11, 104.64.0.0/10
// Add any bad networks here. Format is network folowed by a comma and
// optional white space, and then the netmask.
// From securemecca.com: Adobe marketing cloud, 2o7, omtrdc, Sedo domain parking, flyingcroc, accretive
var BadNetworks_Array = [ "61.139.105.128, 255.255.255.192",
"63.140.35.160, 255.255.255.248",
"63.140.35.168, 255.255.255.252",
"63.140.35.172, 255.255.255.254",
"63.140.35.174, 255.255.255.255",
"66.150.161.32, 255.255.255.224",
"66.235.138.0, 255.255.254.0",
"66.235.141.0, 255.255.255.0",
"66.235.143.48, 255.255.255.254",
"66.235.143.64, 255.255.255.254",
"66.235.153.16, 255.255.255.240",
"66.235.153.32, 255.255.255.248",
"81.31.38.0, 255.255.255.128",
"82.98.86.0, 255.255.255.0",
"89.185.224.0, 255.255.224.0",
"207.66.128.0, 255.255.128.0" ];
// block these schemes; use the command line for ftp, rsync, etc. instead
var bad_schemes_RegExp = RegExp("^(?:ftp|sftp|tftp|ftp-data|rsync|finger|gopher)", "i")
// RegExp for schemes; lengths from
// perl -lane 'BEGIN{$l=0;} {!/^#/ && do{$ll=length($F[0]); if($ll>$l){$l=$ll;}};} END{print $l;}' /etc/services
var schemepart_RegExp = RegExp("^([\\\\w*+-]{2,15}):\\\\/{0,2}","i");
var hostpart_RegExp = RegExp("^((?:[\\\\w-]+\\\\.)+[a-zA-Z0-9-]{2,24}\\\\.?)", "i");
var querypart_RegExp = RegExp("^((?:[\\\\w-]+\\\\.)+[a-zA-Z0-9-]{2,24}\\\\.?[\\\\w~%.\\\\/^*-]*)(\\\\??\\\\S*?)$", "i");
var domainpart_RegExp = RegExp("^(?:[\\\\w-]+\\\\.)*((?:[\\\\w-]+\\\\.)[a-zA-Z0-9-]{2,24})\\\\.?", "i");
//////////////////////////////////////////////////
// Define the is_ipv4_address function and vars //
//////////////////////////////////////////////////
var ipv4_RegExp = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/;
function is_ipv4_address(host)
{
var ipv4_pentary = host.match(ipv4_RegExp);
var is_valid_ipv4 = false;
if (ipv4_pentary) {
is_valid_ipv4 = true;
for( i = 1; i <= 4; i++) {
if (ipv4_pentary[i] >= 256) {
is_valid_ipv4 = false;
}
}
}
return is_valid_ipv4;
}
// object hashes
// Note: original stackoverflow-based hasOwnProperty does not woth within iOS kernel
var hasOwnProperty = function(obj, prop) {
return obj.hasOwnProperty(prop);
}
/////////////////////
// Done Setting Up //
/////////////////////
// debug with Chrome at chrome://net-export
// alert("Debugging message.")
//////////////////////////////////
// Define the FindProxyFunction //
//////////////////////////////////
var use_pass_rules_parts_flag = true; // use the pass rules for url parts, then apply the block rules
var alert_flag = false; // use for short-circuit '&&' to print debugging statements
var debug_flag = false; // use for short-circuit '&&' to print debugging statements
// EasyList filtering for FindProxyForURL(url, host)
function EasyListFindProxyForURL(url, host)
{
var host_is_ipv4 = is_ipv4_address(host);
var host_ipv4_address;
alert_flag && alert("url is: " + url);
alert_flag && alert("host is: " + host);
// Extract scheme and url without scheme
var scheme = url.match(schemepart_RegExp)
scheme = scheme.length > 0? scheme[1] : "";
// Remove the scheme and extract the path for regex efficiency
var url_noscheme = url.replace(schemepart_RegExp,"");
var url_pathonly = url_noscheme.replace(hostpart_RegExp,"");
var url_noquery = url_noscheme.replace(querypart_RegExp,"$1");
// Remove the server name from the url and host if host is not an IPv4 address
var url_noserver = !host_is_ipv4 ? url_noscheme.replace(domainpart_RegExp,"$1") : url_noscheme;
var url_noservernoquery = !host_is_ipv4 ? url_noquery.replace(domainpart_RegExp,"$1") : url_noscheme;
var host_noserver = !host_is_ipv4 ? host.replace(domainpart_RegExp,"$1") : host;
// Debugging results
if (debug_flag && alert_flag) {
alert("url_noscheme is: " + url_noscheme);
alert("url_pathonly is: " + url_pathonly);
alert("url_noquery is: " + url_noquery);
alert("url_noserver is: " + url_noserver);
alert("url_noservernoquery is: " + url_noservernoquery);
alert("host_noserver is: " + host_noserver);
}
// Short circuit to blackhole for good_da_host_exceptions
if ( hasOwnProperty(good_da_host_exceptions_exact_JSON,host) ) {
alert_flag && alert("good_da_host_exceptions_exact_JSON blackhole!");
return blackhole;
}
///////////////////////////////////////////////////////////////////////
// Check to make sure we can get an IPv4 address from the given host //
// name. If we cannot do that then skip the Networks tests. //
///////////////////////////////////////////////////////////////////////
host_ipv4_address = host_is_ipv4 ? host : (isResolvable(host) ? dnsResolve(host) : false);
if (host_ipv4_address) {
alert_flag && alert("host ipv4 address is: " + host_ipv4_address);
/////////////////////////////////////////////////////////////////////////////
// If the IP translates to one of the GoodNetworks_Array (with exceptions) //
// we pass it because it is considered safe. //
/////////////////////////////////////////////////////////////////////////////
for (i in GoodNetworks_Exceptions_Array) {
tmpNet = GoodNetworks_Exceptions_Array[i].split(/,\s*/);
if (isInNet(host_ipv4_address, tmpNet[0], tmpNet[1])) {
alert_flag && alert("GoodNetworks_Exceptions_Array Blackhole: " + host_ipv4_address);
return blackhole;
}
}
for (i in GoodNetworks_Array) {
tmpNet = GoodNetworks_Array[i].split(/,\s*/);
if (isInNet(host_ipv4_address, tmpNet[0], tmpNet[1])) {
alert_flag && alert("GoodNetworks_Array PASS: " + host_ipv4_address);
return proxy;
}
}
///////////////////////////////////////////////////////////////////////
// If the IP translates to one of the BadNetworks_Array we fail it //