@@ -386,6 +386,8 @@ def scan_rec(self, path, scan_like_data,filenames_set,check_dev=True,dev_call=No
        self_scan_rec = self.scan_rec

        filenames_set_add = filenames_set.add
+        self_header_ext_stats = self.header.ext_stats
+        self_header_ext_stats_size = self.header.ext_stats_size
        try:
            with scandir(path) as res:

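The two new locals mirror the self_scan_rec and filenames_set_add bindings above: they hoist the self.header.ext_stats lookups out of the scan loop, so each hot-path access is one local load instead of two attribute lookups per file. A minimal standalone sketch of the pattern (Header, Scanner and count are illustrative names, not the record's actual API):

    from collections import defaultdict

    class Header:
        def __init__(self):
            self.ext_stats = defaultdict(int)

    class Scanner:
        def __init__(self):
            self.header = Header()

        def count(self, suffixes):
            # hoisted once: avoids re-resolving self.header.ext_stats per iteration
            self_header_ext_stats = self.header.ext_stats
            for suffix in suffixes:
                self_header_ext_stats[suffix] += 1
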
@@ -402,7 +404,10 @@ def scan_rec(self, path, scan_like_data,filenames_set,check_dev=True,dev_call=No

                    is_dir,is_file,is_symlink = entry.is_dir(),entry.is_file(),entry.is_symlink()

-                    self.ext_statistics[pathlib_Path(entry).suffix] += 1
+                    ext = pathlib_Path(entry).suffix
+
+                    if is_file:
+                        self_header_ext_stats[ext] += 1

                    self.info_line_current = entry_name

@@ -450,6 +455,7 @@ def scan_rec(self, path, scan_like_data,filenames_set,check_dev=True,dev_call=No
                        else:
                            has_files = False
                            size = int(stat_res.st_size)
+                            self_header_ext_stats_size[ext] += size

                            local_folder_size += size

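Taken together, the three scan_rec hunks tally files per extension: quantity is counted where the entry is classified, size where st_size is read. A self-contained sketch of the same bookkeeping, assuming plain os.scandir and pathlib (the helper name is hypothetical):

    from collections import defaultdict
    from os import scandir
    from pathlib import Path

    def ext_statistics(path):
        ext_stats = defaultdict(int)        # extension -> number of files
        ext_stats_size = defaultdict(int)   # extension -> total bytes
        with scandir(path) as entries:
            for entry in entries:
                if entry.is_file(follow_symlinks=False):
                    ext = Path(entry).suffix
                    ext_stats[ext] += 1
                    ext_stats_size[ext] += entry.stat(follow_symlinks=False).st_size
        return ext_stats, ext_stats_size

Unlike the recursive scan_rec, this sketch covers a single directory level; the diff's version also applies its own symlink and device checks.
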
@@ -478,7 +484,8 @@ def scan(self,cde_list,check_dev=True):

        self.header.sum_size = 0

-        self.ext_statistics = defaultdict(int)
+        self.header.ext_stats = defaultdict(int)
+        self.header.ext_stats_size = defaultdict(int)
        self.scan_data = {}

        #########################
@@ -508,9 +515,6 @@ def scan(self,cde_list,check_dev=True):

        self.info_line = ''

-        #for ext,stat in sorted(self.ext_statistics.items(),key = lambda x : x[1],reverse=True):
-        #    print(ext,stat)
-
    def prepare_customdata_pool_rec(self,scan_like_data,parent_path):
        scan_path = self.header.scan_path
        self_prepare_customdata_pool_rec = self.prepare_customdata_pool_rec
@@ -602,6 +606,13 @@ def threaded_cde(timeout_semi_list):
            time_start_all = perf_counter()

            aborted_string = 'Custom data extraction was aborted.'
+
+            files_cde_errors_quant = defaultdict(int)
+
+            files_cde_quant = 0
+            files_cde_size = 0
+            files_cde_size_extracted = 0
+
            for (scan_like_list,subpath,rule_nr,size) in self.customdata_pool.values():

                self.killed = False
@@ -629,9 +640,7 @@ def threaded_cde(timeout_semi_list):
                    subprocess = uni_popen(command,shell)
                    timeout_semi_list[0] = (timeout_val,subprocess)
                except Exception as re:
-                    #print('threaded_cde error:',re)
-                    subprocess = None
-                    timeout_semi_list[0] = (timeout_val,subprocess)
+                    timeout_semi_list[0] = None
                    returncode = 201
                    output = str(re)
                else:
@@ -655,20 +664,21 @@ def threaded_cde(timeout_semi_list):
                        output_list_append('Killed.')

                output = '\n'.join(output_list).strip()
+                if not output:
+                    returncode = 203

                #####################################

                time_end = perf_counter()
                customdata_stats_time[rule_nr] += time_end - time_start

                if returncode or self.killed or aborted:
-                    self_header.files_cde_errors_quant[rule_nr] += 1
-                    self_header.files_cde_errors_quant_all += 1
+                    files_cde_errors_quant[rule_nr] += 1

                if not aborted:
-                    self_header.files_cde_quant += 1
-                    self_header.files_cde_size += size
-                    self_header.files_cde_size_extracted += asizeof(output)
+                    files_cde_quant += 1
+                    files_cde_size += size
+                    files_cde_size_extracted += asizeof(output)

                new_elem = {}
                new_elem['cd_ok'] = bool(returncode == 0 and not self.killed and not aborted)
@@ -694,6 +704,13 @@ def threaded_cde(timeout_semi_list):

            time_end_all = perf_counter()

+            self_header.files_cde_errors_quant = files_cde_errors_quant
+            self_header.files_cde_errors_quant_all = sum(files_cde_errors_quant.values())
+
+            self_header.files_cde_quant = files_cde_quant
+            self_header.files_cde_size = files_cde_size
+            self_header.files_cde_size_extracted = files_cde_size_extracted
+
            customdata_stats_time_all[0] = time_end_all - time_start_all
            sys.exit() #thread

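These counter hunks change the aggregation strategy: instead of incrementing header fields per extracted file, the worker accumulates in plain locals and publishes to the header once before the thread exits, so other threads never observe half-updated totals and errors_quant_all stays consistent with the per-rule dict. Reduced to a sketch (the job tuples and function name are illustrative):

    from collections import defaultdict

    def run_extraction(jobs, header):
        files_cde_errors_quant = defaultdict(int)
        files_cde_quant = files_cde_size = 0
        for rule_nr, size, ok in jobs:      # (rule number, file size, succeeded)
            if not ok:
                files_cde_errors_quant[rule_nr] += 1
            files_cde_quant += 1
            files_cde_size += size
        # single write-back keeps the derived total in sync with the dict
        header.files_cde_errors_quant = files_cde_errors_quant
        header.files_cde_errors_quant_all = sum(files_cde_errors_quant.values())
        header.files_cde_quant = files_cde_quant
        header.files_cde_size = files_cde_size
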
@@ -709,7 +726,7 @@ def threaded_cde(timeout_semi_list):
                    kill_subprocess(subprocess)
                    self.killed = True
            else:
-                sleep(0.2)
+                sleep(0.1)

        cde_thread.join()

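The sleep change tightens the supervisor loop that watches timeout_semi_list: polling every 0.1s instead of 0.2s roughly halves the worst-case overshoot past a subprocess deadline, at negligible CPU cost. A simplified guess at the loop's shape (the deadline handling and names here are assumptions, not the actual implementation):

    from time import monotonic, sleep

    def supervise(timeout_semi_list, thread_is_alive):
        while thread_is_alive():
            slot = timeout_semi_list[0]
            if slot:
                deadline, subprocess = slot
                if monotonic() > deadline and subprocess.poll() is None:
                    subprocess.kill()       # stand-in for kill_subprocess()
                    timeout_semi_list[0] = None
            sleep(0.1)
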
@@ -1142,6 +1159,28 @@ def prepare_info(self):
                info_list.append(' ' + ' '.join(line_list))
        self.txtinfo_basic = self.txtinfo_basic + f'\n\n{loaded_fs_info}\n{loaded_cd_info}'

+        try:
+            longest = max({len(ext) for ext in self.header.ext_stats}) + 2
+
+            sublist = []
+            for ext,ext_stat in sorted(self.header.ext_stats.items(),key = lambda x : x[1],reverse=True):
+                sublist.append(f'{ext.ljust(longest)} {fnumber(ext_stat).rjust(12)} {bytes_to_str(self.header.ext_stats_size[ext]).rjust(12)}')
+            info_list.append('')
+            info_list.append('Files extensions statistics by quantity:')
+            info_list.append('========================================')
+            info_list.extend(sublist)
+
+            sublist_size = []
+            for ext,ext_stat in sorted(self.header.ext_stats_size.items(),key = lambda x : x[1],reverse=True):
+                sublist_size.append(f'{ext.ljust(longest)} {bytes_to_str(self.header.ext_stats_size[ext]).rjust(12)} {fnumber(self.header.ext_stats[ext]).rjust(12)}')
+            info_list.append('')
+            info_list.append('Files extensions statistics by sum size:')
+            info_list.append('========================================')
+            info_list.extend(sublist_size)
+        except Exception as se:
+            #print(se)
+            pass
+
        self.txtinfo = '\n'.join(info_list)

    def has_cd(self):
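The report block left-pads each extension to the longest key and right-aligns the numeric columns; the try/except guards the degenerate case where ext_stats is empty and max() over an empty set raises ValueError. A standalone sketch of the formatting, with str() standing in for the record's fnumber()/bytes_to_str() helpers (their exact output format is assumed):

    def format_ext_table(ext_stats, ext_stats_size):
        # default=0 sidesteps the empty-dict ValueError the diff catches instead
        longest = max((len(ext) for ext in ext_stats), default=0) + 2
        lines = ['Files extensions statistics by quantity:',
                 '========================================']
        for ext, quant in sorted(ext_stats.items(), key=lambda x: x[1], reverse=True):
            lines.append(f'{ext.ljust(longest)} {str(quant).rjust(12)} {str(ext_stats_size[ext]).rjust(12)}')
        return '\n'.join(lines)
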
@@ -1485,20 +1524,39 @@ def find_items_in_records(self,

        records_to_process.sort(reverse=True,key = lambda x : x.header.quant_files)

+        params = (size_min,size_max,
+                  t_min,t_max,
+                  find_filename_search_kind,name_expr,name_case_sens,
+                  find_cd_search_kind,cd_expr,cd_case_sens,
+                  filename_fuzzy_threshold,cd_fuzzy_threshold)
+
+        searchinfofile = sep.join([self.db_dir,'searchinfo'])
+        try:
+            with open(searchinfofile, "wb") as f:
+                f.write(ZstdCompressor(level=8,threads=1).compress(dumps(params)))
+        except Exception as e:
+            print(e)
+
        record_commnad_list = {}
        is_frozen = bool(getattr(sys, 'frozen', False))

        for record_nr,record in enumerate(records_to_process):
+            curr_command_list = record_commnad_list[record_nr] = []
+
            if windows:
                if is_frozen:
-                    curr_command_list = record_commnad_list[record_nr] = ['record.exe', 'load', record.file_path]
+                    curr_command_list.append('record.exe')
                else:
-                    curr_command_list = record_commnad_list[record_nr] = ['python','src\\record.py', 'load', record.file_path]
+                    curr_command_list.extend(['python','src\\record.py'])
            else:
                if is_frozen:
-                    curr_command_list = record_commnad_list[record_nr] = ['./record', 'load', record.file_path]
+                    curr_command_list.append('./record')
                else:
-                    curr_command_list = record_commnad_list[record_nr] = ['python3','./src/record.py', 'load', record.file_path]
+                    curr_command_list.extend(['python3','./src/record.py'])
+
+            curr_command_list.extend(['search',record.file_path])
+
+            curr_command_list.append(searchinfofile)

            if t_min:
                curr_command_list.extend(['--timestamp_min',str(t_min)])
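The search hunk replaces the per-record 'load' invocation with a 'search' subcommand: the shared parameters are pickled once, zstd-compressed into a single searchinfo file, and each spawned record process receives only the record path plus that file. A round-trip sketch using the same zstandard calls (the reader side is an assumption about how the child consumes the file):

    from pickle import dumps, loads
    from zstandard import ZstdCompressor, ZstdDecompressor

    def write_searchinfo(path, params):
        with open(path, 'wb') as f:
            f.write(ZstdCompressor(level=8, threads=1).compress(dumps(params)))

    def read_searchinfo(path):
        with open(path, 'rb') as f:
            return loads(ZstdDecompressor().decompress(f.read()))

Passing one compressed file instead of a long argv also keeps the command line short and avoids quoting issues with user-supplied expressions.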