@@ -113,6 +113,13 @@ def preprocess_data(self):
113
113
df .sort_values (by = self .time_col , inplace = True )
114
114
df = df .reset_index (drop = True )
115
115
116
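+ # scale resource (and resource_target) down by a power of ten so the max resource value falls within 0~100; no scaling when it is already <= 100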
117
+ resource_max = df [self .resource_col ].max ()
118
+ resource_scaling_factor = 1 if resource_max <= 100 else 10 ** np .ceil (np .log10 (resource_max / 100 ))
119
+ self .logger .info (f'resource scaling factor: { resource_scaling_factor } ' )
120
+ df [self .resource_col ] = df [self .resource_col ] / resource_scaling_factor
121
+ self .resource_target = self .resource_target / resource_scaling_factor
122
+
116
123
features = self .traffic_cols
117
124
118
125
self .logger .info (f'checkout before filtering NaN: '
@@ -628,7 +635,12 @@ def bin2str(x):
628
635
629
636
630
637
class EstimationException(Exception):
    """Error raised when estimation cannot produce a result.

    Attributes:
        message: Human-readable description; this is what ``str(exc)`` returns.
        info: Optional diagnostic payload supplied by the raiser (for example
            a dict of per-policy metrics). ``None`` when not provided.
    """

    def __init__(self, message, info=None):
        # ``info`` defaults to None so call sites that pass only a message
        # (the form accepted before this class gained a constructor) keep
        # working.
        # Forwarding the message to Exception initialises the base class
        # properly: ``args`` holds the message, and the exception can be
        # rebuilt from ``args`` alone while ``info`` travels in the
        # instance dict.
        super().__init__(message)
        self.message = message
        self.info = info

    def __str__(self):
        return self.message
632
644
633
645
634
646
def estimate (data : pd .DataFrame ,
@@ -639,7 +651,9 @@ def estimate(data: pd.DataFrame,
639
651
traffic_cols : list [str ],
640
652
resource_target : float ,
641
653
time_delta_hours : int ,
642
- test_dataset_size_in_seconds : int = 86400 ) -> pd .DataFrame :
654
+ test_dataset_size_in_seconds : int = 86400 ,
655
+ min_correlation_allowed : float = 0.9 ,
656
+ max_mse_allowed : float = 10.0 ) -> pd .DataFrame :
643
657
logging .basicConfig (level = logging .INFO ,
644
658
format = '%(asctime)s - %(levelname)s: %(message)s' )
645
659
logger = logging .getLogger ()
@@ -660,19 +674,30 @@ def estimate(data: pd.DataFrame,
660
674
estimator .test ()
661
675
logger .info (f'********* testing cost time: { time .time () - st10 } *********' )
662
676
663
- if (estimator .pearsonr [0 ] >= 0.9 and estimator .pearsonr [1 ] < 0.01
664
- and estimator .big_e_10 == 0 and estimator .mse < 10 ):
677
+ logger .info (f'********* [linear] correlation: { estimator .pearsonr [0 ]} , significance: { estimator .pearsonr [1 ]} , big_e_10: { estimator .big_e_10 } , mse: { estimator .mse } *********' )
678
+ logger .info (f'********* [residual] correlation: { estimator .pearsonr_rf [0 ]} , significance: { estimator .pearsonr_rf [1 ]} , big_e_10: { estimator .big_e_10_rf } , mse: { estimator .mse_rf } *********' )
679
+
680
+ if (estimator .pearsonr [0 ] >= min_correlation_allowed and estimator .pearsonr [1 ] < 0.01
681
+ and estimator .big_e_10 == 0 and estimator .mse <= max_mse_allowed ):
665
682
st10 = time .time ()
666
683
estimator .policy_linear ()
667
684
logger .info (f'********* linear policy cost time: { time .time () - st10 } *********' )
668
685
return estimator .output
669
686
670
- elif (estimator .pearsonr_rf [0 ] >= 0.9 and estimator .pearsonr_rf [1 ] < 0.01 and estimator .big_e_10_rf == 0
671
- and estimator .mse_rf < 10 and estimator .pearsonr [0 ] >= 0.6 and estimator .pearsonr [1 ] < 0.01 ):
687
+ elif (estimator .pearsonr_rf [0 ] >= min_correlation_allowed and estimator .pearsonr_rf [1 ] < 0.01 and estimator .big_e_10_rf == 0
688
+ and estimator .mse_rf <= max_mse_allowed and estimator .pearsonr [0 ] >= 0.6 and estimator .pearsonr [1 ] < 0.01 ):
672
689
st10 = time .time ()
673
690
estimator .policy_residual ()
674
691
logger .info (f'********* residual policy cost time: { time .time () - st10 } *********' )
675
692
return estimator .output
676
693
677
694
else :
678
- raise EstimationException ("no policy fits" )
695
+ raise EstimationException ('no policy fits' ,
696
+ {'linear' : {'correlation' : estimator .pearsonr [0 ],
697
+ 'significance' : estimator .pearsonr [1 ],
698
+ 'big_e_10' : estimator .big_e_10 ,
699
+ 'mse' : estimator .mse },
700
+ 'residual' : {'correlation' : estimator .pearsonr_rf [0 ],
701
+ 'significance' : estimator .pearsonr_rf [1 ],
702
+ 'big_e_10' : estimator .big_e_10_rf ,
703
+ 'mse' : estimator .mse_rf }})
0 commit comments