
Commit 1a33534

Merge pull request #89 from zqzten/algorithm
Algo: Introduce estimation threshold control to replicas estimation
2 parents 32eaf29 + 53ddab4 commit 1a33534

File tree: 2 files changed (+46 -9 lines)


algorithm/kapacity/portrait/horizontal/predictive/main.py

+14 -2
@@ -83,6 +83,16 @@ def parse_args():
     parser.add_argument('--re-test-dataset-size-in-seconds',
                         help='size of test dataset in seconds for replicas estimation model',
                         required=False, default=86400)
+    parser.add_argument('--re-min-correlation-allowed',
+                        help='minimum allowed correlation of replicas estimation model,'
+                             'the estimation would fail if the model\'s correlation is lower than this threshold,'
+                             'this arg should be a float number within range [0, 1]',
+                        required=False, default=0.9)
+    parser.add_argument('--re-max-mse-allowed',
+                        help='maximum allowed MSE of replicas estimation model,'
+                             'the estimation would fail if the model\'s MSE is larger than this threshold,'
+                             'this arg should be a float number within range [0, +∞)',
+                        required=False, default=10.0)
     parser.add_argument('--scaling-freq', help='frequency of scaling, the duration should be larger than the frequency'
                                                'of the time series forecasting model',
                         required=True)
@@ -131,12 +141,14 @@ def predict_replicas(args, metric_ctx, pred_traffics):
                                   traffic_col,
                                   metric_ctx.resource_target,
                                   int(args.re_time_delta_hours),
-                                  int(args.re_test_dataset_size_in_seconds))
+                                  int(args.re_test_dataset_size_in_seconds),
+                                  float(args.re_min_correlation_allowed),
+                                  float(args.re_max_mse_allowed))
         if 'NO_RESULT' in pred['rule_code'].unique():
             raise RuntimeError('there exist points that no replica number would meet the resource target, please consider setting a more reasonable resource target')
         return pred
     except estimator.EstimationException as e:
-        raise RuntimeError("replicas estimation failed, this may be caused by insufficient or irregular history data") from e
+        raise RuntimeError(f'replicas estimation failed, this may be caused by insufficient or irregular history data, detailed estimation info: {e.info}') from e


 def merge_history_dict(history_dict):
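
For reference, here is a minimal standalone sketch of how the two new flags behave. It mirrors the argparse pattern in the hunk above but is not the full upstream parser, and the sample command line is invented for illustration:

import argparse

# Standalone sketch: only the two new flags, with the same defaults as the diff above.
parser = argparse.ArgumentParser()
parser.add_argument('--re-min-correlation-allowed', required=False, default=0.9,
                    help='minimum allowed correlation of replicas estimation model, within [0, 1]')
parser.add_argument('--re-max-mse-allowed', required=False, default=10.0,
                    help='maximum allowed MSE of replicas estimation model, within [0, +inf)')

# Invented sample invocation: one flag overridden, the other keeps its default.
args = parser.parse_args(['--re-min-correlation-allowed', '0.8'])

# Values passed on the command line arrive as strings while untouched defaults stay numeric,
# hence the explicit float() conversions before they are handed to the estimator.
print(float(args.re_min_correlation_allowed), float(args.re_max_mse_allowed))  # 0.8 10.0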

algorithm/kapacity/portrait/horizontal/predictive/replicas_estimator.py

+32 -7
@@ -113,6 +113,13 @@ def preprocess_data(self):
         df.sort_values(by=self.time_col, inplace=True)
         df = df.reset_index(drop=True)

+        # scale resource to 0~100
+        resource_max = df[self.resource_col].max()
+        resource_scaling_factor = 1 if resource_max <= 100 else 10**np.ceil(np.log10(resource_max / 100))
+        self.logger.info(f'resource scaling factor: {resource_scaling_factor}')
+        df[self.resource_col] = df[self.resource_col] / resource_scaling_factor
+        self.resource_target = self.resource_target / resource_scaling_factor
+
         features = self.traffic_cols

         self.logger.info(f'checkout before filtering NaN: '
@@ -628,7 +635,12 @@ def bin2str(x):


 class EstimationException(Exception):
-    pass
+    def __init__(self, message, info):
+        self.message = message
+        self.info = info
+
+    def __str__(self):
+        return self.message


 def estimate(data: pd.DataFrame,
@@ -639,7 +651,9 @@ def estimate(data: pd.DataFrame,
              traffic_cols: list[str],
              resource_target: float,
              time_delta_hours: int,
-             test_dataset_size_in_seconds: int = 86400) -> pd.DataFrame:
+             test_dataset_size_in_seconds: int = 86400,
+             min_correlation_allowed: float = 0.9,
+             max_mse_allowed: float = 10.0) -> pd.DataFrame:
     logging.basicConfig(level=logging.INFO,
                         format='%(asctime)s - %(levelname)s: %(message)s')
     logger = logging.getLogger()
@@ -660,19 +674,30 @@ def estimate(data: pd.DataFrame,
     estimator.test()
     logger.info(f'********* testing cost time: {time.time() - st10} *********')

-    if (estimator.pearsonr[0] >= 0.9 and estimator.pearsonr[1] < 0.01
-            and estimator.big_e_10 == 0 and estimator.mse < 10):
+    logger.info(f'********* [linear] correlation: {estimator.pearsonr[0]}, significance: {estimator.pearsonr[1]}, big_e_10: {estimator.big_e_10}, mse: {estimator.mse} *********')
+    logger.info(f'********* [residual] correlation: {estimator.pearsonr_rf[0]}, significance: {estimator.pearsonr_rf[1]}, big_e_10: {estimator.big_e_10_rf}, mse: {estimator.mse_rf} *********')
+
+    if (estimator.pearsonr[0] >= min_correlation_allowed and estimator.pearsonr[1] < 0.01
+            and estimator.big_e_10 == 0 and estimator.mse <= max_mse_allowed):
         st10 = time.time()
         estimator.policy_linear()
         logger.info(f'********* linear policy cost time: {time.time() - st10} *********')
         return estimator.output

-    elif (estimator.pearsonr_rf[0] >= 0.9 and estimator.pearsonr_rf[1] < 0.01 and estimator.big_e_10_rf == 0
-          and estimator.mse_rf < 10 and estimator.pearsonr[0] >= 0.6 and estimator.pearsonr[1] < 0.01):
+    elif (estimator.pearsonr_rf[0] >= min_correlation_allowed and estimator.pearsonr_rf[1] < 0.01 and estimator.big_e_10_rf == 0
+          and estimator.mse_rf <= max_mse_allowed and estimator.pearsonr[0] >= 0.6 and estimator.pearsonr[1] < 0.01):
         st10 = time.time()
         estimator.policy_residual()
         logger.info(f'********* residual policy cost time: {time.time() - st10} *********')
         return estimator.output

     else:
-        raise EstimationException("no policy fits")
+        raise EstimationException('no policy fits',
+                                  {'linear': {'correlation': estimator.pearsonr[0],
+                                              'significance': estimator.pearsonr[1],
+                                              'big_e_10': estimator.big_e_10,
+                                              'mse': estimator.mse},
+                                   'residual': {'correlation': estimator.pearsonr_rf[0],
+                                                'significance': estimator.pearsonr_rf[1],
+                                                'big_e_10': estimator.big_e_10_rf,
+                                                'mse': estimator.mse_rf}})
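
Taken together, the thresholds now gate both the linear and the residual policy, and a rejected model surfaces its quality metrics instead of a bare message. Below is a minimal self-contained sketch of that contract; the exception class mirrors the one added above, while the gate function name and the metric values are illustrative only, not the upstream code:

class EstimationException(Exception):
    # Mirrors the exception added above: human-readable message plus structured info.
    def __init__(self, message, info):
        self.message = message
        self.info = info

    def __str__(self):
        return self.message


def passes_gate(correlation, significance, big_e_10, mse,
                min_correlation_allowed=0.9, max_mse_allowed=10.0):
    # Same shape as the check in estimate(): strong and significant correlation,
    # no large relative errors (big_e_10 == 0), and MSE under the configured ceiling.
    return (correlation >= min_correlation_allowed and significance < 0.01
            and big_e_10 == 0 and mse <= max_mse_allowed)


# Invented metrics for illustration only.
metrics = {'correlation': 0.82, 'significance': 0.002, 'big_e_10': 0, 'mse': 14.3}

if not passes_gate(**metrics):  # rejected under the defaults (0.9 / 10.0)
    try:
        raise EstimationException('no policy fits', {'linear': metrics})
    except EstimationException as e:
        # The caller can now surface the structured details, as main.py does.
        print(f'replicas estimation failed, detailed estimation info: {e.info}')

print(passes_gate(**metrics, min_correlation_allowed=0.8, max_mse_allowed=20.0))  # True

Note that the default thresholds keep the previous hard-coded behaviour, except the MSE comparison is now inclusive (<= instead of <).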
