From b801b5b2b2999a8d4667d79702afe51f490ad8b9 Mon Sep 17 00:00:00 2001 From: cr7258 Date: Thu, 13 Mar 2025 22:58:22 +0800 Subject: [PATCH] doc: add ai statistics metric doc --- .../extensions/ai-statistics/README.md | 40 ++++++++++++++++-- .../extensions/ai-statistics/README_EN.md | 42 +++++++++++++++++-- 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/plugins/wasm-go/extensions/ai-statistics/README.md b/plugins/wasm-go/extensions/ai-statistics/README.md index fad5ebfc4e..331c0410a8 100644 --- a/plugins/wasm-go/extensions/ai-statistics/README.md +++ b/plugins/wasm-go/extensions/ai-statistics/README.md @@ -100,11 +100,43 @@ attributes: apply_to_span: false ``` #### 监控 + +``` +# counter 类型,输入 token 数量的累加值 +route_upstream_model_consumer_metric_input_token{ai_route="ai-route-aliyun.internal",ai_cluster="outbound|443||llm-aliyun.internal.dns",ai_model="qwen-turbo",ai_consumer="none"} 24 + +# counter 类型,输出 token 数量的累加值 +route_upstream_model_consumer_metric_output_token{ai_route="ai-route-aliyun.internal",ai_cluster="outbound|443||llm-aliyun.internal.dns",ai_model="qwen-turbo",ai_consumer="none"} 507 + +# counter 类型,流式请求和非流式请求消耗总时间的累加值 +route_upstream_model_consumer_metric_llm_service_duration{ai_route="ai-route-aliyun.internal",ai_cluster="outbound|443||llm-aliyun.internal.dns",ai_model="qwen-turbo",ai_consumer="none"} 6470 + +# counter 类型,流式请求和非流式请求次数的累加值 +route_upstream_model_consumer_metric_llm_duration_count{ai_route="ai-route-aliyun.internal",ai_cluster="outbound|443||llm-aliyun.internal.dns",ai_model="qwen-turbo",ai_consumer="none"} 2 + +# counter 类型,流式请求首个 token 延时的累加值 +route_upstream_model_consumer_metric_llm_first_token_duration{ai_route="ai-route-aliyun.internal",ai_cluster="outbound|443||llm-aliyun.internal.dns",ai_model="qwen-turbo",ai_consumer="none"} 340 + +# counter 类型,流式请求次数的累加值 
+route_upstream_model_consumer_metric_llm_stream_duration_count{ai_route="ai-route-aliyun.internal",ai_cluster="outbound|443||llm-aliyun.internal.dns",ai_model="qwen-turbo",ai_consumer="none"} 1 +``` + +以下是使用指标的几个示例: + +流式请求首个 token 的平均延时: + +``` +irate(route_upstream_model_consumer_metric_llm_first_token_duration[2m]) +/ +irate(route_upstream_model_consumer_metric_llm_stream_duration_count[2m]) +``` + +流式请求和非流式请求平均消耗的总时长: + ``` -route_upstream_model_metric_input_token{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 343 -route_upstream_model_metric_output_token{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 153 -route_upstream_model_metric_llm_service_duration{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 3725 -route_upstream_model_metric_llm_duration_count{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 1 +irate(route_upstream_model_consumer_metric_llm_service_duration[2m]) +/ +irate(route_upstream_model_consumer_metric_llm_duration_count[2m]) ``` #### 日志 diff --git a/plugins/wasm-go/extensions/ai-statistics/README_EN.md b/plugins/wasm-go/extensions/ai-statistics/README_EN.md index 37e58186db..8935e5bcd4 100644 --- a/plugins/wasm-go/extensions/ai-statistics/README_EN.md +++ b/plugins/wasm-go/extensions/ai-statistics/README_EN.md @@ -100,11 +100,45 @@ attributes: apply_to_span: false ``` #### Metric + +Example metrics reported by the plugin: + +``` +# counter, cumulative count of input tokens +route_upstream_model_consumer_metric_input_token{ai_route="ai-route-aliyun.internal",ai_cluster="outbound|443||llm-aliyun.internal.dns",ai_model="qwen-turbo",ai_consumer="none"} 24 + +# counter, cumulative count of output tokens +route_upstream_model_consumer_metric_output_token{ai_route="ai-route-aliyun.internal",ai_cluster="outbound|443||llm-aliyun.internal.dns",ai_model="qwen-turbo",ai_consumer="none"} 507 + +# counter, cumulative total duration of both streaming and non-streaming requests 
+route_upstream_model_consumer_metric_llm_service_duration{ai_route="ai-route-aliyun.internal",ai_cluster="outbound|443||llm-aliyun.internal.dns",ai_model="qwen-turbo",ai_consumer="none"} 6470 + +# counter, cumulative count of both streaming and non-streaming requests +route_upstream_model_consumer_metric_llm_duration_count{ai_route="ai-route-aliyun.internal",ai_cluster="outbound|443||llm-aliyun.internal.dns",ai_model="qwen-turbo",ai_consumer="none"} 2 + +# counter, cumulative latency of the first token in streaming requests +route_upstream_model_consumer_metric_llm_first_token_duration{ai_route="ai-route-aliyun.internal",ai_cluster="outbound|443||llm-aliyun.internal.dns",ai_model="qwen-turbo",ai_consumer="none"} 340 + +# counter, cumulative count of streaming requests +route_upstream_model_consumer_metric_llm_stream_duration_count{ai_route="ai-route-aliyun.internal",ai_cluster="outbound|443||llm-aliyun.internal.dns",ai_model="qwen-turbo",ai_consumer="none"} 1 +``` + +Below are some example usages of these metrics: + +Average latency of the first token in streaming requests: + +``` +irate(route_upstream_model_consumer_metric_llm_first_token_duration[2m]) +/ +irate(route_upstream_model_consumer_metric_llm_stream_duration_count[2m]) +``` + +Average total duration of streaming and non-streaming requests: + ``` -route_upstream_model_metric_input_token{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 343 -route_upstream_model_metric_output_token{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 153 -route_upstream_model_metric_llm_service_duration{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 3725 -route_upstream_model_metric_llm_duration_count{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 1 +irate(route_upstream_model_consumer_metric_llm_service_duration[2m]) +/ +irate(route_upstream_model_consumer_metric_llm_duration_count[2m]) ``` #### Log