From efc6ccd1377d1b4289c94bf3849d5c52abe9cf79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cclaudio-code=E2=80=9D?= Date: Mon, 23 Dec 2024 20:09:09 -0300 Subject: [PATCH] added test to validate creation/read token in cache --- .../ai/anthropic/api/AnthropicApi.java | 6 ++-- .../ai/anthropic/api/StreamHelper.java | 4 ++- .../ai/anthropic/api/AnthropicApiIT.java | 32 ++++++++++++------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java index bce3742f27c..dce8e05fabf 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java @@ -76,8 +76,6 @@ public class AnthropicApi { private static final String HEADER_ANTHROPIC_BETA = "anthropic-beta"; - public static final String BETA_PROMPT_CACHING = "prompt-caching-2024-07-31"; - private static final Predicate SSE_DONE_PREDICATE = "[DONE]"::equals; private final RestClient restClient; @@ -905,7 +903,9 @@ public record ChatCompletionResponse( public record Usage( // @formatter:off @JsonProperty("input_tokens") Integer inputTokens, - @JsonProperty("output_tokens") Integer outputTokens) { + @JsonProperty("output_tokens") Integer outputTokens, + @JsonProperty("cache_creation_input_tokens") Integer cacheCreationInputTokens, + @JsonProperty("cache_read_input_tokens") Integer cacheReadInputTokens) { // @formatter:off } diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java index ae62eb0748c..f3a515e324d 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java @@ -174,7 +174,9 @@ else if (event.type().equals(EventType.MESSAGE_DELTA)) { if (messageDeltaEvent.usage() != null) { var totalUsage = new Usage(contentBlockReference.get().usage.inputTokens(), - messageDeltaEvent.usage().outputTokens()); + messageDeltaEvent.usage().outputTokens(), + contentBlockReference.get().usage.cacheCreationInputTokens(), + contentBlockReference.get().usage.cacheReadInputTokens()); contentBlockReference.get().withUsage(totalUsage); } } diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java index 8fd9ce31f94..3d53cbd6947 100644 --- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java +++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java @@ -27,10 +27,7 @@ import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionResponse; import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock; import org.springframework.ai.anthropic.api.AnthropicApi.Role; -import org.springframework.ai.retry.RetryUtils; import org.springframework.http.ResponseEntity; -import org.springframework.web.client.RestClient; -import org.springframework.web.reactive.function.client.WebClient; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -46,17 +43,30 @@ public class AnthropicApiIT { @Test void chatWithPromptCache() { - AnthropicApi anthropicApiBeta = new AnthropicApi(AnthropicApi.DEFAULT_BASE_URL, - System.getenv("ANTHROPIC_API_KEY"), AnthropicApi.DEFAULT_ANTHROPIC_VERSION, RestClient.builder(), - WebClient.builder(), RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER, AnthropicApi.BETA_PROMPT_CACHING); + String userMessageText = "It could be either a contraction of the full title Quenta Silmarillion (\"Tale of the Silmarils\") or also a plain Genitive which " + + "(as in Ancient Greek) signifies reference. This genitive is translated in English with \"about\" or \"of\" " + + "constructions; the titles of the chapters in The Silmarillion are examples of this genitive in poetic English " + + "(Of the Sindar, Of Men, Of the Darkening of Valinor etc), where \"of\" means \"about\" or \"concerning\". " + + "In the same way, Silmarillion can be taken to mean \"Of/About the Silmarils\""; + AnthropicMessage chatCompletionMessage = new AnthropicMessage( - List.of(new ContentBlock("Tell me a Joke?", AnthropicCacheType.EPHEMERAL.cacheControl())), Role.USER); + List.of(new ContentBlock(userMessageText.repeat(20), AnthropicCacheType.EPHEMERAL.cacheControl())), + Role.USER); - ResponseEntity response = anthropicApiBeta - .chatCompletionEntity(new ChatCompletionRequest(AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue(), - List.of(chatCompletionMessage), null, 100, 0.8, false)); + ChatCompletionRequest chatCompletionRequest = new ChatCompletionRequest( + AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue(), List.of(chatCompletionMessage), null, 100, 0.8, + false); + AnthropicApi.Usage createdCacheToken = anthropicApi.chatCompletionEntity(chatCompletionRequest) + .getBody() + .usage(); - assertThat(response).isNotNull(); + assertThat(createdCacheToken.cacheCreationInputTokens()).isGreaterThan(0); + assertThat(createdCacheToken.cacheReadInputTokens()).isEqualTo(0); + + AnthropicApi.Usage readCacheToken = anthropicApi.chatCompletionEntity(chatCompletionRequest).getBody().usage(); + + assertThat(readCacheToken.cacheCreationInputTokens()).isEqualTo(0); + assertThat(readCacheToken.cacheReadInputTokens()).isGreaterThan(0); } @Test