Skip to content

Commit

Permalink
Added a test to validate the cache-creation and cache-read token counts reported in usage
Browse files Browse the repository at this point in the history
  • Loading branch information
Claudio-code committed Dec 23, 2024
1 parent cafb350 commit efc6ccd
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,6 @@ public class AnthropicApi {

private static final String HEADER_ANTHROPIC_BETA = "anthropic-beta";

public static final String BETA_PROMPT_CACHING = "prompt-caching-2024-07-31";

private static final Predicate<String> SSE_DONE_PREDICATE = "[DONE]"::equals;

private final RestClient restClient;
Expand Down Expand Up @@ -905,7 +903,9 @@ public record ChatCompletionResponse(
public record Usage(
// @formatter:off
@JsonProperty("input_tokens") Integer inputTokens,
@JsonProperty("output_tokens") Integer outputTokens) {
@JsonProperty("output_tokens") Integer outputTokens,
@JsonProperty("cache_creation_input_tokens") Integer cacheCreationInputTokens,
@JsonProperty("cache_read_input_tokens") Integer cacheReadInputTokens) {
// @formatter:on
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,9 @@ else if (event.type().equals(EventType.MESSAGE_DELTA)) {

if (messageDeltaEvent.usage() != null) {
var totalUsage = new Usage(contentBlockReference.get().usage.inputTokens(),
messageDeltaEvent.usage().outputTokens());
messageDeltaEvent.usage().outputTokens(),
contentBlockReference.get().usage.cacheCreationInputTokens(),
contentBlockReference.get().usage.cacheReadInputTokens());
contentBlockReference.get().withUsage(totalUsage);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,7 @@
import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionResponse;
import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock;
import org.springframework.ai.anthropic.api.AnthropicApi.Role;
import org.springframework.ai.retry.RetryUtils;
import org.springframework.http.ResponseEntity;
import org.springframework.web.client.RestClient;
import org.springframework.web.reactive.function.client.WebClient;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
Expand All @@ -46,17 +43,30 @@ public class AnthropicApiIT {

@Test
void chatWithPromptCache() {
AnthropicApi anthropicApiBeta = new AnthropicApi(AnthropicApi.DEFAULT_BASE_URL,
System.getenv("ANTHROPIC_API_KEY"), AnthropicApi.DEFAULT_ANTHROPIC_VERSION, RestClient.builder(),
WebClient.builder(), RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER, AnthropicApi.BETA_PROMPT_CACHING);
String userMessageText = "It could be either a contraction of the full title Quenta Silmarillion (\"Tale of the Silmarils\") or also a plain Genitive which "
+ "(as in Ancient Greek) signifies reference. This genitive is translated in English with \"about\" or \"of\" "
+ "constructions; the titles of the chapters in The Silmarillion are examples of this genitive in poetic English "
+ "(Of the Sindar, Of Men, Of the Darkening of Valinor etc), where \"of\" means \"about\" or \"concerning\". "
+ "In the same way, Silmarillion can be taken to mean \"Of/About the Silmarils\"";

AnthropicMessage chatCompletionMessage = new AnthropicMessage(
List.of(new ContentBlock("Tell me a Joke?", AnthropicCacheType.EPHEMERAL.cacheControl())), Role.USER);
List.of(new ContentBlock(userMessageText.repeat(20), AnthropicCacheType.EPHEMERAL.cacheControl())),
Role.USER);

ResponseEntity<ChatCompletionResponse> response = anthropicApiBeta
.chatCompletionEntity(new ChatCompletionRequest(AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue(),
List.of(chatCompletionMessage), null, 100, 0.8, false));
ChatCompletionRequest chatCompletionRequest = new ChatCompletionRequest(
AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue(), List.of(chatCompletionMessage), null, 100, 0.8,
false);
AnthropicApi.Usage createdCacheToken = anthropicApi.chatCompletionEntity(chatCompletionRequest)
.getBody()
.usage();

assertThat(response).isNotNull();
assertThat(createdCacheToken.cacheCreationInputTokens()).isGreaterThan(0);
assertThat(createdCacheToken.cacheReadInputTokens()).isEqualTo(0);

AnthropicApi.Usage readCacheToken = anthropicApi.chatCompletionEntity(chatCompletionRequest).getBody().usage();

assertThat(readCacheToken.cacheCreationInputTokens()).isEqualTo(0);
assertThat(readCacheToken.cacheReadInputTokens()).isGreaterThan(0);
}

@Test
Expand Down

0 comments on commit efc6ccd

Please sign in to comment.