v0.12.22 (#17969)
logan-markewich authored Mar 1, 2025
1 parent 7529a74 commit 5815613
Showing 9 changed files with 65 additions and 25 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,19 @@
# ChangeLog

## [2025-02-28]

### `llama-index-core` [0.12.22]

- fix agentworkflow tool call tracking on final response (#17968)

### `llama-index-readers-github` [0.6.0]

- Ensure that Github reader uses timeout and retries params (#17959)

### `llama-index-readers-web` [0.3.7]

- chore: update FireCrawlWebReader integration to support extract (#17957)

## [2025-02-27]

### `llama-index-core` [0.12.21]
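Note on the `llama-index-core` entry above: #17968 makes `AgentWorkflow` report the tool calls it executed on the final agent output. A rough sketch of where that shows up, assuming a tool-calling LLM and the 0.12.x `AgentWorkflow` API (the OpenAI model name and the exact shape of `response.tool_calls` are illustrative assumptions, not taken from this commit):

```python
import asyncio

from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI  # any tool-calling LLM should work here


def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b


async def main() -> None:
    workflow = AgentWorkflow.from_tools_or_functions(
        [FunctionTool.from_defaults(fn=add)],
        llm=OpenAI(model="gpt-4o-mini"),  # placeholder model name
    )
    response = await workflow.run(user_msg="What is 2 + 3?")
    # With the #17968 fix, tool calls made during the run are tracked on the
    # final response instead of being dropped once the agent answers.
    print(response.tool_calls)


asyncio.run(main())
```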
16 changes: 15 additions & 1 deletion docs/docs/CHANGELOG.md
@@ -1,5 +1,19 @@
# ChangeLog

## [2025-02-28]

### `llama-index-core` [0.12.22]

- fix agentworkflow tool call tracking on final response (#17968)

### `llama-index-readers-github` [0.6.0]

- Ensure that Github reader uses timeout and retries params (#17959)

### `llama-index-readers-web` [0.3.7]

- chore: update FireCrawlWebReader integration to support extract (#17957)

## [2025-02-27]

### `llama-index-core` [0.12.21]
@@ -10,7 +24,7 @@
- Feature/remove retriever tool template override (#17909)
- only modify delta if 'Answer:' was actually detected (#17901)
- Fix CitationQueryEngine init function for response_synthesizer (#17897)
- fix ChatSummaryMemoryBuffer._summarize_oldest_chat_history (#17845)
- fix ChatSummaryMemoryBuffer.\_summarize_oldest_chat_history (#17845)
- fix: make base64 detection more robust across the board (#17930)
- fix: stepwise execution breaks when steps do async work (#17914)
- safer workflow cancel + fix restored context bug (#17938)
2 changes: 1 addition & 1 deletion llama-index-core/llama_index/core/__init__.py
@@ -1,6 +1,6 @@
"""Init file of LlamaIndex."""

__version__ = "0.12.21"
__version__ = "0.12.22"

import logging
from logging import NullHandler
2 changes: 1 addition & 1 deletion llama-index-core/pyproject.toml
@@ -46,7 +46,7 @@ name = "llama-index-core"
packages = [{include = "llama_index"}]
readme = "README.md"
repository = "https://github.com/run-llama/llama_index"
version = "0.12.21"
version = "0.12.22"

[tool.poetry.dependencies]
SQLAlchemy = {extras = ["asyncio"], version = ">=1.4.49"}
@@ -173,10 +173,13 @@ def __init__(self, **kwargs: Any) -> None:
self._sync_client = InferenceClient(**self._get_inference_client_kwargs())
self._async_client = AsyncInferenceClient(**self._get_inference_client_kwargs())

# set context window if not provided
info = self._sync_client.get_endpoint_info()
if "max_input_tokens" in info and kwargs.get("context_window") is None:
self.context_window = info["max_input_tokens"]
# set context window if not provided, if we can get the endpoint info
try:
info = self._sync_client.get_endpoint_info()
if "max_input_tokens" in info and kwargs.get("context_window") is None:
self.context_window = info["max_input_tokens"]
except Exception:
pass

def _get_inference_client_kwargs(self) -> Dict[str, Any]:
"""Extract the Hugging Face InferenceClient construction parameters."""
@@ -224,7 +227,7 @@ def _to_huggingface_messages(

def _parse_streaming_tool_calls(
self, tool_call_strs: List[str]
) -> List[ToolSelection | str]:
) -> List[Union[ToolSelection, str]]:
tool_calls = []
# Try to parse into complete objects, otherwise keep as strings
for tool_call_str in tool_call_strs:
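The switch from `ToolSelection | str` to `Union[ToolSelection, str]` above matters because PEP 604 `X | Y` unions are only evaluable at runtime on Python 3.10+, while this package declares `python = ">=3.9,<4.0"` (see the pyproject diff below). A standalone illustration of the difference, not the package's own code:

```python
from typing import List, Union


# Fine on Python 3.9: typing.Union works on every supported version.
def parse(items: List[str]) -> List[Union[int, str]]:
    return [int(s) if s.isdigit() else s for s in items]


# The equivalent annotation List[int | str] would raise
# "TypeError: unsupported operand type(s) for |" at definition time on 3.9,
# because int | str needs Python 3.10's PEP 604 support.
print(parse(["1", "two", "3"]))  # [1, 'two', 3]
```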
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-llms-huggingface-api"
readme = "README.md"
version = "0.4.0"
version = "0.4.1"

[tool.poetry.dependencies]
python = ">=3.9,<4.0"
@@ -3,8 +3,9 @@
import pytest
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from huggingface_hub.inference._generated.types import ChatCompletionOutput

STUB_MODEL_NAME = "placeholder_model"
STUB_MODEL_NAME = "microsoft/Phi-4-multimodal-instruct"


@pytest.fixture(name="hf_inference_api")
@@ -45,15 +46,17 @@ def test_chat(self, hf_inference_api: HuggingFaceInferenceAPI) -> None:
generated_response = (
" It's based on the book of the same name by James Fenimore Cooper."
)
conversational_return = {
"choices": [
{
"message": {
"content": generated_response,
conversational_return = ChatCompletionOutput.parse_obj(
{
"choices": [
{
"message": {
"content": generated_response,
}
}
}
],
}
],
}
)

with patch.object(
hf_inference_api._sync_client,
@@ -67,6 +70,8 @@ def test_chat(self, hf_inference_api: HuggingFaceInferenceAPI) -> None:
mock_conversational.assert_called_once_with(
messages=[{"role": m.role.value, "content": m.content} for m in messages],
model=STUB_MODEL_NAME,
temperature=0.1,
max_tokens=256,
)

def test_chat_text_generation(
@@ -97,6 +102,8 @@ def test_chat_text_generation(
assert response.message.content == conversational_return
mock_complete.assert_called_once_with(
"System: You are an expert movie reviewer\nUser: Which movie is the best?\nAssistant:",
model=STUB_MODEL_NAME,
temperature=0.1,
max_new_tokens=256,
)

@@ -109,5 +116,7 @@ def test_complete(self, hf_inference_api: HuggingFaceInferenceAPI) -> None:
return_value=generated_text,
) as mock_text_generation:
response = hf_inference_api.complete(prompt)
mock_text_generation.assert_called_once_with(prompt, max_new_tokens=256)
mock_text_generation.assert_called_once_with(
prompt, model=STUB_MODEL_NAME, temperature=0.1, max_new_tokens=256
)
assert response.text == generated_text
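Two adjustments run through these tests: the mocked client now returns a typed `ChatCompletionOutput` instead of a plain dict, and the assertions now include the `model`, `temperature`, and `max_tokens`/`max_new_tokens` arguments the wrapper forwards to the client (the 0.1 and 256 values appear to be the wrapper's defaults). A hedged sketch of building such a stub; the private `_generated.types` import simply mirrors the test above:

```python
from huggingface_hub.inference._generated.types import ChatCompletionOutput

# parse_obj builds the typed object from the same nested dict the old test
# used; attribute access such as .choices[0].message.content then works the
# way the wrapper expects, which a plain dict no longer satisfies.
stub = ChatCompletionOutput.parse_obj(
    {"choices": [{"message": {"content": "stub reply"}}]}
)
print(stub.choices[0].message.content)  # "stub reply"
```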
8 changes: 4 additions & 4 deletions poetry.lock


4 changes: 2 additions & 2 deletions pyproject.toml
@@ -45,7 +45,7 @@ name = "llama-index"
packages = [{from = "_llama-index", include = "llama_index"}]
readme = "README.md"
repository = "https://github.com/run-llama/llama_index"
version = "0.12.21"
version = "0.12.22"

[tool.poetry.dependencies]
python = ">=3.9,<4.0"
@@ -57,7 +57,7 @@ llama-index-agent-openai = "^0.4.0"
llama-index-readers-file = "^0.4.0"
llama-index-readers-llama-parse = ">=0.4.0"
llama-index-indices-managed-llama-cloud = ">=0.4.0"
llama-index-core = "^0.12.21"
llama-index-core = "^0.12.22"
llama-index-multi-modal-llms-openai = "^0.4.0"
llama-index-cli = "^0.4.1"
nltk = ">3.8.1" # avoids a CVE, temp until next release, should be in llama-index-core