Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: enhance multimodal support for images and audio in instructor #1212

Merged
merged 2 commits into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,27 +66,34 @@ import instructor
from openai import OpenAI
from pydantic import BaseModel


class UserInfo(BaseModel):
name: str
age: int


# Initialize the OpenAI client with Instructor
client = instructor.from_openai(OpenAI())


# Define hook functions
def log_kwargs(**kwargs):
print(f"Function called with kwargs: {kwargs}")


def log_exception(exception: Exception):
print(f"An exception occurred: {str(exception)}")


client.on("completion:kwargs", log_kwargs)
client.on("completion:error", log_exception)

user_info = client.chat.completions.create(
model="gpt-4o-mini",
response_model=UserInfo,
messages=[{"role": "user", "content": "Extract the user name: 'John is 20 years old'"}],
messages=[
{"role": "user", "content": "Extract the user name: 'John is 20 years old'"}
],
)

"""
Expand Down
4 changes: 2 additions & 2 deletions docs/blog/posts/announcing-gemini-tool-calling-support.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class User(BaseModel):

client = instructor.from_gemini(
client=genai.GenerativeModel(
model_name="models/gemini-1.5-flash-latest", # (1)!
model_name="models/gemini-1.5-flash-latest", # (1)!
)
)

Expand Down Expand Up @@ -105,7 +105,7 @@ class User(BaseModel):


client = instructor.from_vertexai(
client=GenerativeModel("gemini-1.5-pro-preview-0409"), # (1)!
client=GenerativeModel("gemini-1.5-pro-preview-0409"), # (1)!
)


Expand Down
12 changes: 6 additions & 6 deletions docs/blog/posts/anthropic-prompt-caching.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,6 @@ Let's first initialize our Anthropic client, this will be the same as what we've
```python
from instructor import Instructor, Mode, patch
from anthropic import Anthropic
from pydantic import BaseModel


client = Instructor(
Expand All @@ -203,9 +202,10 @@ client = Instructor(
We'll then create a new `Character` class that will be used to extract a single character from the text, and read in our source text (roughly 2856 tokens using the Anthropic tokenizer).

```python
with open("./book.txt", "r") as f:
with open("./book.txt") as f:
book = f.read()


class Character(BaseModel):
name: str
description: str
Expand All @@ -215,7 +215,7 @@ Once we've done this, we can then make an api call to get the description of the

```python
for _ in range(2):
resp, completion = client.chat.completions.create_with_completion( # (1)!
resp, completion = client.chat.completions.create_with_completion( # (1)!
model="claude-3-haiku-20240307",
messages=[
{
Expand All @@ -224,7 +224,7 @@ for _ in range(2):
{
"type": "text",
"text": "<book>" + book + "</book>",
"cache_control": {"type": "ephemeral"}, # (2)!
"cache_control": {"type": "ephemeral"}, # (2)!
},
{
"type": "text",
Expand All @@ -238,7 +238,7 @@ for _ in range(2):
)
assert isinstance(resp, Character)

print(completion.usage) # (3)!
print(completion.usage) # (3)!
print(resp)
```

Expand Down Expand Up @@ -307,7 +307,7 @@ class Character(BaseModel):
description: str


with open("./book.txt", "r") as f:
with open("./book.txt") as f:
book = f.read()

for _ in range(2):
Expand Down
57 changes: 27 additions & 30 deletions docs/blog/posts/bad-schemas-could-break-llms.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ from datasets import load_dataset, Dataset, DatasetDict

splits = ["test", "train"]


def generate_gsm8k(split):
ds = load_dataset("gsm8k", "main", split=split, streaming=True)
for row in ds:
Expand All @@ -60,6 +61,7 @@ def generate_gsm8k(split):
"reasoning": reasoning,
}


# Create the dataset for train and test splits
train_dataset = Dataset.from_generator(lambda: generate_gsm8k("train"))
test_dataset = Dataset.from_generator(lambda: generate_gsm8k("test"))
Expand Down Expand Up @@ -143,6 +145,7 @@ class Answer(BaseModel):
chain_of_thought: str
answer: int


class OnlyAnswer(BaseModel):
answer: int
```
Expand Down Expand Up @@ -214,22 +217,26 @@ class Answer(BaseModel):
chain_of_thought: str
answer: int


class AnswerWithCalculation(BaseModel):
chain_of_thought: str
required_calculations: list[str]
answer: int


class AssumptionBasedAnswer(BaseModel):
assumptions: list[str]
logic_flow: str
answer: int


class ErrorAwareCalculation(BaseModel):
key_steps: list[str]
potential_pitfalls: list[str]
intermediate_results: list[str]
answer: int


class AnswerWithNecessaryCalculationAndFinalChoice(BaseModel):
chain_of_thought: str
necessary_calculations: list[str]
Expand Down Expand Up @@ -279,43 +286,33 @@ In fact, the only thing that changed was the last two parameters. Upon closer in

```python
{
"chain_of_thought": "In the race, there are a total of 240 Asians. Given that 80 were Japanese, we can calculate the number of Chinese participants by subtracting the number of Japanese from the total number of Asians: 240 - 80 = 160. Now, it is given that there are 60 boys on the Chinese team. Therefore, to find the number of girls on the Chinese team, we subtract the number of boys from the total number of Chinese participants: 160 - 60 = 100 girls. Thus, the number of girls on the Chinese team is 100.",
"necessary_calculations": [
"Total Asians = 240",
"Japanese participants = 80",
"Chinese participants = Total Asians - Japanese participants = 240 - 80 = 160",
"Boys in Chinese team = 60",
"Girls in Chinese team = Chinese participants - Boys in Chinese team = 160 - 60 = 100"
],
"potential_final_choices": [
"60",
"100",
"80",
"120"
],
"final_choice": 2
"chain_of_thought": "In the race, there are a total of 240 Asians. Given that 80 were Japanese, we can calculate the number of Chinese participants by subtracting the number of Japanese from the total number of Asians: 240 - 80 = 160. Now, it is given that there are 60 boys on the Chinese team. Therefore, to find the number of girls on the Chinese team, we subtract the number of boys from the total number of Chinese participants: 160 - 60 = 100 girls. Thus, the number of girls on the Chinese team is 100.",
"necessary_calculations": [
"Total Asians = 240",
"Japanese participants = 80",
"Chinese participants = Total Asians - Japanese participants = 240 - 80 = 160",
"Boys in Chinese team = 60",
"Girls in Chinese team = Chinese participants - Boys in Chinese team = 160 - 60 = 100",
],
"potential_final_choices": ["60", "100", "80", "120"],
"final_choice": 2,
}
```

This meant that instead of the final answer of 100, our model was generating potential responses it could give and returning the final choice as the index of that answer. Simply renaming the fields of our response model to `potential_final_answers` and `final_answer` restored the original accuracy of `95%`.

```python
{
"chain_of_thought": "First, we need to determine how many Asians were Chinese. Since there were 240 Asians in total and 80 of them were Japanese, we can find the number of Chinese by subtracting the number of Japanese from the total: 240 - 80 = 160. Now, we know that there are 160 Chinese participants. Given that there were 60 boys on the Chinese team, we can find the number of girls by subtracting the number of boys from the total number of Chinese: 160 - 60 = 100. Therefore, there are 100 girls on the Chinese team.",
"necessary_calculations": [
"Total Asians = 240",
"Number of Japanese = 80",
"Number of Chinese = 240 - 80 = 160",
"Number of boys on Chinese team = 60",
"Number of girls on Chinese team = 160 - 60 = 100"
],
"potential_final_answers": [
"100",
"60",
"80",
"40"
],
"answer": 100
"chain_of_thought": "First, we need to determine how many Asians were Chinese. Since there were 240 Asians in total and 80 of them were Japanese, we can find the number of Chinese by subtracting the number of Japanese from the total: 240 - 80 = 160. Now, we know that there are 160 Chinese participants. Given that there were 60 boys on the Chinese team, we can find the number of girls by subtracting the number of boys from the total number of Chinese: 160 - 60 = 100. Therefore, there are 100 girls on the Chinese team.",
"necessary_calculations": [
"Total Asians = 240",
"Number of Japanese = 80",
"Number of Chinese = 240 - 80 = 160",
"Number of boys on Chinese team = 60",
"Number of girls on Chinese team = 160 - 60 = 100",
],
"potential_final_answers": ["100", "60", "80", "40"],
"answer": 100,
}
```

Expand Down
12 changes: 7 additions & 5 deletions docs/blog/posts/best_framework.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,25 +31,27 @@ Here's an example of extracting structured user data from an LLM:
from pydantic import BaseModel
import instructor


class User(BaseModel):
name: str
age: int


client = instructor.from_openai(openai.OpenAI())

user = client.chat.completions.create(
model="gpt-3.5-turbo",
response_model=User, # (1)!
response_model=User, # (1)!
messages=[
{
"role": "user",
"content": "Extract the user's name and age from this: John is 25 years old"
"content": "Extract the user's name and age from this: John is 25 years old",
}
]
],
)

print(user) # (2)!
# > User(name='John', age=25)
print(user) # (2)!
#> User(name='John', age=25)
```

1. Notice that now we have a new `response_model` parameter that we pass in to the `completions.create` method. This parameter lets us specify the structure we want the LLM output to be mapped to. In this case, we're using a Pydantic model called `User` that describes a user's name and age.
Expand Down
3 changes: 3 additions & 0 deletions docs/blog/posts/caching.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,18 @@ print(f"Time taken: {time.perf_counter() - start}")
def decorator(func):
def wrapper(*args, **kwargs):
print("Do something before") # (1)
#> Do something before
result = func(*args, **kwargs)
print("Do something after") # (2)
#> Do something after
return result

return wrapper


@decorator
def say_hello():
#> Hello!
print("Hello!")


Expand Down
2 changes: 2 additions & 0 deletions docs/blog/posts/chat-with-your-pdf-with-gemini.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,12 @@ client = instructor.from_gemini(
)
)


# Define your output structure
class Summary(BaseModel):
summary: str


# Upload the PDF
file = genai.upload_file("path/to/your.pdf")

Expand Down
20 changes: 10 additions & 10 deletions docs/blog/posts/distilation-part1.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,16 @@ for _ in range(10):
a = random.randint(100, 999)
b = random.randint(100, 999)
print(fn(a, b))
#> a=873 b=234 result=204282
#> a=902 b=203 result=183106
#> a=962 b=284 result=273208
#> a=491 b=739 result=362849
#> a=193 b=400 result=77200
#> a=300 b=448 result=134400
#> a=952 b=528 result=502656
#> a=574 b=797 result=457478
#> a=482 b=204 result=98328
#> a=781 b=278 result=217118
#> a=444 b=204 result=90576
#> a=194 b=489 result=94866
#> a=199 b=467 result=92933
#> a=967 b=452 result=437084
#> a=718 b=370 result=265660
#> a=926 b=144 result=133344
#> a=847 b=570 result=482790
#> a=649 b=227 result=147323
#> a=487 b=180 result=87660
#> a=665 b=400 result=266000
```

## The Intricacies of Fine-tuning Language Models
Expand Down
Loading
Loading