From 6c1d8fc7ae3976fc414f42663da281ca876a9107 Mon Sep 17 00:00:00 2001
From: hadley
Date: Fri, 17 Jan 2025 17:34:57 +0000
Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20tidyvers?=
 =?UTF-8?q?e/ellmer@421a351be174f6284a961cfc31f07286143ec273=20?=
 =?UTF-8?q?=F0=9F=9A=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 dev/articles/ellmer.html             |  14 +-
 dev/articles/prompt-design.html      | 135 ++++++++++++++-------
 dev/articles/structured-data.html    |  66 +++++++------
 dev/pkgdown.yml                      |   2 +-
 dev/reference/Chat.html              |   2 +-
 dev/reference/chat_claude.html       |  26 +++---
 dev/reference/chat_openai.html       | 101 ++++++++++----------
 dev/reference/content_image_url.html |  28 +++---
 dev/reference/token_usage.html       |   4 +-
 dev/reference/tool.html              |   6 +-
 dev/search.json                      |   2 +-
 11 files changed, 196 insertions(+), 190 deletions(-)

diff --git a/dev/articles/ellmer.html b/dev/articles/ellmer.html
index 6e2519b..f989746 100644
--- a/dev/articles/ellmer.html
+++ b/dev/articles/ellmer.html
@@ -160,18 +160,20 @@

What is a token?

chat <- chat_openai(model = "gpt-4o")
. <- chat$chat("Who created R?", echo = FALSE)
chat
-#> <Chat turns=2 tokens=11/52>
+#> <Chat turns=2 tokens=11/74>
#> ── user ──────────────────────────────────────────────────────────────────
#> Who created R?
#> ── assistant ─────────────────────────────────────────────────────────────
-#> R was created by statisticians Ross Ihaka and Robert Gentleman in the
-#> mid-1990s. They were both working at the University of Auckland in New
-#> Zealand when they developed the R programming language, which is now
-#> widely used for statistical computing and graphics.
+#> R was created by Ross Ihaka and Robert Gentleman in the early 1990s. They
+#> were both statisticians at the University of Auckland, New Zealand. R was
+#> developed as a free and open-source programming language and software
+#> environment for statistical computing and graphics. It has since grown to
+#> be widely used in data analysis, statistical modeling, and various other
+#> scientific applications.

token_usage()
#> name input output
-#> 1 OpenAI-api.openai.com/v1 11 52
+#> 1 OpenAI-api.openai.com/v1 11 74

If you want to learn more about tokens and tokenizers, I'd recommend watching the first 20-30 minutes of Let's build the GPT Tokenizer by Andrej Karpathy. You certainly don't need to learn how

diff --git a/dev/articles/prompt-design.html b/dev/articles/prompt-design.html
index 001ae18..d80ca6d 100644
--- a/dev/articles/prompt-design.html
+++ b/dev/articles/prompt-design.html
@@ -215,8 +215,8 @@

Basic flavour

#> df %>%
#> group_by(age, sex) %>%
#> summarise(across(
-#> a:z,
-#> list(
+#> .cols = a:z,
+#> .fns = list(
#> mean = ~mean(., na.rm = TRUE),
#> median = ~median(., na.rm = TRUE)
#> )
@@ -226,11 +226,11 @@

Basic flavour

#>
#> This code will:
#> 1. Group the data by age and sex
-#> 2. Use `across()` to apply both mean and median functions to all
-#> variables from 'a' to 'z'
+#> 2. Use `across()` to apply both mean and median functions to all columns
+#> from 'a' to 'z'
#> 3. Handle missing values with `na.rm = TRUE`
-#> 4. Create new column names in the format `variable_mean` and
-#> `variable_median`
+#> 4. The result will have columns named like `a_mean`, `a_median`,
+#> `b_mean`, `b_median`, etc.
#>
#> If you want to make the output more tidy (long format), you could add:
#>
@@ -238,8 +238,8 @@

Basic flavour

#> df %>%
#> group_by(age, sex) %>%
#> summarise(across(
-#> a:z,
-#> list(
+#> .cols = a:z,
+#> .fns = list(
#> mean = ~mean(., na.rm = TRUE),
#> median = ~median(., na.rm = TRUE)
#> )
@@ -464,46 +464,46 @@

Getting started

#> {
#> "ingredients": [
#> {
-#> "name": "dark brown sugar",
-#> "amount": "¾ cup",
-#> "weight": "150g"
+#> "quantity": "¾ cup",
+#> "metric_quantity": "150g",
+#> "item": "dark brown sugar"
#> },
#> {
-#> "name": "eggs",
-#> "amount": "2 large"
+#> "quantity": "2",
+#> "item": "large eggs"
#> },
#> {
-#> "name": "sour cream",
-#> "amount": "¾ cup",
-#> "weight": "165g"
+#> "quantity": "¾ cup",
+#> "metric_quantity": "165g",
+#> "item": "sour cream"
#> },
#> {
-#> "name": "unsalted butter",
-#> "amount": "½ cup",
-#> "weight": "113g",
-#> "notes": "melted"
+#> "quantity": "½ cup",
+#> "metric_quantity": "113g",
+#> "item": "unsalted butter",
+#> "state": "melted"
#> },
#> {
-#> "name": "vanilla extract",
-#> "amount": "1 teaspoon"
+#> "quantity": "1 teaspoon",
+#> "item": "vanilla extract"
#> },
#> {
-#> "name": "kosher salt",
-#> "amount": "¾ teaspoon"
+#> "quantity": "¾ teaspoon",
+#> "item": "kosher salt"
#> },
#> {
-#> "name": "neutral oil",
-#> "amount": "⅓ cup",
-#> "volume": "80ml"
+#> "quantity": "⅓ cup",
+#> "metric_quantity": "80ml",
+#> "item": "neutral oil"
#> },
#> {
-#> "name": "all-purpose flour",
-#> "amount": "1½ cups",
-#> "weight": "190g"
+#> "quantity": "1½ cups",
+#> "metric_quantity": "190g",
+#> "item": "all-purpose flour"
#> },
#> {
-#> "name": "sugar",
-#> "weight": "150g plus 1½ teaspoons"
+#> "quantity": "150g plus 1½ teaspoons",
+#> "item": "sugar"
#> }
#> ]
#> }
@@ -539,7 +539,7 @@

Provide examples

#> ```json
#> [
#> {"name": "dark brown sugar", "quantity": 150, "unit": "g"},
-#> {"name": "large eggs", "quantity": 2, "unit": null},
+#> {"name": "large eggs", "quantity": 2, "unit": "count"},
#> {"name": "sour cream", "quantity": 165, "unit": "g"},
#> {"name": "unsalted butter, melted", "quantity": 113, "unit": "g"},
#> {"name": "vanilla extract", "quantity": 1, "unit": "teaspoon"},
@@ -637,34 +637,37 @@

Structured data

#> Using model = "gpt-4o".
data <- chat$extract_data(ingredients, type = type_object(ingredients = type_ingredients))
do.call(rbind, lapply(data$ingredients, as.data.frame))
-#> X[[i]]
-#> name.1 dark brown sugar
-#> name.2 large eggs
-#> name.3 sour cream
-#> name.4 unsalted butter, melted
-#> name.5 vanilla extract
-#> name.6 kosher salt
-#> name.7 neutral oil
-#> name.8 all-purpose flour
-#> name.9 sugar
-#> quantity.1 150
-#> quantity.2 2
-#> quantity.3 165
-#> quantity.4 113
-#> quantity.5 1
-#> quantity.6 0.75
-#> quantity.7 80
-#> quantity.8 190
-#> quantity.9 150
-#> unit.1 g
-#> unit.2 count
-#> unit.3 g
-#> unit.4 g
-#> unit.5 teaspoon
-#> unit.6 teaspoon
-#> unit.7 ml
-#> unit.8 g
-#> unit.9 g
+#> X[[i]]
+#> name.1 dark brown sugar
+#> name.2 large eggs
+#> name.3 sour cream
+#> name.4 unsalted butter, melted
+#> name.5 vanilla extract
+#> name.6 kosher salt
+#> name.7 neutral oil
+#> name.8 all-purpose flour
+#> name.9 sugar
+#> name.10 sugar
+#> quantity.1 150
+#> quantity.2 2
+#> quantity.3 165
+#> quantity.4 113
+#> quantity.5 1
+#> quantity.6 0.75
+#> quantity.7 80
+#> quantity.8 190
+#> quantity.9 150
+#> quantity.10 1.5
+#> unit.1 g
+#> unit.2 units
+#> unit.3 g
+#> unit.4 g
+#> unit.5 teaspoon
+#> unit.6 teaspoon
+#> unit.7 ml
+#> unit.8 g
+#> unit.9 g
+#> unit.10 teaspoon
+#> [1] "blue and gray"
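For readers following along, the `type_ingredients` specification used above isn't reproduced in this hunk. Purely as a hedged sketch (field names inferred from the name/quantity/unit columns in the output; descriptions are mine, and the vignette's real definition may differ), it could be declared as an array of objects:

# Sketch only: one object per ingredient, collected into an array.
type_ingredient <- type_object(
  name = type_string("Ingredient name"),
  quantity = type_number("Amount, as a plain number"),
  unit = type_string("Unit of measurement, e.g. g, ml, teaspoon")
)
type_ingredients <- type_array(items = type_ingredient)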

Data types basics

@@ -232,19 +232,15 @@

Example 1: Article summarisation

#> Using model = "gpt-4o".
data <- chat$extract_data(text, type = type_summary)
cat(data$summary)
-#> In this insightful article, the AI Policy Team at Anthropic argues for the necessity of third-party testing as a central component of AI policy to mitigate societal harm. The deployment of powerful generative AI systems requires a robust testing framework to validate their safety and responsible use. This notion is underscored by the potential risks these AI systems pose, such as election integrity threats, harmful discrimination, and national security issues.
-#>
-#> The article promotes the idea of a third-party oversight regime focused on understanding AI model behaviors through effective tests and the participation of trusted third-party administrators. The piece outlines how such a system could facilitate collaboration between industry, government, and academia while avoiding the pitfalls of a regulatory environment that overly burdens small AI innovators.
-#>
-#> It emphasizes the importance of prototype testing regimes and incremental developments in creating the infrastructure needed for effective oversight. National security is cited as a critical domain for initial third-party testing efforts, with opportunities for further collaborations between private companies, universities, and governments in ensuring AI safety and promoting responsible system deployment.
+#> The article advocates for the implementation of a third-party testing regime for frontier AI systems as a means to ensure their safety and mitigate risks of misuse or accidents. In response to the challenges posed by large-scale generative AI models, the article suggests a collaborative framework involving industry, government, and academia to develop effective testing standards. It emphasizes the necessity of such a regime to build trust in AI systems, prevent potential abuses, and create a robust infrastructure for future AI developments. The proposal includes a two-stage testing procedure, with fast, automated initial tests followed by more thorough secondary evaluations if necessary, supported by adequate government resources. The accompanied regulation aims to balance safety assurance with administrative feasibility, addressing national security concerns, and mitigating regulatory capture risks. Anthropic pledges to prototype and advocate for these testing systems, viewing them as essential components of broader AI policy objectives. The article also addresses the balance between open dissemination of models and safety, framing third-party testing as a safeguard against potential negative implications of AI technology.
str(data)
#> List of 5
-#> $ author : chr "AI Policy Team at Anthropic"
-#> $ topics : chr [1:4] "AI Policy" "AI Safety" "Technology" "Regulation"
-#> $ summary : chr "In this insightful article, the AI Policy Team at Anthropic argues for the necessity of third-party testing as "| __truncated__
-#> $ coherence : int 90
-#> $ persuasion: num 0.85

+#> $ author : chr "Anonymous"
+#> $ topics : chr [1:5] "AI policy" "AI testing" "regulation" "third-party testing" ...
+#> $ summary : chr "The article advocates for the implementation of a third-party testing regime for frontier AI systems as a means"| __truncated__
+#> $ coherence : int 85
+#> $ persuasion: num 0.7
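The `type_summary` specification isn't part of this hunk either; a minimal sketch consistent with the `str()` output above (field names and types taken from the output, descriptions mine) might look like:

# Sketch only: the vignette's real definition may differ in wording.
type_summary <- type_object(
  "Summary of the article.",
  author = type_string("Name of the author"),
  topics = type_array("Topics covered, as specific as possible", items = type_string()),
  summary = type_string("One or two paragraph summary"),
  coherence = type_integer("Coherence of the key points, 0-100"),
  persuasion = type_number("Persuasiveness, 0.0-1.0")
)

Because the extraction is generative, fields like coherence and persuasion vary a little from run to run, which is exactly why this deploy diff shows them changing.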

Example 2: Named entity recognition

@@ -270,15 +266,17 @@

Example 2: Named entity recognition

#> 2 Google organization
#> 3 New York location
#> 4 Sarah person
-#> 5 Acme Inc. organization
-#> 6 San Francisco location
-#> context
-#> 1 Works at Google in New York and met with Sarah in San Francisco.
-#> 2 An organization where John works.
-#> 3 The city where John works at Google.
-#> 4 Met with John in San Francisco and is the CEO of Acme Inc.
-#> 5 An organization where Sarah is the CEO.
-#> 6 The city where John met Sarah last week.

+#> 5 CEO person
+#> 6 Acme Inc. organization
+#> 7 San Francisco location
+#> context
+#> 1 John is mentioned as working at Google in New York.
+#> 2 Google is the company where John works.
+#> 3 New York is the location where Google and John are situated.
+#> 4 Sarah is introduced as the CEO of Acme Inc., who John met with last week.
+#> 5 Sarah holds the position of CEO at Acme Inc.
+#> 6 Acme Inc. is the company of which Sarah is the CEO.
+#> 7 San Francisco is the place where John met with Sarah last week.

Example 3: Sentiment analysis

@@ -301,8 +299,8 @@

Example 3: Sentiment analysis

str(chat$extract_data(text, type = type_sentiment))
#> List of 3
#> $ positive_score: num 0.1
-#> $ negative_score: num 0.7
-#> $ neutral_score : num 0.2

+#> $ negative_score: num 0.6
+#> $ neutral_score : num 0.3

Note that we've asked nicely for the scores to sum to 1, and they do in this example (at least when I ran the code), but it's not guaranteed; if you need an exact total, see the sketch below.
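If downstream code needs the three scores to total exactly 1, it's safer to renormalise them than to rely on the model; a minimal base-R sketch (`scores` stands in for the list returned by `extract_data()` above):

# Rescale the extracted sentiment scores so they sum to exactly 1.
normalise_scores <- function(scores) {
  total <- scores$positive_score + scores$negative_score + scores$neutral_score
  lapply(scores, function(x) x / total)
}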

@@ -338,8 +336,8 @@

Example 4: Text classification

data <- chat$extract_data(text, type = type_classification)
data
#> name score
-#> 1 Technology 0.85
-#> 2 Business 0.10
+#> 1 Technology 0.80
+#> 2 Business 0.15
#> 3 Other 0.05
@@ -395,15 +393,15 @@

Example 6: Extracting data from an image

image <- content_image_file("congressional-assets.png")
data <- chat$extract_data(image, type = type_assets)
data
-#> assert_name owner
-#> 1 11 Zinfandel Lane - Home & Vineyard [RP] JT
-#> 2 25 Point Lobos - Commercial Property [RP] SP
+#> assert_name owner
+#> 1 11 Zinfandel Lane - Home & Vineyard JT
+#> 2 25 Point Lobos - Commercial Property SP
#> location asset_value_low asset_value_high
#> 1 St. Helena/Napa, CA, US 5000001 25000000
#> 2 San Francisco/San Francisco, CA, US 5000001 25000000
#> income_type income_low income_high tx_gt_1000
#> 1 Grape Sales 100001 1000000 TRUE
-#> 2 Rent 100001 1000000 TRUE

+#> 2 Rent 100001 1000000 FALSE
@@ -449,10 +447,10 @@

Required vs optional

#> [1] "Hadley Wickham"
#>
#> $date
-#> [1] "2023-10-02"
+#> [1] "2023-10-04"
str(data)
#> 'data.frame': 2 obs. of 9 variables:
-#> $ assert_name : chr "11 Zinfandel Lane - Home & Vineyard [RP]" "25 Point Lobos - Commercial Property [RP]"
+#> $ assert_name : chr "11 Zinfandel Lane - Home & Vineyard" "25 Point Lobos - Commercial Property"
#> $ owner : chr "JT" "SP"
#> $ location : chr "St. Helena/Napa, CA, US" "San Francisco/San Francisco, CA, US"
#> $ asset_value_low : int 5000001 5000001
@@ -460,7 +458,7 @@

Required vs optional

#> $ income_type : chr "Grape Sales" "Rent"
#> $ income_low : int 100001 100001
#> $ income_high : int 1000000 1000000
-#> $ tx_gt_1000 : logi TRUE TRUE

+#> $ tx_gt_1000 : logi TRUE FALSE

Note that I've used a more explicit prompt here. For this example, I found that it generated better results, and it's a useful place to put additional instructions; the sketch below shows the general pattern.
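The full prompt isn't reproduced in this hunk. As a hypothetical illustration only (wording mine, and assuming `extract_data()` accepts extra content alongside the image in the same way `chat()` does), the additional instructions can ride along with the image:

# Hypothetical prompt wording; the point is only where the instructions go.
prompt <- "This image shows a table of financial disclosures.
Extract one row per asset, and leave a field missing if it is not reported."
data <- chat$extract_data(prompt, image, type = type_assets)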

@@ -528,12 +526,12 @@

Token usage

OpenAI-api.openai.com/v1
6379
-671
+701
Claude
480
-136
+145

diff --git a/dev/pkgdown.yml b/dev/pkgdown.yml
index ae4235f..1fc466b 100644
--- a/dev/pkgdown.yml
+++ b/dev/pkgdown.yml
@@ -7,7 +7,7 @@ articles:
  streaming-async: streaming-async.html
  structured-data: structured-data.html
  tool-calling: tool-calling.html
-last_built: 2025-01-15T17:43Z
+last_built: 2025-01-17T17:33Z
urls:
  reference: https://ellmer.tidyverse.org/reference
  article: https://ellmer.tidyverse.org/articles
diff --git a/dev/reference/Chat.html b/dev/reference/Chat.html
index 9393363..3347c66 100644
--- a/dev/reference/Chat.html
+++ b/dev/reference/Chat.html
@@ -407,7 +407,7 @@

Examples
chat <- chat_openai(echo = TRUE)
 #> Using model = "gpt-4o".
 chat$chat("Tell me a funny joke")
-#> Sure, here's one for you:
+#> Sure! Here's one for you: 
 #> 
 #> Why don't skeletons fight each other?
 #> 
diff --git a/dev/reference/chat_claude.html b/dev/reference/chat_claude.html
index 39c99aa..534df50 100644
--- a/dev/reference/chat_claude.html
+++ b/dev/reference/chat_claude.html
@@ -151,22 +151,22 @@ 

See also

Examples

chat <- chat_claude()
 chat$chat("Tell me three jokes about statisticians")
-#> Here are three statistics-themed jokes:
+#> Here are three statistics-related jokes:
 #> 
-#> 1. How do you tell the difference between an extroverted statistician and
-#> an introverted statistician?
-#> The extroverted statistician looks at YOUR shoes when they're talking to 
-#> you.
+#> 1. How do you tell the difference between an introverted statistician and
+#> an extroverted statistician?
+#> The extroverted statistician looks at *your* shoes when they're talking 
+#> to you.
 #> 
-#> 2. A statistician's wife had twins. He was delighted. He went to the 
-#> minister and said, "We're so happy - the mean of our children has 
-#> doubled!"
-#> The minister replied, "But variance has increased too!"
+#> 2. A statistician returns home late one night and finds their partner 
+#> angry. "Where have you been? You said you'd be home by 7!" 
+#> The statistician replies, "I said I'd be home by 7 on average. Some days 
+#> I'll be home by 5, others by 9. It all evens out!"
 #> 
-#> 3. Three statisticians go hunting. They spot a deer. The first one shoots
-#> and misses 1 meter to the left. The second one shoots and misses 1 meter 
-#> to the right. The third one shouts "We got it!" (Because on average, they
-#> hit it.)
+#> 3. Three statisticians go hunting. They spot a deer, and the first one 
+#> shoots, missing it by a meter to the left. The second one shoots and 
+#> misses by a meter to the right. The third statistician jumps up and 
+#> shouts "We got it! We got it! On average, we hit it!"