Skip to content

Commit

Permalink
Merge pull request #22 from edgararuiz/updates
Browse files Browse the repository at this point in the history
Adds examples to all methods, improves site output
  • Loading branch information
edgararuiz authored Oct 9, 2024
2 parents d96913e + 096bc55 commit 3d52990
Show file tree
Hide file tree
Showing 7 changed files with 274 additions and 28 deletions.
4 changes: 2 additions & 2 deletions _freeze/index/execute-results/html.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions _freeze/reference/MallFrame/execute-results/html.json

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,16 @@ reviews
## Python

```{python}
#| eval: true
import mall
#| include: false
import polars as pl
pl.Config(fmt_str_lengths=100)
pl.Config.set_tbl_hide_dataframe_shape(True)
pl.Config.set_tbl_hide_column_data_types(True)
```


```{python}
import mall
data = mall.MallData
reviews = data.reviews
Expand Down
142 changes: 134 additions & 8 deletions python/mall/polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,21 @@
class MallFrame:
"""Extension to Polars that add ability to use
an LLM to run batch predictions over a data frame
Loads the needed libraries, and sets up the review
data frame that will be used in the examples below:
```{python}
#| output: false
import mall
import polars as pl
pl.Config(fmt_str_lengths=100)
pl.Config.set_tbl_hide_dataframe_shape(True)
pl.Config.set_tbl_hide_column_data_types(True)
data = mall.MallData
reviews = data.reviews
reviews.llm.use(options = dict(seed = 100))
```
"""

def __init__(self, df: pl.DataFrame) -> None:
Expand All @@ -32,6 +47,32 @@ def use(self, backend="", model="", _cache="_mall_cache", **kwargs):
**kwargs
Arguments to pass to the downstream Python call. In this case, the
`chat` function in `ollama`
Examples
------
```{python}
# Additional arguments will be passed 'as-is' to the
# downstream Python function; in this example, to ollama.chat()
reviews.llm.use("ollama", "llama3.2", seed = 100, temp = 0.1)
```
```{python}
# During the Python session, you can change any argument
# individually and it will retain all of the previously
# used arguments
reviews.llm.use(temp = 0.3)
```
```{python}
# Use _cache to modify the target folder for caching
reviews.llm.use(_cache = "_my_cache")
```
```{python}
# Leave _cache empty to turn off this functionality
reviews.llm.use(_cache = "")
```
"""
if backend != "":
self._use.update(dict(backend=backend))
Expand Down Expand Up @@ -71,14 +112,24 @@ def sentiment(
------
```{python}
import mall
import polars as pl
pl.Config(fmt_str_lengths=100)
data = mall.MallData
reviews = data.reviews
reviews.llm.use(options = dict(seed = 100), _cache = "_readme_cache")
reviews.llm.sentiment("review")
```
```{python}
# Use 'pred_name' to customize the new column's name
reviews.llm.sentiment("review", pred_name="review_sentiment")
```
```{python}
# Pass custom sentiment options
reviews.llm.sentiment("review", ["positive", "negative"])
```
```{python}
# Use a DICT object to specify values to return per sentiment
reviews.llm.sentiment("review", {"positive" : "1", "negative" : "0"})
```
"""
df = map_call(
df=self._df,
Expand All @@ -97,7 +148,7 @@ def summarize(
additional="",
pred_name="summary",
) -> list[pl.DataFrame]:
"""Summarise the text down to a specific number of words.
"""Summarize the text down to a specific number of words.
Parameters
------
Expand All @@ -114,6 +165,18 @@ def summarize(
additional : str
Inserts this text into the prompt sent to the LLM
Examples
------
```{python}
# Use max_words to set the maximum number of words to use for the summary
reviews.llm.summarize("review", max_words = 5)
```
```{python}
# Use 'pred_name' to customize the new column's name
reviews.llm.summarize("review", 5, pred_name = "review_summary")
```
"""
df = map_call(
df=self._df,
Expand Down Expand Up @@ -147,6 +210,19 @@ def translate(
additional : str
Inserts this text into the prompt sent to the LLM
Examples
------
```{python}
reviews.llm.translate("review", "spanish")
```
```{python}
reviews.llm.translate("review", "french")
```
"""
df = map_call(
df=self._df,
Expand Down Expand Up @@ -182,6 +258,23 @@ def classify(
additional : str
Inserts this text into the prompt sent to the LLM
Examples
------
```{python}
reviews.llm.classify("review", ["appliance", "computer"])
```
```{python}
# Use 'pred_name' to customize the new column's name
reviews.llm.classify("review", ["appliance", "computer"], pred_name="prod_type")
```
```{python}
# Pass a DICT to set custom values for each classification
reviews.llm.classify("review", {"appliance" : "1", "computer" : "2"})
```
"""
df = map_call(
df=self._df,
Expand Down Expand Up @@ -217,14 +310,34 @@ def extract(
additional : str
Inserts this text into the prompt sent to the LLM
Examples
------
```{python}
# Use 'labels' to let the function know what to extract
reviews.llm.extract("review", labels = "product")
```
```{python}
# Use 'pred_name' to customize the new column's name
reviews.llm.extract("review", "product", pred_name = "prod")
```
```{python}
# Pass a list to request multiple things; the results will be pipe delimited
# in a single column
reviews.llm.extract("review", ["product", "feelings"])
```
"""
# TODO: Support for expand_cols
df = map_call(
df=self._df,
col=col,
msg=extract(labels, additional=additional),
pred_name=pred_name,
use=self._use,
valid_resps=labels,
)
return df

Expand All @@ -248,6 +361,19 @@ def custom(
pred_name : str
A string with the name of the new column where the
prediction will be placed
Examples
------
```{python}
my_prompt = "Answer a question." \
+ "Return only the answer, no explanation" \
+ "Acceptable answers are 'yes', 'no'" \
+ "Answer this about the following text, is this a happy customer?:"
reviews.llm.custom("review", prompt = my_prompt)
```
"""
df = map_call(
df=self._df,
Expand Down
4 changes: 1 addition & 3 deletions python/mall/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,7 @@ def extract(labels, additional=""):
if isinstance(labels, list):
no_labels = len(labels)
plural = "s"
text_multi = (
"Return the response in a simple list, pipe separated, and no headers. "
)
text_multi = "Return the response exclusively in a pipe separated list, and no headers. "
for label in labels:
col_labels += label + " "
col_labels = col_labels.rstrip()
Expand Down
Loading

0 comments on commit 3d52990

Please sign in to comment.