From 041f83bc0a611536b4ea9e79a7716cd3e859b3c3 Mon Sep 17 00:00:00 2001
From: Jason Liu <jxnl@users.noreply.github.com>
Date: Tue, 5 Sep 2023 21:13:59 -0500
Subject: [PATCH] Usage CLI (#93)

* adding usage cli

* update
---
 docs/{ => cli}/finetune.md |  33 +++++----
 docs/cli/usage.md          |  57 +++++++++++++++
 instructor/cli/cli.py      |   2 +
 instructor/cli/usage.py    | 145 +++++++++++++++++++++++++++++++++++++
 mkdocs.yml                 |   3 +-
 5 files changed, 224 insertions(+), 16 deletions(-)
 rename docs/{ => cli}/finetune.md (67%)
 create mode 100644 docs/cli/usage.md
 create mode 100644 instructor/cli/usage.py

diff --git a/docs/finetune.md b/docs/cli/finetune.md
similarity index 67%
rename from docs/finetune.md
rename to docs/cli/finetune.md
index 1c85ceacc..20190a6bb 100644
--- a/docs/finetune.md
+++ b/docs/cli/finetune.md
@@ -6,20 +6,22 @@ The instructor CLI provides functionalities for managing fine-tuning jobs on Ope
 ### View Jobs Options
 
 ```sh
-$ instructor jobs --help          
+$ instructor jobs --help 
+                                                                                                               
+ Usage: instructor jobs [OPTIONS] COMMAND [ARGS]...                                                            
+                                                                                                               
+ Monitor and create fine tuning jobs                                                                           
+                                                                                                               
+╭─ Options ───────────────────────────────────────────────────────────────────────────────╮
+│ --help          Show this message and exit.                                             │
+╰─────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Commands ──────────────────────────────────────────────────────────────────────────────────────────────────╮
+│ cancel                    Cancel a fine-tuning job.                                                         │
+│ create-from-file          Create a fine-tuning job from a file.                                             │
+│ create-from-id            Create a fine-tuning job from an existing ID.                                     │
+│ list                      Monitor the status of the most recent fine-tuning jobs.                           │
+╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
 
-Usage: instructor jobs [OPTIONS] COMMAND [ARGS]...
-
-  Monitor and create fine tuning jobs
-
-Options:
-  --help  Show this message and exit.
-
-Commands:
-  cancel            Cancel a fine-tuning job.
-  create-from-file  Create a fine-tuning job from a file.
-  create-from-id    Create a fine-tuning job from an existing ID.
-  list              Monitor the status of the most recent fine-tuning jobs.
 ```
 
 ### Create from File
@@ -84,6 +86,7 @@ OpenAI Files
 └───────────────────────────────┴──────────────┴─────────────────────┴──────────┴───────────┘   
 ```
 
+## Contributions
+
+We aim to provide a light wrapper around the API rather than a complete CLI. Contributions are welcome! Please feel free to open an issue at [jxnl/instructor/issues](https://github.com/jxnl/instructor/issues) or submit a pull request.
 
-## Conclusion
-The instructor CLI offers an intuitive interface for managing OpenAI's fine-tuning jobs and related files. By utilizing simple commands, you can create, monitor, and manage your fine-tuning tasks with ease. Feel free to explore further options and parameters by using the --help flag with any command.
\ No newline at end of file
diff --git a/docs/cli/usage.md b/docs/cli/usage.md
new file mode 100644
index 000000000..5d6725490
--- /dev/null
+++ b/docs/cli/usage.md
@@ -0,0 +1,57 @@
+# Using the OpenAI API Usage CLI
+
+The OpenAI API Usage CLI tool provides functionalities for monitoring your OpenAI API usage, breaking it down by model, date, and cost.
+
+## Monitoring API Usage
+
+### View Usage Options
+
+```sh
+$ instructor usage --help
+
+ Usage: instructor usage [OPTIONS] COMMAND [ARGS]...                                                           
+
+ Check OpenAI API usage data                                                                                   
+
+╭─ Options ───────────────────────────────────────────────────────╮
+│ --help          Show this message and exit.                     │
+╰─────────────────────────────────────────────────────────────────╯
+╭─ Commands ──────────────────────────────────────────────────────╮
+│ list       Displays OpenAI API usage data for the past N days.  │
+╰─────────────────────────────────────────────────────────────────╯
+```
+
+### List Usage for Specific Number of Days
+
+To display API usage for the past 3 days, use the following command:
+
+```sh
+$ instructor usage list --n 3
+```
+
+This will output a table similar to:
+
+```plaintext
+                 Usage Summary by Date, Snapshot, and Cost
+┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓
+┃ Date       ┃ Model                     ┃ Total Requests ┃ Total Cost ($) ┃
+┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩
+│ 2023-09-04 │ gpt-4-0613                │             44 │           0.68 │
+│ 2023-09-04 │ gpt-3.5-turbo-16k-0613    │            195 │           0.84 │
+│ 2023-09-04 │ text-embedding-ada-002-v2 │            276 │           0.00 │
+│ 2023-09-04 │ gpt-4-32k-0613            │            328 │          49.45 │
+└────────────┴───────────────────────────┴────────────────┴────────────────┘
+```
+
+### List Usage for Today
+
+To display the API usage for today, simply run:
+
+```sh
+$ instructor usage list
+```
+
+## Contributions
+
+We aim to provide a light wrapper around the API rather than a complete CLI. Contributions are welcome! Please feel free to open an issue at [jxnl/instructor/issues](https://github.com/jxnl/instructor/issues) or submit a pull request.
+
diff --git a/instructor/cli/cli.py b/instructor/cli/cli.py
index 64dd8fbad..46fd5a0df 100644
--- a/instructor/cli/cli.py
+++ b/instructor/cli/cli.py
@@ -1,6 +1,7 @@
 import typer
 import instructor.cli.jobs as jobs
 import instructor.cli.files as files
+import instructor.cli.usage as usage
 
 app = typer.Typer(
     name="instructor-ft",
@@ -9,3 +10,4 @@
 
 app.add_typer(jobs.app, name="jobs", help="Monitor and create fine tuning jobs")
 app.add_typer(files.app, name="files", help="Manage files on OpenAI's servers")
+app.add_typer(usage.app, name="usage", help="Check OpenAI API usage data")
diff --git a/instructor/cli/usage.py b/instructor/cli/usage.py
new file mode 100644
index 000000000..90808b074
--- /dev/null
+++ b/instructor/cli/usage.py
@@ -0,0 +1,145 @@
+from typing import List
+from datetime import datetime, timedelta
+import typer
+import os
+import aiohttp
+import asyncio
+from collections import defaultdict
+from rich.console import Console
+from rich.table import Table
+from rich.progress import Progress
+
+app = typer.Typer()  # mounted as the "usage" sub-command group by cli.py
+console = Console()
+# Read once at import time; stays None when OPENAI_API_KEY is unset.
+api_key = os.environ.get("OPENAI_API_KEY")
+
+
+async def fetch_usage(date: str) -> dict:
+    """GET ``/v1/usage`` for ``date`` and return the decoded JSON body."""
+    url = f"https://api.openai.com/v1/usage?date={date}"
+    auth = {"Authorization": f"Bearer {api_key}"}
+    async with aiohttp.ClientSession() as http, http.get(url, headers=auth) as resp:
+        return await resp.json()
+
+
+async def get_usage_for_past_n_days(n_days: int) -> List[dict]:
+    """Fetch usage records for today and the ``n_days - 1`` days before it.
+
+    Requests run concurrently; any ``n_days`` below 2 fetches today only.
+    """
+    today = datetime.now()  # single clock read so dates cannot straddle midnight
+    offsets = range(max(n_days, 1))
+    dates = [(today - timedelta(days=i)).strftime("%Y-%m-%d") for i in offsets]
+    with Progress() as progress:
+        task = progress.add_task("[green]Fetching usage data...", total=len(dates))
+        async def fetch_and_advance(date: str) -> dict:
+            payload = await fetch_usage(date)
+            progress.update(task, advance=1)  # tick on arrival, not on scheduling
+            return payload
+
+        fetched = await asyncio.gather(*(fetch_and_advance(d) for d in dates))
+    return [row for payload in fetched for row in payload.get("data", [])]
+
+
+from collections import defaultdict
+from datetime import datetime
+from typing import List
+from rich.table import Table
+
+# USD per token (list prices are per 1K tokens); a bare float is a flat rate.
+MODEL_COSTS = {
+    "gpt-3.5-turbo": {"prompt": 0.0015 / 1000, "completion": 0.002 / 1000},
+    "gpt-3.5-turbo-16k": {"prompt": 0.003 / 1000, "completion": 0.004 / 1000},
+    "gpt-4": {"prompt": 0.03 / 1000, "completion": 0.06 / 1000},
+    "gpt-4-32k": {"prompt": 0.06 / 1000, "completion": 0.12 / 1000},
+    "text-embedding-ada-002": 0.0001 / 1000,
+    "text-embedding-ada-002-v2": 0.0001 / 1000,
+}
+
+
+def get_model_cost(model: str):
+    """Look up the pricing entry for ``model``.
+
+    An exact ``MODEL_COSTS`` key wins; otherwise dated snapshots such as
+    ``gpt-4-0613`` fall back to their model family by prefix.
+
+    Raises:
+        ValueError: if the model matches no known key or family prefix.
+    """
+    if model in MODEL_COSTS:
+        return MODEL_COSTS[model]
+    # More specific prefixes first so "gpt-4-32k-0613" is not priced as "gpt-4".
+    for family in ("gpt-3.5-turbo-16k", "gpt-3.5-turbo", "gpt-4-32k", "gpt-4"):
+        if model.startswith(family):
+            return MODEL_COSTS[family]
+    raise ValueError(f"Cost for model {model} not found")
+
+
+def calculate_cost(
+    snapshot_id: str, n_context_tokens: int, n_generated_tokens: int
+) -> float:
+    """Return the dollar cost of the given token counts for ``snapshot_id``."""
+    pricing = get_model_cost(snapshot_id)
+    if not isinstance(pricing, float):
+        # Per-direction pricing: prompt and completion tokens bill separately.
+        prompt_part = pricing["prompt"] * n_context_tokens
+        completion_part = pricing["completion"] * n_generated_tokens
+        return prompt_part + completion_part
+    # Flat pricing: every token costs the same regardless of direction.
+    return pricing * (n_context_tokens + n_generated_tokens)
+
+
+def group_and_sum_by_date_and_snapshot(usage_data: List[dict]) -> Table:
+    """Aggregate usage records per (date, snapshot) and render them as a table.
+
+    Each record must provide ``snapshot_id``, ``aggregation_timestamp``,
+    ``n_requests``, ``n_context_tokens_total`` and ``n_generated_tokens_total``.
+    Rows keep first-seen order; costs are shown with two decimals.
+
+    Raises:
+        ValueError: propagated from ``calculate_cost`` for an unpriced model.
+    """
+    summary = defaultdict(
+        lambda: defaultdict(
+            lambda: {"total_requests": 0, "total_tokens": 0, "total_cost": 0.0}
+        )
+    )
+
+    for usage in usage_data:
+        snapshot_id = usage["snapshot_id"]
+        n_context = usage["n_context_tokens_total"]
+        n_generated = usage["n_generated_tokens_total"]
+        # NOTE(review): fromtimestamp() buckets by the local day — confirm intended.
+        stamp = datetime.fromtimestamp(usage["aggregation_timestamp"])
+        totals = summary[stamp.strftime("%Y-%m-%d")][snapshot_id]
+        totals["total_requests"] += usage["n_requests"]
+        # Sum both directions so "total_tokens" is not completion-only.
+        totals["total_tokens"] += n_context + n_generated
+        totals["total_cost"] += calculate_cost(snapshot_id, n_context, n_generated)
+
+    table = Table(title="Usage Summary by Date, Snapshot, and Cost")
+    table.add_column("Date", style="dim")
+    table.add_column("Model", style="dim")
+    table.add_column("Total Requests", justify="right")
+    table.add_column("Total Cost ($)", justify="right")
+
+    for date, snapshots in summary.items():
+        for snapshot_id, data in snapshots.items():
+            cost_text = "{:.2f}".format(data["total_cost"])
+            table.add_row(date, snapshot_id, str(data["total_requests"]), cost_text)
+
+    return table
+
+
+@app.command(help="Displays OpenAI API usage data for the past N days.")
+def list(
+    n: int = typer.Option(0, "-n", "--n", help="Number of days."),
+):
+    """Fetch and print the usage summary table; accepts ``-n`` or ``--n``."""
+    all_data = asyncio.run(get_usage_for_past_n_days(n))
+    console.print(group_and_sum_by_date_and_snapshot(all_data))
+
+
+if __name__ == "__main__":
+    app()  # lets this module run directly as a standalone Typer CLI
diff --git a/mkdocs.yml b/mkdocs.yml
index 803569683..ba6054f43 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -64,7 +64,8 @@ nav:
       - "Introduction: Writing Prompts": "writing-prompts.md"
       - "Prompting Templates": "chat-completion.md"
   - CLI Reference:
-      - "Finetuning": finetune.md
+      - "Usage": "cli/usage.md"
+      - "Finetuning": "cli/finetune.md"
 extra:
   analytics:
     provider: google