diff --git a/docs/advanced/error_handling.md b/docs/advanced/error_handling.md new file mode 100644 index 0000000..b22e10a --- /dev/null +++ b/docs/advanced/error_handling.md @@ -0,0 +1,307 @@
# Error Handling and Retry Strategies

This guide covers best practices for handling errors and implementing retry strategies when working with ClientAI. Learn how to gracefully handle API errors, implement effective retry mechanisms, and build robust AI applications.

## Table of Contents
1. [Common Error Types](#common-error-types)
2. [Basic Error Handling](#basic-error-handling)
3. [Retry Strategies](#retry-strategies)
4. [Advanced Error Handling Patterns](#advanced-error-handling-patterns)
5. [Provider-Specific Considerations](#provider-specific-considerations)
6. [Best Practices](#best-practices)

## Common Error Types

ClientAI provides a unified error hierarchy for all providers:

```python
from clientai.exceptions import (
    ClientAIError,        # Base exception for all errors
    AuthenticationError,  # API key or auth issues
    RateLimitError,       # Rate limits exceeded
    InvalidRequestError,  # Malformed requests
    ModelError,           # Model-related issues
    TimeoutError,         # Request timeouts
    APIError              # General API errors
)
```

## Basic Error Handling

### Simple Try-Except Pattern
```python
from clientai import ClientAI
from clientai.exceptions import ClientAIError, RateLimitError

client = ClientAI("openai", api_key="your-api-key")

try:
    response = client.generate_text(
        prompt="Write a story",
        model="gpt-3.5-turbo"
    )
except RateLimitError as e:
    print(f"Rate limit hit. Status code: {e.status_code}")
    print(f"Original error: {e.original_error}")
except ClientAIError as e:
    print(f"Generation failed: {e}")
```

## Retry Strategies

### Simple Retry with Exponential Backoff
```python
import time
from typing import TypeVar, Callable
from clientai.exceptions import ClientAIError, RateLimitError, TimeoutError

T = TypeVar('T')

def with_retry(
    operation: Callable[[], T],
    max_retries: int = 3,
    initial_delay: float = 1.0,
    exponential_base: float = 2.0,
    max_delay: float = 60.0
) -> T:
    """
    Execute an operation with exponential backoff retry logic.

    Args:
        operation: Function to retry
        max_retries: Maximum retry attempts
        initial_delay: Initial delay between retries in seconds
        exponential_base: Base for exponential backoff
        max_delay: Maximum delay between retries in seconds

    Returns:
        The result of the operation once it succeeds.
    """
    last_exception = None

    for attempt in range(max_retries):
        try:
            return operation()
        except (RateLimitError, TimeoutError) as e:
            last_exception = e
            if attempt == max_retries - 1:
                raise

            delay = min(
                initial_delay * (exponential_base ** attempt),
                max_delay
            )
            time.sleep(delay)

    raise last_exception or ClientAIError("Retry failed")

# Usage Example
def generate_text():
    return client.generate_text(
        prompt="Write a story",
        model="gpt-3.5-turbo"
    )

result = with_retry(generate_text)
```

### Provider-Aware Retry Strategy
```python
from typing import Optional

from clientai.exceptions import RateLimitError, TimeoutError

class RetryConfig:
    def __init__(
        self,
        max_retries: int = 3,
        initial_delay: float = 1.0,
        max_delay: float = 60.0,
        retry_on: Optional[tuple] = None
    ):
        self.max_retries = max_retries
        self.initial_delay = initial_delay
        self.max_delay = max_delay
        self.retry_on = retry_on or (RateLimitError, TimeoutError)

PROVIDER_RETRY_CONFIGS = {
    "openai": RetryConfig(max_retries=3, initial_delay=1.0),
    "anthropic": RetryConfig(max_retries=5, initial_delay=2.0),
    "ollama": RetryConfig(max_retries=2, initial_delay=0.5)
}

def get_retry_config(provider: str) -> RetryConfig:
    """Get the provider-specific retry configuration."""
    return PROVIDER_RETRY_CONFIGS.get(
        provider,
        RetryConfig()  # Default config
    )
```
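
`RetryConfig` is a plain settings object, so it composes directly with the `with_retry` helper above. A minimal sketch — `retry_for_provider` is a hypothetical wrapper, and note that `with_retry` as written always retries on `RateLimitError`/`TimeoutError`, so the `retry_on` field is not threaded through here:

```python
from typing import Callable

def retry_for_provider(provider: str, operation: Callable[[], T]) -> T:
    """Run an operation with the retry settings registered for a provider."""
    config = get_retry_config(provider)
    return with_retry(
        operation,
        max_retries=config.max_retries,
        initial_delay=config.initial_delay,
        max_delay=config.max_delay,
    )

result = retry_for_provider(
    "openai",
    lambda: client.generate_text(
        prompt="Write a story",
        model="gpt-3.5-turbo"
    )
)
```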
## Advanced Error Handling Patterns

### Circuit Breaker Pattern
```python
from typing import Optional
from datetime import datetime, timedelta

class CircuitBreaker:
    def __init__(
        self,
        failure_threshold: int = 5,
        reset_timeout: int = 60
    ):
        self.failure_threshold = failure_threshold
        self.reset_timeout = reset_timeout
        self.failures = 0
        self.last_failure_time: Optional[datetime] = None
        self.is_open = False

    def record_failure(self) -> None:
        self.failures += 1
        self.last_failure_time = datetime.now()
        if self.failures >= self.failure_threshold:
            self.is_open = True

    def can_proceed(self) -> bool:
        if not self.is_open:
            return True

        if self.last_failure_time and \
           datetime.now() - self.last_failure_time > timedelta(seconds=self.reset_timeout):
            self.reset()
            return True

        return False

    def reset(self) -> None:
        self.failures = 0
        self.is_open = False
        self.last_failure_time = None

# Usage
circuit_breaker = CircuitBreaker()

def generate_with_circuit_breaker(prompt: str, model: str) -> str:
    if not circuit_breaker.can_proceed():
        raise ClientAIError("Circuit breaker is open")

    try:
        return client.generate_text(prompt, model=model)
    except ClientAIError:
        circuit_breaker.record_failure()
        raise
```

### Fallback Chain Pattern
```python
class FallbackChain:
    def __init__(self, default_response: Optional[str] = None):
        self.default_response = default_response
        self.handlers: list = []

    def add_handler(
        self,
        client: ClientAI,
        model: str,
        circuit_breaker: Optional[CircuitBreaker] = None
    ):
        self.handlers.append((client, model, circuit_breaker))
        return self

    def execute(self, prompt: str) -> str:
        last_error = None

        for client, model, circuit_breaker in self.handlers:
            if circuit_breaker and not circuit_breaker.can_proceed():
                continue

            try:
                return client.generate_text(prompt, model=model)
            except ClientAIError as e:
                if circuit_breaker:
                    circuit_breaker.record_failure()
                last_error = e

        if self.default_response:
            return self.default_response

        raise last_error or ClientAIError("All handlers failed")

# Usage
fallback_chain = FallbackChain("Sorry, service unavailable")
fallback_chain.add_handler(
    ClientAI("openai"), "gpt-4", CircuitBreaker()
).add_handler(
    ClientAI("anthropic"), "claude-2", CircuitBreaker()
)

response = fallback_chain.execute("Write a story")
```

## Provider-Specific Considerations

### OpenAI
- Implements rate limiting with retry-after headers
- Supports automatic retries for intermittent errors
- Provides detailed error messages with status codes

### Anthropic
- Uses HTTP 429 for rate limits
- May require longer backoff periods
- Provides structured error responses

### Ollama
- Local deployments may exhibit different error patterns
- Network errors are more common than rate limits
- May require custom timeout configurations

## Best Practices

1. **Always Use Specific Exception Types**
   ```python
   try:
       response = client.generate_text(prompt, model=model)
   except RateLimitError:
       ...  # Handle rate limits (e.g. back off and retry)
   except ModelError:
       ...  # Handle model issues (e.g. fall back to another model)
   except ClientAIError:
       ...  # Handle any other ClientAI error
   ```

2. **Implement Graceful Degradation**
   ```python
   def generate_with_fallback(prompt: str) -> str:
       try:
           return client.generate_text(
               prompt, model="gpt-4"
           )
       except (RateLimitError, ModelError):
           return client.generate_text(
               prompt, model="gpt-3.5-turbo"
           )
       except ClientAIError:
           return "Service temporarily unavailable"
   ```

3. **Use Appropriate Retry Strategies**
   - Implement exponential backoff
   - Respect rate limits and retry-after headers
   - Set reasonable timeout values
   - Use circuit breakers for system protection

4. **Log Errors Appropriately**
   ```python
   import logging

   logger = logging.getLogger(__name__)

   try:
       response = client.generate_text(prompt, model=model)
   except ClientAIError as e:
       logger.error(
           "Generation failed",
           extra={
               "status_code": e.status_code,
               "error_type": type(e).__name__,
               "original_error": str(e.original_error)
           }
       )
   ```

By following these error handling and retry strategies, you can build robust applications that gracefully handle failures and provide reliable service to your users.
\ No newline at end of file
diff --git a/docs/advanced/ollama_specific.md b/docs/advanced/ollama_specific.md new file mode 100644 index 0000000..25378b8 --- /dev/null +++ b/docs/advanced/ollama_specific.md @@ -0,0 +1,264 @@
# Ollama-Specific Parameters in ClientAI

This guide covers the Ollama-specific parameters that can be passed to ClientAI's `generate_text` and `chat` methods. These parameters are passed as additional keyword arguments to customize Ollama's behavior.
+ +## generate_text Method + +### Basic Structure +```python +from clientai import ClientAI + +client = ClientAI('ollama') +response = client.generate_text( + prompt="Your prompt here", # Required + model="llama2", # Required + suffix="Optional suffix", # Ollama-specific + system="System message", # Ollama-specific + template="Custom template", # Ollama-specific + context=[1, 2, 3], # Ollama-specific + format="json", # Ollama-specific + options={"temperature": 0.7}, # Ollama-specific + keep_alive="5m" # Ollama-specific +) +``` + +### Ollama-Specific Parameters + +#### `suffix: str` +- Text to append to the generated output +```python +response = client.generate_text( + prompt="Write a story about a robot", + model="llama2", + suffix="\n\nThe End." +) +``` + +#### `system: str` +- System message to guide the model's behavior +```python +response = client.generate_text( + prompt="Explain quantum computing", + model="llama2", + system="You are a quantum physics professor explaining concepts to beginners" +) +``` + +#### `template: str` +- Custom prompt template +```python +response = client.generate_text( + prompt="What is Python?", + model="llama2", + template="Question: {{.Prompt}}\n\nDetailed answer:" +) +``` + +#### `context: List[int]` +- Context from previous interactions +```python +# First request +first_response = client.generate_text( + prompt="Tell me a story about space", + model="llama2" +) + +# Continue the story using context +continued_response = client.generate_text( + prompt="What happened next?", + model="llama2", + context=first_response.context # Context from previous response +) +``` + +#### `format: Literal['', 'json']` +- Controls response format +```python +response = client.generate_text( + prompt="List three fruits with their colors", + model="llama2", + format="json" +) +``` + +#### `options: Optional[Options]` +- Model-specific parameters +```python +response = client.generate_text( + prompt="Write a creative story", + model="llama2", + options={ + "temperature": 0.9, + "top_p": 0.8, + "top_k": 40 + } +) +``` + +#### `keep_alive: Optional[Union[float, str]]` +- Model memory retention duration +```python +response = client.generate_text( + prompt="Quick calculation", + model="llama2", + keep_alive="10m" # Keep model loaded for 10 minutes +) +``` + +## chat Method + +### Basic Structure +```python +response = client.chat( + model="llama2", # Required + messages=[...], # Required + tools=[...], # Ollama-specific + format="json", # Ollama-specific + options={"temperature": 0.7}, # Ollama-specific + keep_alive="5m" # Ollama-specific +) +``` + +### Ollama-Specific Parameters + +#### `tools: Optional[List[Dict]]` +- Tools available for the model (requires stream=False) +```python +response = client.chat( + model="llama2", + messages=[{"role": "user", "content": "What's 2+2?"}], + tools=[{ + "type": "function", + "function": { + "name": "calculate", + "description": "Perform basic math", + "parameters": { + "type": "object", + "properties": { + "expression": {"type": "string"} + } + } + } + }], + stream=False +) +``` + +#### `format: Literal['', 'json']` +- Controls response format +```python +response = client.chat( + model="llama2", + messages=[ + {"role": "user", "content": "List three countries with their capitals"} + ], + format="json" +) +``` + +#### `options: Optional[Options]` +- Model-specific parameters +```python +response = client.chat( + model="llama2", + messages=[{"role": "user", "content": "Tell me a joke"}], + options={ + "temperature": 0.8, + "top_p": 
0.9, + "presence_penalty": 0.5 + } +) +``` + +#### `keep_alive: Optional[Union[float, str]]` +- Model memory retention duration +```python +response = client.chat( + model="llama2", + messages=[{"role": "user", "content": "Hello"}], + keep_alive=300.0 # 5 minutes in seconds +) +``` + +## Complete Examples + +### Example 1: Creative Writing with generate_text +```python +response = client.generate_text( + prompt="Write a short story about AI", + model="llama2", + system="You are a creative writer specializing in science fiction", + template="Story prompt: {{.Prompt}}\n\nCreative story:", + options={ + "temperature": 0.9, + "top_p": 0.95 + }, + suffix="\n\nThe End.", + keep_alive="10m" +) +``` + +### Example 2: JSON Response with chat +```python +messages = [ + {"role": "system", "content": "You are a helpful assistant that provides structured data"}, + {"role": "user", "content": "List 3 programming languages with their key features"} +] + +response = client.chat( + model="llama2", + messages=messages, + format="json", + options={ + "temperature": 0.3, # Lower temperature for more structured output + "top_p": 0.9 + } +) +``` + +### Example 3: Multimodal Chat with Image +```python +messages = [ + { + "role": "user", + "content": "What's in this image?", + "images": ["encoded_image_data_or_path"] + } +] + +response = client.chat( + model="llava", + messages=messages, + format="json", + keep_alive="5m" +) +``` + +### Example 4: Contextual Generation +```python +# First generation +first_response = client.generate_text( + prompt="Write the beginning of a mystery story", + model="llama2", + options={"temperature": 0.8} +) + +# Continue the story using context +continued_response = client.generate_text( + prompt="Continue the story with a plot twist", + model="llama2", + context=first_response.context, + options={"temperature": 0.8} +) +``` + +## Parameter Validation Notes + +1. Both `model` and `prompt`/`messages` are required +2. When using `tools`, `stream` must be `False` +3. `format` only accepts `''` or `'json'` +4. Image support requires multimodal models (e.g., llava) +5. Context preservation works only with `generate_text` +6. Keep alive duration can be string (e.g., "5m") or float (seconds) + +These parameters allow you to fully customize Ollama's behavior while working with ClientAI's abstraction layer. \ No newline at end of file diff --git a/docs/advanced/openai_specific.md b/docs/advanced/openai_specific.md new file mode 100644 index 0000000..ed3d988 --- /dev/null +++ b/docs/advanced/openai_specific.md @@ -0,0 +1,247 @@ +# OpenAI-Specific Parameters in ClientAI + +This guide covers the OpenAI-specific parameters that can be passed to ClientAI's `generate_text` and `chat` methods. These parameters are passed as additional keyword arguments to customize OpenAI's behavior. 
## generate_text Method

### Basic Structure
```python
from clientai import ClientAI

client = ClientAI('openai', api_key="your-openai-api-key")
response = client.generate_text(
    prompt="Your prompt here",                # Required
    model="gpt-3.5-turbo",                    # Required
    frequency_penalty=0.5,                    # OpenAI-specific
    presence_penalty=0.2,                     # OpenAI-specific
    logit_bias={"123": 100},                  # OpenAI-specific
    max_completion_tokens=100,                # OpenAI-specific
    response_format={"type": "json_object"},  # OpenAI-specific
    seed=12345                                # OpenAI-specific
)
```

### OpenAI-Specific Parameters

#### `frequency_penalty: Optional[float]`
- Range: -2.0 to 2.0
- Penalizes tokens based on their frequency in the text
```python
response = client.generate_text(
    prompt="Write a creative story",
    model="gpt-3.5-turbo",
    frequency_penalty=0.7  # Reduces repetition
)
```

#### `presence_penalty: Optional[float]`
- Range: -2.0 to 2.0
- Penalizes tokens based on their presence in prior text
```python
response = client.generate_text(
    prompt="Write a varied story",
    model="gpt-3.5-turbo",
    presence_penalty=0.6  # Encourages topic diversity
)
```

#### `logit_bias: Optional[Dict[str, int]]`
- Maps token IDs (as strings) to bias values (-100 to 100)
```python
response = client.generate_text(
    prompt="Write about technology",
    model="gpt-3.5-turbo",
    logit_bias={
        "123": 100,   # Increases likelihood of token 123
        "456": -100   # Decreases likelihood of token 456
    }
)
```

#### `max_completion_tokens: Optional[int]`
- Maximum tokens for the completion
```python
response = client.generate_text(
    prompt="Write a summary",
    model="gpt-3.5-turbo",
    max_completion_tokens=100
)
```

#### `response_format: ResponseFormat`
- Controls output structure
```python
response = client.generate_text(
    prompt="List three colors",
    model="gpt-4",
    response_format={"type": "json_object"}
)
```

#### `seed: Optional[int]`
- For deterministic generation (Beta)
```python
response = client.generate_text(
    prompt="Generate a random number",
    model="gpt-3.5-turbo",
    seed=12345
)
```

#### `user: str`
- Unique identifier for end-user tracking
```python
response = client.generate_text(
    prompt="Hello",
    model="gpt-3.5-turbo",
    user="user_123"
)
```

## chat Method

### Basic Structure
```python
response = client.chat(
    model="gpt-3.5-turbo",                    # Required
    messages=[...],                           # Required
    tools=[...],                              # OpenAI-specific
    tool_choice="auto",                       # OpenAI-specific
    response_format={"type": "json_object"},  # OpenAI-specific
    logprobs=True,                            # OpenAI-specific
    top_logprobs=5                            # OpenAI-specific
)
```

### OpenAI-Specific Parameters

#### `tools: Iterable[ChatCompletionToolParam]`
- List of available tools (max 128)
```python
response = client.chat(
    model="gpt-4",
    messages=[{"role": "user", "content": "What's the weather?"}],
    tools=[{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather data",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string"}
                }
            }
        }
    }]
)
```

#### `tool_choice: ChatCompletionToolChoiceOptionParam`
- Controls tool selection behavior
```python
response = client.chat(
    model="gpt-4",
    messages=[{"role": "user", "content": "Calculate something"}],
    tool_choice="auto"  # or "none" or "required"
)
```
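
The chat structure above also lists `logprobs` and `top_logprobs`; as the validation notes below point out, `top_logprobs` only works when `logprobs=True`. A minimal hedged sketch — the `return_full_response=True` flag and the exact shape of the returned data are assumptions here, since a text-only return value would not expose log-probabilities:

```python
# Request per-token log-probabilities alongside the message.
# Assumption: ClientAI forwards these kwargs to OpenAI unchanged, and
# return_full_response=True yields the raw chat-completion response.
response = client.chat(
    model="gpt-4",
    messages=[{"role": "user", "content": "Say hello"}],
    logprobs=True,    # required whenever top_logprobs is set
    top_logprobs=5,   # up to 5 alternatives per generated token
    return_full_response=True
)

# In the raw OpenAI response, the per-token data lives under
# choices[0].logprobs.content (each entry lists the alternatives).
```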
audio"}], + modalities=["text", "audio"] +) +``` + +#### `audio: Optional[ChatCompletionAudioParam]` +- Audio output parameters +```python +response = client.chat( + model="gpt-4o-audio-preview", + messages=[{"role": "user", "content": "Speak this"}], + modalities=["audio"], + audio={"model": "tts-1", "voice": "alloy"} +) +``` + +#### `metadata: Optional[Dict[str, str]]` +- Custom tags for filtering +```python +response = client.chat( + model="gpt-4", + messages=[{"role": "user", "content": "Hello"}], + metadata={"purpose": "greeting", "user_type": "new"} +) +``` + +## Complete Examples + +### Example 1: Structured Output with Tools +```python +response = client.chat( + model="gpt-4", + messages=[ + {"role": "system", "content": "You are a data assistant"}, + {"role": "user", "content": "Get weather for Paris"} + ], + response_format={"type": "json_object"}, + tools=[{ + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather data", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string"} + } + } + } + }], + tool_choice="auto" +) +``` + +### Example 2: Advanced Text Generation +```python +response = client.generate_text( + prompt="Write a technical analysis", + model="gpt-4", + max_completion_tokens=500, + frequency_penalty=0.7, + presence_penalty=0.6, + logit_bias={123: 50}, + user="analyst_1", + seed=42 +) +``` + +### Example 3: Audio Generation +```python +response = client.chat( + model="gpt-4o-audio-preview", + messages=[{"role": "user", "content": "Explain quantum physics"}], + modalities=["text", "audio"], + audio={ + "model": "tts-1", + "voice": "nova", + "speed": 1.0 + }, + metadata={"type": "educational"} +) +``` + +## Parameter Validation Notes + +1. Both `model` and `prompt`/`messages` are required +2. `response_format` requires compatible models +3. Tool usage limited to 128 functions +4. Audio generation requires specific models +5. `logprobs` must be True when using `top_logprobs` +6. `seed` feature is in Beta and not guaranteed + +These parameters allow you to fully customize OpenAI's behavior while working with ClientAI's abstraction layer. \ No newline at end of file diff --git a/docs/advanced/overview.md b/docs/advanced/overview.md new file mode 100644 index 0000000..6488bd4 --- /dev/null +++ b/docs/advanced/overview.md @@ -0,0 +1,35 @@ +# Advanced Overview + +This section provides in-depth guides on leveraging specific features of ClientAI and provider-specific functionalities. Each topic delves into a particular aspect of usage or focuses on a specific provider's unique capabilities. + +## Provider-Specific Parameters + +Different AI providers offer unique parameters and features. Understanding these can help you fine-tune your AI interactions for optimal results. + +1. **Ollama Specific Guide:** Learn about Ollama's unique parameters, including context handling, streaming options, and custom templates. + - [Ollama Specific Guide](ollama_specific.md) + +2. **OpenAI Specific Guide:** Explore OpenAI's advanced features, such as logit bias and model-specific parameters. + - [OpenAI Specific Guide](openai_specific.md) + +3. **Replicate Specific Guide**: Discover Replicate's distinctive offerings, including model versioning and custom deployment options. + - [Replicate Specific Guide](replicate_specific.md) + +## Advanced Usage Topics + +4. **Optimizing Performance:** Tips and tricks for improving response time, reducing token usage, and enhancing overall efficiency. + - Soon + +5. 
These parameters allow you to fully customize OpenAI's behavior while working with ClientAI's abstraction layer.
\ No newline at end of file
diff --git a/docs/advanced/overview.md b/docs/advanced/overview.md new file mode 100644 index 0000000..6488bd4 --- /dev/null +++ b/docs/advanced/overview.md @@ -0,0 +1,35 @@
# Advanced Overview

This section provides in-depth guides on leveraging specific features of ClientAI and provider-specific functionalities. Each topic delves into a particular aspect of usage or focuses on a specific provider's unique capabilities.

## Provider-Specific Parameters

Different AI providers offer unique parameters and features. Understanding these can help you fine-tune your AI interactions for optimal results.

1. **Ollama Specific Guide:** Learn about Ollama's unique parameters, including context handling, streaming options, and custom templates.
    - [Ollama Specific Guide](ollama_specific.md)

2. **OpenAI Specific Guide:** Explore OpenAI's advanced features, such as logit bias and model-specific parameters.
    - [OpenAI Specific Guide](openai_specific.md)

3. **Replicate Specific Guide:** Discover Replicate's distinctive offerings, including model versioning and custom deployment options.
    - [Replicate Specific Guide](replicate_specific.md)

## Advanced Usage Topics

4. **Optimizing Performance:** Tips and tricks for improving response time, reducing token usage, and enhancing overall efficiency.
    - Soon

5. **Handling Long Conversations:** Strategies for managing context in extended dialogues and multi-turn interactions.
    - Soon

6. **Custom Prompting Techniques:** Advanced prompting methods to extract more accurate and relevant responses from AI models.
    - Soon

7. **Error Handling and Retry Strategies:** Best practices for gracefully managing API errors and implementing effective retry mechanisms.
    - [Error Handling and Retry Strategies](error_handling.md)

8. **Security and Privacy Considerations:** Guidelines for ensuring data security and maintaining user privacy when working with AI APIs.
    - Soon

Each guide in this section is designed to provide you with a deeper understanding of ClientAI's capabilities and how to leverage them effectively in your projects.
\ No newline at end of file
diff --git a/docs/advanced/replicate_specific.md b/docs/advanced/replicate_specific.md new file mode 100644 index 0000000..b8cd94f --- /dev/null +++ b/docs/advanced/replicate_specific.md @@ -0,0 +1,190 @@
# Replicate-Specific Parameters in ClientAI

This guide covers the Replicate-specific parameters that can be passed to ClientAI's `generate_text` and `chat` methods. These parameters are passed as additional keyword arguments to customize Replicate's behavior.

## generate_text Method

### Basic Structure
```python
from clientai import ClientAI

client = ClientAI('replicate', api_key="your-replicate-api-key")
response = client.generate_text(
    prompt="Your prompt here",        # Required
    model="owner/name:version",       # Required
    webhook="https://...",            # Replicate-specific
    webhook_completed="https://...",  # Replicate-specific
    webhook_events_filter=[...],      # Replicate-specific
    stream=False,                     # Optional
    wait=True                         # Replicate-specific
)
```

### Replicate-Specific Parameters

#### `webhook: Optional[str]`
- URL to receive POST requests with prediction updates
```python
response = client.generate_text(
    prompt="Write a story",
    model="stability-ai/stable-diffusion:db21e45d3f7023abc2a46ee38a23973f6dce16bb082a930b0c49861f96d1e5bf",
    webhook="https://your-server.com/webhook"
)
```

#### `webhook_completed: Optional[str]`
- URL for receiving completion notifications
```python
response = client.generate_text(
    prompt="Generate text",
    model="meta/llama-2-70b",
    webhook_completed="https://your-server.com/completed"
)
```

#### `webhook_events_filter: Optional[List[str]]`
- List of events that trigger webhooks
- Common events: `"completed"`, `"output"`
```python
response = client.generate_text(
    prompt="Analyze text",
    model="meta/llama-2-70b",
    webhook_events_filter=["completed", "output"]
)
```

#### `wait: Optional[Union[int, bool]]`
- Controls request blocking behavior
- `True`: keeps the request open for up to 60 seconds
- `int`: number of seconds to hold the request open (1-60)
- `False`: doesn't wait (default)
```python
response = client.generate_text(
    prompt="Complex analysis",
    model="meta/llama-2-70b",
    wait=30  # Wait for up to 30 seconds
)
```

#### `stream: bool`
- Enables token streaming for supported models
```python
for chunk in client.generate_text(
    prompt="Write a story",
    model="meta/llama-2-70b",
    stream=True
):
    print(chunk, end="")
```

## chat Method

### Basic Structure
```python
response = client.chat(
    model="meta/llama-2-70b",         # Required
    messages=[...],                   # Required
    webhook="https://...",            # Replicate-specific
    webhook_completed="https://...",  # Replicate-specific
    webhook_events_filter=[...],      # Replicate-specific
    wait=True                         # Replicate-specific
)
```

### Message Formatting
Chat messages are flattened into a single prompt before being sent to Replicate:
```python
prompt = "\n".join([f"{m['role']}: {m['content']}" for m in messages])
prompt += "\nassistant: "
```
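
For instance, a system-plus-user exchange becomes a single string (illustrative; the role labels come straight from the message dicts you pass in):

```python
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Write a haiku about coding"}
]

# Applying the formatting above yields this single prompt string:
#
# system: You are a helpful assistant
# user: Write a haiku about coding
# assistant:
```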
## Training Parameters

When using Replicate's training capabilities:

```python
response = client.train(
    model="stability-ai/sdxl",
    version="39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b",
    input={
        "input_images": "https://domain/images.zip",
        "token_string": "TOK",
        "caption_prefix": "a photo of TOK",
        "max_train_steps": 1000,
        "use_face_detection_instead": False
    },
    destination="username/model-name"
)
```

## Complete Examples

### Example 1: Generation with Webhooks
```python
response = client.generate_text(
    prompt="Write a scientific paper summary",
    model="meta/llama-2-70b",
    webhook="https://your-server.com/updates",
    webhook_completed="https://your-server.com/completed",
    webhook_events_filter=["completed"],
    wait=True
)
```

### Example 2: Chat with Streaming
```python
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Write a haiku about coding"}
]

for chunk in client.chat(
    messages=messages,
    model="meta/llama-2-70b",
    stream=True
):
    print(chunk, end="")
```

### Example 3: Image Generation
```python
response = client.generate_text(
    prompt="A portrait of a wombat gentleman",
    model="stability-ai/stable-diffusion:27b93a2413e7f36cd83da926f3656280b2931564ff050bf9575f1fdf9bcd7478",
    wait=60
)
```

## Error Handling

ClientAI maps Replicate's exceptions to its own error types:
```python
from clientai.exceptions import ClientAIError

try:
    response = client.generate_text(
        prompt="Test prompt",
        model="meta/llama-2-70b",
        wait=True
    )
except ClientAIError as e:
    print(f"Error: {e}")
```

Error mappings:
- `AuthenticationError`: API key issues
- `RateLimitError`: Rate limit exceeded
- `ModelError`: Model not found or failed
- `InvalidRequestError`: Invalid parameters
- `TimeoutError`: Request timeout (default 300s)
- `APIError`: Other server errors
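
Since every Replicate failure surfaces as one of the types above, handlers can branch on the specific class instead of the catch-all. A hedged sketch (retry counts and delays are arbitrary choices, not library defaults):

```python
import time

from clientai.exceptions import (
    ClientAIError,
    ModelError,
    RateLimitError,
    TimeoutError,
)

def generate_with_replicate_retry(prompt: str, model: str) -> str:
    """Retry transient Replicate failures; give up fast on model errors."""
    for attempt in range(3):
        try:
            return client.generate_text(prompt, model=model, wait=True)
        except (RateLimitError, TimeoutError):
            time.sleep(2 ** attempt)  # simple exponential backoff
        except ModelError:
            raise  # retrying an unavailable or failing model rarely helps
    raise ClientAIError("Replicate retries exhausted")
```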
## Parameter Validation Notes

1. Both `model` and `prompt`/`messages` are required
2. Model string format: `"owner/name:version"`, or `"owner/name"` for the latest version
3. `wait` must be a boolean or an integer from 1 to 60
4. Webhook URLs must be valid HTTP/HTTPS URLs
5. `webhook_events_filter` must contain valid event types
6. Some models may not support streaming
7. File inputs can be URLs or local file paths

These parameters allow you to leverage Replicate's features through ClientAI, including model management, webhook notifications, and streaming capabilities.
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml index 8120523..9973df8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -45,14 +45,6 @@ nav: - ClientAI: index.md - Installing: installing.md - Quick-Start: quick-start.md - - API Reference: - - Overview: api/overview.md - - ClientAI: api/clientai.md - - AIProvider: api/ai_provider.md - - Specific Providers: - - Ollama: api/ollama_provider.md - - OpenAI: api/openai_provider.md - - Replicate: api/replicate_provider.md - Usage: - usage/overview.md - usage/initialization.md @@ -64,6 +56,21 @@ nav: - Overview: examples/overview.md - Examples: - AI Dungeon Master: examples/ai_dungeon_master.md + - Advanced: + - Overview: advanced/overview.md + - Specific Providers: + - Ollama: advanced/ollama_specific.md + - OpenAI: advanced/openai_specific.md + - Replicate: advanced/replicate_specific.md + - Error Handling: advanced/error_handling.md + - API Reference: + - Overview: api/overview.md + - ClientAI: api/clientai.md + - AIProvider: api/ai_provider.md + - Specific Providers: + - Ollama: api/ollama_provider.md + - OpenAI: api/openai_provider.md + - Replicate: api/replicate_provider.md - Community: - Overview: community/overview.md - Contributing: community/CONTRIBUTING.md