Skip to content

feat: support audio content #725

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,12 @@ def main(
@app.call_tool()
async def call_tool(
name: str, arguments: dict
) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
) -> list[
types.TextContent
| types.ImageContent
| types.AudioContent
| types.EmbeddedResource
]:
ctx = app.request_context
interval = arguments.get("interval", 1.0)
count = arguments.get("count", 5)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,12 @@ def main(
@app.call_tool()
async def call_tool(
name: str, arguments: dict
) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
) -> list[
types.TextContent
| types.ImageContent
| types.AudioContent
| types.EmbeddedResource
]:
ctx = app.request_context
interval = arguments.get("interval", 1.0)
count = arguments.get("count", 5)
Expand Down
11 changes: 9 additions & 2 deletions examples/servers/simple-tool/mcp_simple_tool/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

async def fetch_website(
url: str,
) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
) -> list[
types.TextContent | types.ImageContent | types.AudioContent | types.EmbeddedResource
]:
headers = {
"User-Agent": "MCP Test Server (github.com/modelcontextprotocol/python-sdk)"
}
Expand All @@ -31,7 +33,12 @@ def main(port: int, transport: str) -> int:
@app.call_tool()
async def fetch_tool(
name: str, arguments: dict
) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
) -> list[
types.TextContent
| types.ImageContent
| types.AudioContent
| types.EmbeddedResource
]:
if name != "fetch":
raise ValueError(f"Unknown tool: {name}")
if "url" not in arguments:
Expand Down
4 changes: 2 additions & 2 deletions src/mcp/server/fastmcp/prompts/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
import pydantic_core
from pydantic import BaseModel, Field, TypeAdapter, validate_call

from mcp.types import EmbeddedResource, ImageContent, TextContent
from mcp.types import AudioContent, EmbeddedResource, ImageContent, TextContent

CONTENT_TYPES = TextContent | ImageContent | EmbeddedResource
CONTENT_TYPES = TextContent | ImageContent | AudioContent | EmbeddedResource


class Message(BaseModel):
Expand Down
7 changes: 4 additions & 3 deletions src/mcp/server/fastmcp/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
from mcp.shared.context import LifespanContextT, RequestContext
from mcp.types import (
AnyFunction,
AudioContent,
EmbeddedResource,
GetPromptResult,
ImageContent,
Expand Down Expand Up @@ -273,7 +274,7 @@ def get_context(self) -> Context[ServerSession, object]:

async def call_tool(
self, name: str, arguments: dict[str, Any]
) -> Sequence[TextContent | ImageContent | EmbeddedResource]:
) -> Sequence[TextContent | ImageContent | AudioContent | EmbeddedResource]:
"""Call a tool by name with arguments."""
context = self.get_context()
result = await self._tool_manager.call_tool(name, arguments, context=context)
Expand Down Expand Up @@ -873,12 +874,12 @@ async def get_prompt(

def _convert_to_content(
result: Any,
) -> Sequence[TextContent | ImageContent | EmbeddedResource]:
) -> Sequence[TextContent | ImageContent | AudioContent | EmbeddedResource]:
"""Convert a result to a sequence of content objects."""
if result is None:
return []

if isinstance(result, TextContent | ImageContent | EmbeddedResource):
if isinstance(result, TextContent | ImageContent | AudioContent | EmbeddedResource):
return [result]

if isinstance(result, Image):
Expand Down
5 changes: 4 additions & 1 deletion src/mcp/server/lowlevel/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,10 @@ def decorator(
...,
Awaitable[
Iterable[
types.TextContent | types.ImageContent | types.EmbeddedResource
types.TextContent
| types.ImageContent
| types.AudioContent
| types.EmbeddedResource
]
],
],
Expand Down
23 changes: 19 additions & 4 deletions src/mcp/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,11 +651,26 @@ class ImageContent(BaseModel):
model_config = ConfigDict(extra="allow")


class AudioContent(BaseModel):
    """Audio content for a message."""

    # Pydantic model configuration: keys beyond the declared fields are
    # accepted (extra="allow").
    model_config = ConfigDict(extra="allow")

    # Content-type tag; the only accepted value is the literal "audio".
    type: Literal["audio"]

    # The base64-encoded audio data.
    data: str

    # The MIME type of the audio. Different providers may support different
    # audio types.
    mimeType: str

    # Optional annotations attached to this content; defaults to None.
    annotations: Annotations | None = None


class SamplingMessage(BaseModel):
"""Describes a message issued to or received from an LLM API."""

role: Role
content: TextContent | ImageContent
content: TextContent | ImageContent | AudioContent
model_config = ConfigDict(extra="allow")


Expand All @@ -677,7 +692,7 @@ class PromptMessage(BaseModel):
"""Describes a message returned as part of a prompt."""

role: Role
content: TextContent | ImageContent | EmbeddedResource
content: TextContent | ImageContent | AudioContent | EmbeddedResource
model_config = ConfigDict(extra="allow")


Expand Down Expand Up @@ -796,7 +811,7 @@ class CallToolRequest(Request[CallToolRequestParams, Literal["tools/call"]]):
class CallToolResult(Result):
"""The server's response to a tool call."""

content: list[TextContent | ImageContent | EmbeddedResource]
content: list[TextContent | ImageContent | AudioContent | EmbeddedResource]
isError: bool = False


Expand Down Expand Up @@ -960,7 +975,7 @@ class CreateMessageResult(Result):
"""The client's response to a sampling/create_message request from the server."""

role: Role
content: TextContent | ImageContent
content: TextContent | ImageContent | AudioContent
model: str
"""The name of the model that generated the message."""
stopReason: StopReason | None = None
Expand Down
3 changes: 2 additions & 1 deletion tests/issues/test_88_random_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from mcp.server.lowlevel import Server
from mcp.shared.exceptions import McpError
from mcp.types import (
AudioContent,
EmbeddedResource,
ImageContent,
TextContent,
Expand All @@ -37,7 +38,7 @@ async def test_notification_validation_error(tmp_path: Path):
@server.call_tool()
async def slow_tool(
name: str, arg
) -> Sequence[TextContent | ImageContent | EmbeddedResource]:
) -> Sequence[TextContent | ImageContent | AudioContent | EmbeddedResource]:
nonlocal request_count
request_count += 1

Expand Down
12 changes: 8 additions & 4 deletions tests/server/fastmcp/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
create_connected_server_and_client_session as client_session,
)
from mcp.types import (
AudioContent,
BlobResourceContents,
ImageContent,
TextContent,
Expand Down Expand Up @@ -207,10 +208,11 @@ def image_tool_fn(path: str) -> Image:
return Image(path)


def mixed_content_tool_fn() -> list[TextContent | ImageContent]:
def mixed_content_tool_fn() -> list[TextContent | ImageContent | AudioContent]:
return [
TextContent(type="text", text="Hello"),
ImageContent(type="image", data="abc", mimeType="image/png"),
AudioContent(type="audio", data="def", mimeType="audio/wav"),
]


Expand Down Expand Up @@ -312,14 +314,16 @@ async def test_tool_mixed_content(self):
mcp.add_tool(mixed_content_tool_fn)
async with client_session(mcp._mcp_server) as client:
result = await client.call_tool("mixed_content_tool_fn", {})
assert len(result.content) == 2
content1 = result.content[0]
content2 = result.content[1]
assert len(result.content) == 3
content1, content2, content3 = result.content
assert isinstance(content1, TextContent)
assert content1.text == "Hello"
assert isinstance(content2, ImageContent)
assert content2.mimeType == "image/png"
assert content2.data == "abc"
assert isinstance(content3, AudioContent)
assert content3.mimeType == "audio/wav"
assert content3.data == "def"

@pytest.mark.anyio
async def test_tool_mixed_list_with_image(self, tmp_path: Path):
Expand Down
Loading