Merge pull request #37 from randombet/falkor_db_integration
[Falkor DB Integration Part 1] Implement FalkorGraphQueryEngine
randombet authored Oct 7, 2024
2 parents 628f8cd + 6778a5a commit 84066b1
Showing 6 changed files with 623 additions and 1 deletion.
66 changes: 66 additions & 0 deletions .github/workflows/contrib-graph-rag-tests.yml
@@ -0,0 +1,66 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: ContribGraphRagTests

on:
  pull_request:
    branches: ["main"]
    paths:
      - "autogen/agentchat/contrib/graph_rag/**"
      - "test/agentchat/contrib/graph_rag/**"
      - ".github/workflows/contrib-tests.yml"
      - "setup.py"

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

permissions:
  {}
  # actions: read
  # checks: read
  # contents: read
  # deployments: read

jobs:
  GraphRagIntegrationTest-FalkorDB-Ubuntu:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11"]
    services:
      falkordb:
        image: falkordb/falkordb:edge
        ports:
          - 6379:6379
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install packages and dependencies for all tests
        run: |
          python -m pip install --upgrade pip wheel
          pip install pytest
      - name: Install Falkor DB SDK when on linux
        run: |
          pip install -e .[graph_rag_falkor_db]
      - name: Set AUTOGEN_USE_DOCKER based on OS
        shell: bash
        run: |
          echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV
      - name: Coverage
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
          AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
          OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }}
        run: |
          pip install pytest-cov>=5
          pytest test/agentchat/contrib/graph_rag/test_falkor_graph_rag.py --skip-openai
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
          flags: unittests
76 changes: 76 additions & 0 deletions autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py
@@ -0,0 +1,76 @@
import os
from dataclasses import dataclass, field
from typing import List

from graphrag_sdk import KnowledgeGraph, Source
from graphrag_sdk.schema import Schema

from .document import Document
from .graph_query_engine import GraphStoreQueryResult


@dataclass
class FalkorGraphQueryResult(GraphStoreQueryResult):
    messages: list = field(default_factory=list)


class FalkorGraphQueryEngine:
    """
    This is a wrapper for the FalkorDB KnowledgeGraph.
    """

    def __init__(
        self,
        name: str,
        host: str = "127.0.0.1",
        port: int = 6379,
        username: str | None = None,
        password: str | None = None,
        model: str = "gpt-4-1106-preview",
        schema: Schema | None = None,
    ):
        """
        Initialize a FalkorDB knowledge graph.
        Please also refer to https://github.com/FalkorDB/GraphRAG-SDK/blob/main/graphrag_sdk/kg.py

        Args:
            name (str): Knowledge graph name.
            host (str): FalkorDB hostname.
            port (int): FalkorDB port number.
            username (str|None): FalkorDB username.
            password (str|None): FalkorDB password.
            model (str): OpenAI model used by FalkorDB to build and retrieve from the graph.
            schema: FalkorDB knowledge graph schema (ontology), see https://github.com/FalkorDB/GraphRAG-SDK/blob/main/graphrag_sdk/schema/schema.py
                If None, FalkorDB will auto-generate a schema from the input docs.
        """
        self.knowledge_graph = KnowledgeGraph(name, host, port, username, password, model, schema)

    def init_db(self, input_doc: List[Document] | None):
        """
        Build the knowledge graph with input documents.
        """
        sources = []
        for doc in input_doc or []:
            if os.path.exists(doc.path_or_url):
                sources.append(Source(doc.path_or_url))

        if sources:
            self.knowledge_graph.process_sources(sources)

    def add_records(self, new_records: List) -> bool:
        raise NotImplementedError("This method is not supported by the FalkorDB SDK yet.")

    def query(self, question: str, n_results: int = 1, **kwargs) -> FalkorGraphQueryResult:
        """
        Query the knowledge graph with a question and optional message history.

        Args:
            question: a human input question.
            n_results: number of returned results.
            kwargs:
                messages: a list of message history.

        Returns: FalkorGraphQueryResult
        """
        messages = kwargs.pop("messages", [])
        answer, messages = self.knowledge_graph.ask(question, messages)
        return FalkorGraphQueryResult(answer=answer, results=[], messages=messages)
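
For orientation (not part of the diff), a minimal usage sketch of the new engine. It assumes a FalkorDB server reachable at the default 127.0.0.1:6379 and an OPENAI_API_KEY in the environment; the graph name "movies" and the file path are placeholders.

from autogen.agentchat.contrib.graph_rag.document import Document, DocumentType
from autogen.agentchat.contrib.graph_rag.falkor_graph_query_engine import FalkorGraphQueryEngine

# Hypothetical example: build a small knowledge graph from one local text file, then query it.
# Assumes FalkorDB is running on 127.0.0.1:6379 and OPENAI_API_KEY is set.
engine = FalkorGraphQueryEngine(name="movies")  # schema=None lets FalkorDB infer an ontology from the docs
engine.init_db(input_doc=[Document(doctype=DocumentType.TEXT, path_or_url="the_matrix.txt")])

result = engine.query("Who are the actors in 'The Matrix'?")
print(result.answer)

# A follow-up question can pass the returned message history back in,
# since query() pops "messages" from kwargs and forwards it to KnowledgeGraph.ask().
follow_up = engine.query("Which of them also appear in the sequels?", messages=result.messages)
print(follow_up.answer)
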
1 change: 0 additions & 1 deletion autogen/agentchat/contrib/graph_rag/graph_query_engine.py
@@ -34,7 +34,6 @@ def init_db(self, input_doc: List[Document] | None = None):
        Args:
            input_doc: a list of input documents that are used to build the graph in database.
        Returns: GraphStore
        """
        pass

5 changes: 5 additions & 0 deletions setup.py
@@ -59,6 +59,10 @@

retrieve_chat_pgvector = [*retrieve_chat, "pgvector>=0.2.5"]

graph_rag_falkor_db = [
    "graphrag_sdk",
]

if current_os in ["Windows", "Darwin"]:
    retrieve_chat_pgvector.extend(["psycopg[binary]>=3.1.18"])
elif current_os == "Linux":
@@ -81,6 +85,7 @@
    "retrievechat-pgvector": retrieve_chat_pgvector,
    "retrievechat-mongodb": [*retrieve_chat, "pymongo>=4.0.0"],
    "retrievechat-qdrant": [*retrieve_chat, "qdrant_client", "fastembed>=0.3.1"],
    "graph_rag_falkor_db": graph_rag_falkor_db,
    "autobuild": ["chromadb", "sentence-transformers", "huggingface-hub", "pysqlite3"],
    "teachable": ["chromadb"],
    "lmm": ["replicate", "pillow"],
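Aside (not part of the diff): this optional extra is what the workflow's install step above targets, so a development checkout would pull in graphrag_sdk with pip install -e .[graph_rag_falkor_db].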
61 changes: 61 additions & 0 deletions test/agentchat/contrib/graph_rag/test_falkor_graph_rag.py
@@ -0,0 +1,61 @@
import sys

import pytest
from conftest import reason, skip_openai  # noqa: E402

try:
    from graphrag_sdk.schema import Schema

    from autogen.agentchat.contrib.graph_rag.document import (
        Document,
        DocumentType,
    )
    from autogen.agentchat.contrib.graph_rag.falkor_graph_query_engine import (
        FalkorGraphQueryEngine,
        GraphStoreQueryResult,
    )
except ImportError:
    skip = True
else:
    skip = False

reason = "do not run on macOS or Windows OR dependency is not installed OR " + reason


@pytest.mark.skipif(
    sys.platform in ["darwin", "win32"] or skip or skip_openai,
    reason=reason,
)
def test_falkor_db_query_engine():
    """
    Test the FalkorDB query engine.
    1. Create a test FalkorDB query engine with a schema.
    2. Initialize it with an input txt file.
    3. Query it with a question and verify the result contains the critical information.
    """
    # Arrange
    test_schema = Schema()
    actor = test_schema.add_entity("Actor").add_attribute("name", str, unique=True)
    movie = test_schema.add_entity("Movie").add_attribute("title", str, unique=True)
    test_schema.add_relation("ACTED", actor, movie)

    # The graph name is arbitrary; it only identifies the knowledge graph inside FalkorDB.
    query_engine = FalkorGraphQueryEngine(name="test_matrix", schema=test_schema)

    source_file = "test/agentchat/contrib/graph_rag/the_matrix.txt"
    input_docs = [Document(doctype=DocumentType.TEXT, path_or_url=source_file)]

    question = "Name a few actors who've played in 'The Matrix'"

    # Act
    query_engine.init_db(input_doc=input_docs)

    query_result: GraphStoreQueryResult = query_engine.query(question=question)

    # Assert
    assert query_result.answer.find("Keanu Reeves") >= 0
    for message in query_result.messages:
        if isinstance(message, dict) and message.get("role") == "user":
            assert "content" in message
            assert message["content"] == question
            return
    pytest.fail("Question not found in message history.")
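
Note (not part of the diff): per the workflow above, the CI job runs this test with a FalkorDB service started from the falkordb/falkordb:edge image on port 6379 and the graph_rag_falkor_db extra installed, invoking pytest test/agentchat/contrib/graph_rag/test_falkor_graph_rag.py --skip-openai; running it locally would presumably need the same pieces plus an OpenAI key for graph construction.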