From 441b8c714e761108b48d3fb012a88d9f9f834077 Mon Sep 17 00:00:00 2001
From: Ali Farooq <ali_farooq1993@hotmail.com>
Date: Mon, 7 Apr 2025 16:04:50 +0000
Subject: [PATCH 1/3] openai github pipeline

---
 github-pipeline/openai_github_api_pipeline.py | 271 ++++++++++++++++++
 1 file changed, 271 insertions(+)
 create mode 100644 github-pipeline/openai_github_api_pipeline.py

diff --git a/github-pipeline/openai_github_api_pipeline.py b/github-pipeline/openai_github_api_pipeline.py
new file mode 100644
index 0000000..94078d0
--- /dev/null
+++ b/github-pipeline/openai_github_api_pipeline.py
@@ -0,0 +1,271 @@
+import asyncio
+import traceback
+from typing import List, Union, Generator, Iterator, Sequence
+import os
+import requests
+import json
+import base64
+from pydantic import BaseModel, Field
+from langchain import hub
+from langchain_openai import ChatOpenAI
+from langchain.agents import create_tool_calling_agent, AgentExecutor
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.tools import BaseTool, tool
+from langchain_community.llms import Ollama
+from llama_index.core import VectorStoreIndex, Settings, Document
+from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.core.llms import MockLLM
+
+index = None
+documents = None
+
+class SearchRepositoryInput(BaseModel):
+    query: str = Field(description="Search query on github repository files.")
+
+@tool("search_repository", args_schema=SearchRepositoryInput, return_direct=False)
+def search_repository(query: str) -> str:
+    """Search GitHub repository files and return information based on the query."""
+    try:
+        global index, documents
+        
+        # Query the index
+        query_engine = index.as_query_engine(llm=MockLLM())
+        response = query_engine.query(query)
+        return response.response
+
+    except Exception as e:
+        print(f"Error in search_repository: {str(e)}")
+        return "An error occurred while searching the repository."
+
+class Pipeline:
+
+    class Valves(BaseModel):
+
+        OPENAI_API_BASE_URL: str
+        OPENAI_API_KEY: str
+        OPENAI_API_MODEL: str
+        OPENAI_API_TEMPERATURE: float
+
+        OPENAI_EMBED_MODEL: str
+
+        GITHUB_BASE_URL: str
+        GITHUB_TOKEN: str
+        GITHUB_USER_NAME: str
+        GITHUB_REPO_NAME: str
+
+        SYSTEM_PROMPT: str
+        
+    def __init__(self):
+
+        self.name = "Chat with GitHub with valves"
+
+        self.valves = self.Valves(
+            **{
+                "OPENAI_API_BASE_URL": os.getenv("OPENAI_API_BASE_URL", "https://api.openai.com/v1"),
+                "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", ""),
+                "OPENAI_API_MODEL": os.getenv("OPENAI_API_MODEL", "gpt-4o"),
+                "OPENAI_API_TEMPERATURE": os.getenv("OPENAI_API_TEMPERATURE", 0.7),
+
+                "OPENAI_EMBED_MODEL": os.getenv("OPENAI_EMBED_MODEL", "text-embedding-ada-002"),
+
+                "GITHUB_BASE_URL": os.getenv("GITHUB_BASE_URL", "https://api.github.com"),
+                "GITHUB_TOKEN": os.getenv("GITHUB_TOKEN", ""),
+                "GITHUB_USER_NAME": os.getenv("GITHUB_USER_NAME", ""),
+                "GITHUB_REPO_NAME": os.getenv("GITHUB_REPO_NAME", ""),
+                
+                "SYSTEM_PROMPT": os.getenv("SYSTEM_PROMPT", "You are a smart assistant that read from github repository, retrieves their information, analyzes them, and assists users with Q&A over extracted content."),
+            }
+        )
+
+        self.tools = [search_repository]
+        self.pipelines = self.get_openai_models()
+    
+    def on_startup(self):
+        """Loads GitHub repository data and creates an index."""
+        try:
+            global index, documents
+            
+            repository_url = f"{self.valves.GITHUB_BASE_URL}/repos/{self.valves.GITHUB_USER_NAME}/{self.valves.GITHUB_REPO_NAME}"
+            headers = { 'Authorization': f"Bearer {self.valves.GITHUB_TOKEN}" }
+            
+            embed_model = OpenAIEmbedding(model=self.valves.OPENAI_EMBED_MODEL, api_key=self.valves.OPENAI_API_KEY)
+            
+            try:
+                response = requests.get(repository_url, headers=headers)
+                if response.status_code == 200:
+                    repositories = response.json()
+                        
+                    file_paths = self.get_all_files(repository_url, headers)
+
+                    files_data = []
+                    for file_path in file_paths:
+                        content = self.get_file_content(repository_url, file_path, headers)
+                        if content:
+                            files_data.append({"path": file_path, "content": content})
+
+                    documents = [
+                        Document(
+                            text=f"Github URL: {repository_url}",
+                            metadata={"type": "repo_info", "key": "Github URL"}
+                        ),
+                        Document(
+                            text=f"Project name: {repositories.get('name', 'Unknown')}",
+                            metadata={"type": "repo_info", "key": "Project Name"}
+                        ),
+                        Document(
+                            text=f"Project owner: {repositories.get('owner', {}).get('login', 'Unknown')}",
+                            metadata={"type": "repo_info", "key": "Project Owner"}
+                        ),
+                        Document(
+                            text=f"List users with access: {self.get_collaborators(repositories.get('collaborators_url', '').split('{')[0])}",
+                            metadata={"type": "repo_info", "key": "Users with Access"}
+                        ),
+                        Document(
+                            text=f"Programming languages used: {self.get_languages(repositories.get('languages_url', ''))}",
+                            metadata={"type": "repo_info", "key": "Languages Used"}
+                        ),
+                        Document(
+                            text=f"Security/visibility level: {repositories.get('visibility', 'Unknown')}",
+                            metadata={"type": "repo_info", "key": "Visibility"}
+                        ),
+                        Document(
+                            text=f"Summary: {repositories.get('description', 'No description')}",
+                            metadata={"type": "repo_info", "key": "Summary"}
+                        ),
+                        Document(
+                            text=f"Last maintained: {repositories.get('pushed_at', 'Unknown')}",
+                            metadata={"type": "repo_info", "key": "Last Maintained"}
+                        ),
+                        Document(
+                            text=f"Last release: {repositories.get('default_branch', 'Unknown')}",
+                            metadata={"type": "repo_info", "key": "Last Release"}
+                        ),
+                        Document(
+                            text=f"Open issues: {self.get_open_issues(repository_url, headers)}",
+                            metadata={"type": "repo_info", "key": "Open Issues"}
+                        )
+                    ]
+
+                    for file in files_data:
+                        documents.append(Document(
+                        text=f"File: {file['path']}\nContent:\n{file['content']}",
+                        metadata={"type": "file", "file_path": file["path"]}
+                    ))
+
+                else:
+                    print(f"Failed to retrieve repositories. Status code: {response.status_code}")
+                
+            except Exception as e:
+                print(f"Error: {e}")
+
+            try:
+                index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
+            except Exception as e:
+                print(f"Error while indexing: {str(e)}")
+
+            print("GitHub repository indexed successfully!")
+                
+        except Exception as e:
+            print(f"Error in on_startup: {str(e)}")
+
+    def get_collaborators(self, collaborators_url):
+        response = requests.get(collaborators_url)
+        if response.status_code == 200:
+            return [collaborator["login"] for collaborator in response.json()]
+        else:
+            return []
+
+    def get_languages(self, languages_url):
+        response = requests.get(languages_url)
+        if response.status_code == 200:
+            return list(response.json().keys())
+        else:
+            return []
+
+    def get_open_issues(self, url, headers):
+        url = f"{url}/issues?state=open"
+        response = requests.get(url, headers=headers)
+        if response.status_code == 200:
+            return response.json()
+        else:
+            print(f"Error: {response.status_code}")
+            return []
+
+    def get_all_files(self, url, headers):
+        url = f"{url}/git/trees/main?recursive=1"
+        response = requests.get(url, headers=headers)
+        if response.status_code == 200:
+            data = response.json()
+            return [item["path"] for item in data.get("tree", []) if item["type"] == "blob"]
+        else:
+            print(f"Error: {response.status_code}")
+            return []
+
+    def get_file_content(self, url, file_path, headers):
+        url = f"{url}/contents/{file_path}"
+        response = requests.get(url, headers=headers)
+
+        if response.status_code == 200:
+            file_data = response.json()
+            content = file_data.get("content", "")
+            encoding = file_data.get("encoding", "")
+
+            if encoding == "base64":
+                decoded_content = base64.b64decode(content)
+                try:
+                    return decoded_content.decode("utf-8")  # Try decoding as UTF-8 text
+                except UnicodeDecodeError:
+                    return "Binary Files"  # Return raw bytes for binary files
+            else:
+                print(f"Unknown encoding for {file_path}: {encoding}")
+                return None
+        else:
+            print(f"Error fetching {file_path}: {response.status_code}")
+            return None
+
+    def get_openai_models(self):
+        if self.valves.OPENAI_API_KEY:
+            try:
+                headers = {
+                    "Authorization": f"Bearer {self.valves.OPENAI_API_KEY}",
+                    "Content-Type": "application/json"
+                }
+                response = requests.get(
+                    f"{self.valves.OPENAI_API_BASE_URL}/models", headers=headers
+                )
+                models = response.json()
+                return [
+                    {"id": model["id"], "name": model.get("name", model["id"])}
+                    for model in models["data"] if "gpt" in model["id"]
+                ]
+            except Exception as e:
+                print(f"Error: {e}")
+                return [{"id": "error", "name": "Could not fetch models from OpenAI."}]
+        else:
+            return []
+
+
+    def pipe(self, user_message: str, model_id: str, messages: List[dict], body: dict):
+        """Processes user messages and interacts with the LLM."""
+        try:
+            model = ChatOpenAI(
+                api_key=self.valves.OPENAI_API_KEY,
+                model=self.valves.OPENAI_API_MODEL,
+                temperature=self.valves.OPENAI_API_TEMPERATURE
+            )
+            
+            tools: Sequence[BaseTool] = self.tools
+
+            prompt = ChatPromptTemplate.from_messages([
+                ("system", self.valves.SYSTEM_PROMPT),
+                MessagesPlaceholder("chat_history"),
+                ("user", "{input}"),
+                MessagesPlaceholder("agent_scratchpad")
+            ])
+            agent = create_tool_calling_agent(model, tools, prompt)
+            agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)
+            response = agent_executor.invoke({"input": user_message, "chat_history": messages})
+            return response["output"]
+        except Exception as e:
+            print(f"An error occurred: {str(e)}")
+            raise

From d0145772f068bed0d58809bc90e17249571786d6 Mon Sep 17 00:00:00 2001
From: Ali Farooq <ali_farooq1993@hotmail.com>
Date: Tue, 8 Apr 2025 10:45:36 +0000
Subject: [PATCH 2/3] updated pipeline with valves

---
 github-pipeline/openai_github_api_pipeline.py | 64 +++++++++----------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/github-pipeline/openai_github_api_pipeline.py b/github-pipeline/openai_github_api_pipeline.py
index 94078d0..2f73ebe 100644
--- a/github-pipeline/openai_github_api_pipeline.py
+++ b/github-pipeline/openai_github_api_pipeline.py
@@ -1,10 +1,9 @@
-import asyncio
-import traceback
-from typing import List, Union, Generator, Iterator, Sequence
 import os
 import requests
 import json
 import base64
+import traceback
+from typing import List, Union, Generator, Iterator, Sequence
 from pydantic import BaseModel, Field
 from langchain import hub
 from langchain_openai import ChatOpenAI
@@ -41,46 +40,42 @@ class Pipeline:
 
     class Valves(BaseModel):
 
-        OPENAI_API_BASE_URL: str
-        OPENAI_API_KEY: str
-        OPENAI_API_MODEL: str
-        OPENAI_API_TEMPERATURE: float
+        OPENAI_API_BASE_URL: str = "https://api.openai.com/v1"
+        OPENAI_API_KEY: str = ""
+        OPENAI_API_MODEL: str = "gpt-4o"
+        OPENAI_API_TEMPERATURE: float = 0.7
+        OPENAI_EMBED_MODEL: str = "text-embedding-ada-002"
 
-        OPENAI_EMBED_MODEL: str
+        GITHUB_BASE_URL: str = "https://api.github.com"
+        GITHUB_TOKEN: str = ""
+        GITHUB_USER_NAME: str = ""
+        GITHUB_REPO_NAME: str = ""
 
-        GITHUB_BASE_URL: str
-        GITHUB_TOKEN: str
-        GITHUB_USER_NAME: str
-        GITHUB_REPO_NAME: str
-
-        SYSTEM_PROMPT: str
+        SYSTEM_PROMPT: str = "You are a smart assistant that read from github repository, retrieves their information, analyzes them, and assists users with Q&A over extracted content."
         
     def __init__(self):
 
-        self.name = "Chat with GitHub with valves"
+        self.name = "Chat with GitHub Repository"
+        self.check = 0
 
         self.valves = self.Valves(
-            **{
-                "OPENAI_API_BASE_URL": os.getenv("OPENAI_API_BASE_URL", "https://api.openai.com/v1"),
-                "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", ""),
-                "OPENAI_API_MODEL": os.getenv("OPENAI_API_MODEL", "gpt-4o"),
-                "OPENAI_API_TEMPERATURE": os.getenv("OPENAI_API_TEMPERATURE", 0.7),
-
-                "OPENAI_EMBED_MODEL": os.getenv("OPENAI_EMBED_MODEL", "text-embedding-ada-002"),
-
-                "GITHUB_BASE_URL": os.getenv("GITHUB_BASE_URL", "https://api.github.com"),
-                "GITHUB_TOKEN": os.getenv("GITHUB_TOKEN", ""),
-                "GITHUB_USER_NAME": os.getenv("GITHUB_USER_NAME", ""),
-                "GITHUB_REPO_NAME": os.getenv("GITHUB_REPO_NAME", ""),
-                
-                "SYSTEM_PROMPT": os.getenv("SYSTEM_PROMPT", "You are a smart assistant that read from github repository, retrieves their information, analyzes them, and assists users with Q&A over extracted content."),
-            }
+            OPENAI_API_BASE_URL = os.getenv("OPENAI_API_BASE_URL", ""),
+            OPENAI_API_MODEL = os.getenv("OPENAI_API_MODEL", ""),
+            OPENAI_API_TEMPERATURE = float(os.getenv("OPENAI_API_TEMPERATURE"), ""),
+            OPENAI_EMBED_MODEL = os.getenv("OPENAI_EMBED_MODEL", ""),
+            OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", ""),
+
+            GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", ""),
+            GITHUB_BASE_URL = os.getenv("GITHUB_BASE_URL", ""),
+            GITHUB_USER_NAME = os.getenv("GITHUB_USER_NAME", ""),
+            GITHUB_REPO_NAME = os.getenv("GITHUB_REPO_NAME", ""),
+
+            SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", ""),
         )
 
         self.tools = [search_repository]
-        self.pipelines = self.get_openai_models()
     
-    def on_startup(self):
+    def set_github_repo(self):
         """Loads GitHub repository data and creates an index."""
         try:
             global index, documents
@@ -248,6 +243,11 @@ def get_openai_models(self):
     def pipe(self, user_message: str, model_id: str, messages: List[dict], body: dict):
         """Processes user messages and interacts with the LLM."""
         try:
+            if self.check == 0:
+                self.pipelines = self.get_openai_models()
+                self.set_github_repo()
+                self.check = 1
+
             model = ChatOpenAI(
                 api_key=self.valves.OPENAI_API_KEY,
                 model=self.valves.OPENAI_API_MODEL,

From 82550c4910f5b87d562e54fd51dcb9962669494f Mon Sep 17 00:00:00 2001
From: Ali Farooq <ali_farooq1993@hotmail.com>
Date: Tue, 8 Apr 2025 10:48:42 +0000
Subject: [PATCH 3/3] docker-compose.yml

---
 github-pipeline/docker-compose.yml | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 github-pipeline/docker-compose.yml

diff --git a/github-pipeline/docker-compose.yml b/github-pipeline/docker-compose.yml
new file mode 100644
index 0000000..a5986af
--- /dev/null
+++ b/github-pipeline/docker-compose.yml
@@ -0,0 +1,28 @@
+version: '3.8'
+
+services:
+  open-webui:
+    image: ghcr.io/open-webui/open-webui:main
+    container_name: open-webui
+    ports:
+      - "3000:8080"
+    volumes:
+      - open-webui:/app/backend/data
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    restart: always
+
+  pipelines:
+    image: ghcr.io/open-webui/pipelines:main
+    container_name: pipelines
+    ports:
+      - "9099:9099"
+    volumes:
+      - pipelines:/app/pipelines
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    restart: always
+
+volumes:
+  open-webui:
+  pipelines:
\ No newline at end of file