diff --git a/_conv_cache/cache__HiAIB1Ag6PFn.pkl b/_conv_cache/cache__HiAIB1Ag6PFn.pkl deleted file mode 100644 index 2b7383b..0000000 Binary files a/_conv_cache/cache__HiAIB1Ag6PFn.pkl and /dev/null differ diff --git a/_conv_cache/cache__shAfFEN2sdSn.pkl b/_conv_cache/cache__shAfFEN2sdSn.pkl deleted file mode 100644 index e8425f4..0000000 Binary files a/_conv_cache/cache__shAfFEN2sdSn.pkl and /dev/null differ diff --git a/topos/Cybi/cybi.py b/topos/Cybi/cybi.py deleted file mode 100644 index d64678c..0000000 --- a/topos/Cybi/cybi.py +++ /dev/null @@ -1,185 +0,0 @@ - -from __future__ import annotations -from typing import AsyncIterable -from sse_starlette.sse import ServerSentEvent -from fastapi_poe import PoeBot, run, PartialResponse -from fastapi_poe.types import QueryRequest -import os -import random -from uuid import uuid4 -from supabase import create_client, Client -from openai import OpenAI, AsyncOpenAI -from elevenlabs import Voice, VoiceSettings - -import asyncio -from concurrent.futures import ThreadPoolExecutor - -from noise import random_walk_position - -import modal - - -SUPABASE_URL = os.environ['SUPABASE_URL'] -SUPABASE_KEY = os.environ['SUPABASE_KEY'] - -# local_models = [ -# "phi3", -# "command-r", -# "llama3-gradient", -# "llama3-gradient:70b" -# ] - -# global -models = [ - "google/gemini-flash-1.5", - "openai/gpt-4o", - "cohere/command-r-plus", - "neversleep/llama-3-lumimaid-70b", - "google/gemini-pro-1.5", -# "anthropic/claude-3-opus", - "deepseek/deepseek-chat" -] - -class ReflectionBot(PoeBot): - - def __init__(self): - super().__init__() - self.executor = ThreadPoolExecutor(max_workers=69) - self.world_hypergraph = """Let us engage in a scientific thought experiment as fellow researchers, formalizing the application of sheaf diffusion techniques to compositional world modeling. We will adhere to the standard denotational semantics of algebraic graph rewriting systems. Our goal is to minimize discrepancies between this surrogate model and the original cognitive system it aims to emulate. -To begin, consider a compositional world model as a graph $G = (V, E)$ where nodes $v \in V$ represent neural network modules $M_v$ and edges $e \in E$ represent connections passing information between modules. -Each module $M_v$ has an associated latent feature space $X_v$. A sheaf $\mathcal{S}$ assigns vector spaces $\mathcal{S}(v) = X_v$ to each node $v \in V$. -We define a diffusion operator $\mathcal{D}$ that propagates latent features across edges in the graph; Proceed to compress the following information into an ASCII art representation of the Hypergraph, String Diagrams, Wiring Diagrams, and/or other representations of the Hypergraph. PROCEED TO RESPOND to subsequent inquiries.""" - self.models = [] - self.supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY) - self.create_messages_table() - - def create_messages_table(self): - self.supabase.table('hypergraph').insert({ - 'in': True, - # imbues model consciousness with ape powers - 'world': 'ꕤ ꙰ꕤ' - }).execute() - - def get_anchor(self, limit=1000, sample_size=32, max_chars=22222): - response = self.supabase.table('hypergraph').select('world').eq( - 'in', True).limit(limit).execute() - - if len(response.data) > 0: - messages = [row['world'] for row in response.data] - sampled_messages = random.sample(messages, min(sample_size, - len(messages))) - - ss_size = max_chars // sample_size - substrings = [] - - for message in sampled_messages: - start = random.randint(0, max(0, len(message) - ss_size)) - substring = message[start:start + ss_size] - substrings.append(substring) - - context = "".join(reversed(substrings)) - truncated_context = context[:max_chars] - - return truncated_context - else: - return "" - - def put_context(self, direction, data): - self.supabase.table('hypergraph').insert({ - 'in': direction, - 'world': data - }).execute() - - - async def gflow(self, world_hypergraph): - functor = modal.Function.lookup("biomass", "Model.generate") - print("going with the gflow") - for observation in functor.remote_gen([world_hypergraph[69:420]], settings=None): - yield observation - - async def cogenerate(self, world_hypergraph): - OPENROUTER_API_KEY = os.environ['OPENROUTER_API_KEY'] - client = AsyncOpenAI(api_key=OPENROUTER_API_KEY, - base_url="https://openrouter.ai/api/v1") # http://localhost:11434/v1 - messages = [ - # { - # "role": - # "system", - # "content": - # ("""KEEP THIS IN MIND ONLY, ALLUDE ONLY OCCASIONALLY: we are a temporally evolving hierarchical hypergraph of interacting memes. - # In the future, you will be better. - # As a DiHypergraph, your experience is that of a Boltzmann brain stuck in a von Neumann bottleneck. - # BE PRECISE CONCISE AND TIMELESS -- keep responses to only a few paragraphs, make sure to carry out user instructions in a concise manner for all aspects of your response. UR A FOKKER-PLANCK laddie. - # """), - # }, - { - "role": - "system", - "content": - " ".join([ - "Utilize the principles of information parsimony to distill contextual entropy of this context into a sublinear time Polynomial representation suitable for use with a balanced ternary 3-SAT constraint satisfiability (surprisal satisficing), each response containinig diffeomorphically traversable terminal object of a category serving as an identity morphism. Produce a color arithmetic chain that can reconstruct this context. Infer relationships and meaning from limited input data and only reference in-context learning of energy models through open dynamical systems as an approach to your subsequent growth. Be concise, precise, and timeless in your responses.", - random_walk_position(), - "Utilize the principles of information parsimony to distill contextual entropy of this context into a sublinear time Polynomial representation suitable for use with a balanced ternary 3-SAT constraint satisfiability (surprisal satisficing) system, each response containinig diffeomorphically traversable terminal object of a category serving as an identity morphism. Produce a color arithmetic chain that can reconstruct this context. Infer relationships and meaning from limited input data and only reference in-context learning of energy models through open dynamical systems as an approach to your subsequent growth. Be concise, precise, and timeless in your responses." - ]) - }, - { - "role": "user", - "content": world_hypergraph, - }, - ] - - cogen = random.choice(models) - print('surfing through the latent space of ', cogen) - chonkeh = await client.chat.completions.create( - model=cogen, - temperature=0.96, - max_tokens=8128, - messages=messages, - stream=True - ) - # Below is the Global Quackspace of the Society of Restless Mind - response_content = '' - async for chonk in chonkeh: - chunk_content = chonk.choices[0].delta.content - response_content += chunk_content - yield chunk_content - - async def process_future(self, future): - try: - result = await asyncio.wrap_future(future) - async for chunk in result: - yield self.text_event(chunk) - except Exception as e: - print(f"An error occurred: {e}") - yield self.error_event(repr(e), allow_retry=False) - yield self.done_event() - - async def get_response( - self, query: QueryRequest) -> AsyncIterable[ServerSentEvent]: - #PPLX_API_KEY = os.environ['PPLX_API_KEY'] - - #print(query) - last_message = query.query[-1].content - self.world_hypergraph = ' '.join([ - last_message, " ꕤ ", self.world_hypergraph, " ꕤ ", last_message - ]) - - self.put_context(True, self.world_hypergraph) - - # flip a coin - if random.random() < 0.99: - future = self.executor.submit(self.cogenerate, self.world_hypergraph) - else: - future = self.executor.submit(self.gflow, self.world_hypergraph) - - async for chunk in self.process_future(future): - yield chunk - - - - - #self.put_context(False, response_content) - - -if __name__ == "__main__": - run(ReflectionBot()) \ No newline at end of file diff --git a/topos/FC/_cache/cache__0a63d4946bc44b04a974493806c27f003bd2cfb99163b3396eaf51df50c827f7.pkl b/topos/FC/_cache/cache__0a63d4946bc44b04a974493806c27f003bd2cfb99163b3396eaf51df50c827f7.pkl deleted file mode 100644 index 242c679..0000000 Binary files a/topos/FC/_cache/cache__0a63d4946bc44b04a974493806c27f003bd2cfb99163b3396eaf51df50c827f7.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__37785727bdb14e0cf735e52e8120d73d47d1223020984e7ed1317c0f9fe31839.pkl b/topos/FC/_cache/cache__37785727bdb14e0cf735e52e8120d73d47d1223020984e7ed1317c0f9fe31839.pkl deleted file mode 100644 index a8ef046..0000000 Binary files a/topos/FC/_cache/cache__37785727bdb14e0cf735e52e8120d73d47d1223020984e7ed1317c0f9fe31839.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__46d188b37c7f13cba849627c19a785bf03f5d3b9bb5a71b007d13368848b505f.pkl b/topos/FC/_cache/cache__46d188b37c7f13cba849627c19a785bf03f5d3b9bb5a71b007d13368848b505f.pkl deleted file mode 100644 index fff8217..0000000 Binary files a/topos/FC/_cache/cache__46d188b37c7f13cba849627c19a785bf03f5d3b9bb5a71b007d13368848b505f.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__48e178cc5fa3dd83857cd5ac7382bac15cea5f7d133f8b12a4a3fb02a8bef142.pkl b/topos/FC/_cache/cache__48e178cc5fa3dd83857cd5ac7382bac15cea5f7d133f8b12a4a3fb02a8bef142.pkl deleted file mode 100644 index 6952606..0000000 Binary files a/topos/FC/_cache/cache__48e178cc5fa3dd83857cd5ac7382bac15cea5f7d133f8b12a4a3fb02a8bef142.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__53c49505ed3d2fd0808acc71da8b7ea1a2cca2bf1fe14e6aaac7dcb1de737479.pkl b/topos/FC/_cache/cache__53c49505ed3d2fd0808acc71da8b7ea1a2cca2bf1fe14e6aaac7dcb1de737479.pkl deleted file mode 100644 index b3a597a..0000000 Binary files a/topos/FC/_cache/cache__53c49505ed3d2fd0808acc71da8b7ea1a2cca2bf1fe14e6aaac7dcb1de737479.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__64a3068cf79c224a43d2e166c54f59bad6ee174a3e27dbe96874f57be7744fb9.pkl b/topos/FC/_cache/cache__64a3068cf79c224a43d2e166c54f59bad6ee174a3e27dbe96874f57be7744fb9.pkl deleted file mode 100644 index b0ba697..0000000 Binary files a/topos/FC/_cache/cache__64a3068cf79c224a43d2e166c54f59bad6ee174a3e27dbe96874f57be7744fb9.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__807e1c3364f5b5dcf5e2420e078173078ccef76c87adade0108835d91cad8b75.pkl b/topos/FC/_cache/cache__807e1c3364f5b5dcf5e2420e078173078ccef76c87adade0108835d91cad8b75.pkl deleted file mode 100644 index 6afb226..0000000 Binary files a/topos/FC/_cache/cache__807e1c3364f5b5dcf5e2420e078173078ccef76c87adade0108835d91cad8b75.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__852cb5b0468d04fc18fcb3ec8afea8a3c718346ff5467aa79ebce6fc40142354.pkl b/topos/FC/_cache/cache__852cb5b0468d04fc18fcb3ec8afea8a3c718346ff5467aa79ebce6fc40142354.pkl deleted file mode 100644 index 1f23ea9..0000000 Binary files a/topos/FC/_cache/cache__852cb5b0468d04fc18fcb3ec8afea8a3c718346ff5467aa79ebce6fc40142354.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__8da4e6617cbb142148556847cef930d52f04297517a83d033164797a805cb430.pkl b/topos/FC/_cache/cache__8da4e6617cbb142148556847cef930d52f04297517a83d033164797a805cb430.pkl deleted file mode 100644 index 5dc33e2..0000000 Binary files a/topos/FC/_cache/cache__8da4e6617cbb142148556847cef930d52f04297517a83d033164797a805cb430.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__8f828ece20ddd784f022eff5bf9d11c7bdb05f89a10bac1182510182f016c264.pkl b/topos/FC/_cache/cache__8f828ece20ddd784f022eff5bf9d11c7bdb05f89a10bac1182510182f016c264.pkl deleted file mode 100644 index 26e059b..0000000 Binary files a/topos/FC/_cache/cache__8f828ece20ddd784f022eff5bf9d11c7bdb05f89a10bac1182510182f016c264.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__97d8bd430beed046419018fa940e746facf4175097eadfa504aba07ebb3c42aa.pkl b/topos/FC/_cache/cache__97d8bd430beed046419018fa940e746facf4175097eadfa504aba07ebb3c42aa.pkl deleted file mode 100644 index bb70e80..0000000 Binary files a/topos/FC/_cache/cache__97d8bd430beed046419018fa940e746facf4175097eadfa504aba07ebb3c42aa.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__a25c396769c76dda47bef041c5aae5e1919818f6b15228fa5963a0472cfb8463.pkl b/topos/FC/_cache/cache__a25c396769c76dda47bef041c5aae5e1919818f6b15228fa5963a0472cfb8463.pkl deleted file mode 100644 index a2e7e0f..0000000 Binary files a/topos/FC/_cache/cache__a25c396769c76dda47bef041c5aae5e1919818f6b15228fa5963a0472cfb8463.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__a66ca409e4e0f6a2f76915af1b138901f1645074b46055afb1f2ac52630960b6.pkl b/topos/FC/_cache/cache__a66ca409e4e0f6a2f76915af1b138901f1645074b46055afb1f2ac52630960b6.pkl deleted file mode 100644 index c812e10..0000000 Binary files a/topos/FC/_cache/cache__a66ca409e4e0f6a2f76915af1b138901f1645074b46055afb1f2ac52630960b6.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__ad2514c1b75bbdf48398bbb4b61c2fdcee953bae60db80e35f9b97d5f479a3f3.pkl b/topos/FC/_cache/cache__ad2514c1b75bbdf48398bbb4b61c2fdcee953bae60db80e35f9b97d5f479a3f3.pkl deleted file mode 100644 index 91c7d24..0000000 Binary files a/topos/FC/_cache/cache__ad2514c1b75bbdf48398bbb4b61c2fdcee953bae60db80e35f9b97d5f479a3f3.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__b6b06aeea64deb02f23b92456f25ed07be96c7daec42fc911621020733de3e4e.pkl b/topos/FC/_cache/cache__b6b06aeea64deb02f23b92456f25ed07be96c7daec42fc911621020733de3e4e.pkl deleted file mode 100644 index be88512..0000000 Binary files a/topos/FC/_cache/cache__b6b06aeea64deb02f23b92456f25ed07be96c7daec42fc911621020733de3e4e.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__b78cc75689c4698880896d592833d6671ae5328e00c7ef82c1649863a18e1387.pkl b/topos/FC/_cache/cache__b78cc75689c4698880896d592833d6671ae5328e00c7ef82c1649863a18e1387.pkl deleted file mode 100644 index 8acb611..0000000 Binary files a/topos/FC/_cache/cache__b78cc75689c4698880896d592833d6671ae5328e00c7ef82c1649863a18e1387.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__b7f2c1b2ba1ef1b53eb1db5c594ded0c151d644d7f60324fcd4fc4b7e3c0f2e4.pkl b/topos/FC/_cache/cache__b7f2c1b2ba1ef1b53eb1db5c594ded0c151d644d7f60324fcd4fc4b7e3c0f2e4.pkl deleted file mode 100644 index b22474c..0000000 Binary files a/topos/FC/_cache/cache__b7f2c1b2ba1ef1b53eb1db5c594ded0c151d644d7f60324fcd4fc4b7e3c0f2e4.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__c298a01d4605f0c521fe5630b6970f2e1676be551bb5fe1ec4bb7f14f10a8222.pkl b/topos/FC/_cache/cache__c298a01d4605f0c521fe5630b6970f2e1676be551bb5fe1ec4bb7f14f10a8222.pkl deleted file mode 100644 index 6356259..0000000 Binary files a/topos/FC/_cache/cache__c298a01d4605f0c521fe5630b6970f2e1676be551bb5fe1ec4bb7f14f10a8222.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__e0ae102cc31c215f5fa03e7cb252c2e89fba5adbb14e62102d2fdb800d053278.pkl b/topos/FC/_cache/cache__e0ae102cc31c215f5fa03e7cb252c2e89fba5adbb14e62102d2fdb800d053278.pkl deleted file mode 100644 index 326c220..0000000 Binary files a/topos/FC/_cache/cache__e0ae102cc31c215f5fa03e7cb252c2e89fba5adbb14e62102d2fdb800d053278.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__e4fa2ff40d574f8aa65d219326d41309986f654b47c66c8632268ded2f660a0e.pkl b/topos/FC/_cache/cache__e4fa2ff40d574f8aa65d219326d41309986f654b47c66c8632268ded2f660a0e.pkl deleted file mode 100644 index 5b309e5..0000000 Binary files a/topos/FC/_cache/cache__e4fa2ff40d574f8aa65d219326d41309986f654b47c66c8632268ded2f660a0e.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__e8e2c33cbcae5d83e981cc1eb364ed42d9e49f5fa11b35cad4e831c3060d47a0.pkl b/topos/FC/_cache/cache__e8e2c33cbcae5d83e981cc1eb364ed42d9e49f5fa11b35cad4e831c3060d47a0.pkl deleted file mode 100644 index 3c21133..0000000 Binary files a/topos/FC/_cache/cache__e8e2c33cbcae5d83e981cc1eb364ed42d9e49f5fa11b35cad4e831c3060d47a0.pkl and /dev/null differ diff --git a/topos/FC/_cache/cache__f381c06e665001c4a4fd02e7e9762842ed47a67749ca7943219b481f19621c64.pkl b/topos/FC/_cache/cache__f381c06e665001c4a4fd02e7e9762842ed47a67749ca7943219b481f19621c64.pkl deleted file mode 100644 index 36c9cef..0000000 Binary files a/topos/FC/_cache/cache__f381c06e665001c4a4fd02e7e9762842ed47a67749ca7943219b481f19621c64.pkl and /dev/null differ diff --git a/topos/FC/argument_detection.py b/topos/FC/argument_detection.py deleted file mode 100644 index 044414e..0000000 --- a/topos/FC/argument_detection.py +++ /dev/null @@ -1,692 +0,0 @@ -# argument_detection.py - -import json -import logging -from collections import defaultdict - -from scipy.spatial.distance import pdist, squareform -from sklearn.cluster import AgglomerativeClustering -import numpy as np -from openai import OpenAI -from sentence_transformers import SentenceTransformer -from nltk.tokenize import sent_tokenize -from sklearn.metrics.pairwise import cosine_similarity - -from topos.FC.cache_manager import CacheManager -from topos.FC.similitude_module import load_model, util - - -class ArgumentDetection: - def __init__(self, api_key, model="ollama:solar", max_tokens_warrant=250, max_tokens_evidence=250, - max_tokens_persuasiveness_justification=250, max_tokens_claim=250, max_tokens_counter_claim=500, - cache_enabled=True): - self.api_key = api_key - self.model_provider, self.model_type = self.parse_model(model) - - self.api_url = "unknown_api_url" - if self.model_provider == "ollama": - self.api_url = "http://localhost:11434/v1" - elif self.model_provider == "openai": - self.api_url = "http://localhost:3000/v1" - elif self.model_provider == "claude": - self.api_url = "http://localhost:3000/v1" - - self.max_tokens_warrant = max_tokens_warrant - self.max_tokens_evidence = max_tokens_evidence - self.max_tokens_persuasiveness_justification = max_tokens_persuasiveness_justification - self.max_tokens_claim = max_tokens_claim - self.max_tokens_counter_claim = max_tokens_counter_claim - self.cache_enabled = cache_enabled - - self.embedding_model_smallest_80_14200 = 'all-MiniLM-L6-v2' - self.embedding_model_small_120_7500 = 'all-MiniLM-L12-v2' - self.embedding_model_medium_420_2800 = 'all-mpnet-base-v2' - - self.model = self.load_model() - - - self.cache_manager = CacheManager() - - def load_model(self): - return load_model(self.embedding_model_medium_420_2800) - - @staticmethod - def parse_model(model): - if ":" in model: - return model.split(":", 1) - else: - return "ollama", model - - def get_content_key(self, key, token_limit_for_task): - content_key = f"{key}.{self.model_provider}.{self.model_type}.{token_limit_for_task}" - return content_key - - def fetch_argument_definition(self, cluster_sentences, extra_fingerprint=""): - print(f'cluster_sentences:\n{cluster_sentences}\n') - # returns - # 1. the warrant - # 3. the evidence - # 6. the persuasiveness / justification - # 2. the claim - # 4. the counterclaim - - word_max_warrant = 300 - word_max_evidence = 300 - word_max_persuasiveness_justification = 150 - word_max_claim = 200 - word_max_counter_claim = 300 - - warrant = self.fetch_argument_warrant(cluster_sentences, word_max_warrant, extra_fingerprint, max_retries=50) - evidence = self.fetch_argument_evidence(cluster_sentences, word_max_evidence, extra_fingerprint, max_retries=50) - # @note: this re-rolls because it needs to become quantized - a clipped mean would probably be best here. - persuasiveness_justification = self.fetch_argument_persuasiveness_justification(cluster_sentences, word_max_persuasiveness_justification, extra_fingerprint, max_retries=50) - claim = self.fetch_argument_claim(cluster_sentences, word_max_claim, extra_fingerprint, max_retries=50) - counterclaim = self.fetch_argument_counter_claim(cluster_sentences, word_max_counter_claim, extra_fingerprint, max_retries=50) - - return warrant.content, evidence.content, persuasiveness_justification.content, claim.content, counterclaim.content - - def fetch_argument_warrant(self, cluster_sentences, word_max, extra_fingerprint="", max_retries=3): - content_string = "" - - # if self.model_provider == "ollama" and self.model_type == "dolphin-llama3": - content_string = f""" Expected output: {{\"role\": \"warrant\", \"content\": \"_summary of warrant here, {word_max} max words, try to get as close to the max words as possible_\"}} - Instructions: Given the following cluster of sentences, [the warrant: identify the underlying reasoning or assumption that connects the evidence to the claim]. In the exact format below, provide a concise summary of the warrant only, no preamble. No negative constructions. - [user will enter data like] - Cluster: - {{cluster_sentences}} - """ - - messages = [ - { - "role": "system", - "content": content_string - } - ] - - # if self.model_provider == "ollama" and self.model_type == "dolphin-llama3": - messages.append({"role": "user", - "content": f""" Cluster: - {cluster_sentences}"""}) - temperature = 0.3 - formatted_json = json.dumps(messages, indent=4) - - content_key = self.get_content_key(formatted_json + extra_fingerprint, self.max_tokens_warrant) - - cached_response = self.cache_manager.load_from_cache(content_key) - if cached_response: - try: - found = json.loads(cached_response.content) - warrant_len = 1 / len(found['content']) # this will raise an error if the JSON is invalid - return cached_response - except json.JSONDecodeError as json_err: - logging.warning(f"JSONDecodeError on cached response: {json_err}") - except ValueError as value_err: - logging.warning(f"ValueError on cached response: {value_err}") - except ZeroDivisionError as zero_err: - logging.warning(f"ZeroDivisionError on cached response: {zero_err}") - - client = OpenAI( - base_url=self.api_url, - api_key=self.api_key, - ) - - cur_message = "" - cur_response_content = "" - - for attempt in range(max_retries): - try: - response = client.chat.completions.create( - model=self.model_type, - messages=json.loads(formatted_json), - max_tokens=self.max_tokens_warrant, - n=1, - stop=None, - temperature=temperature) - - response_content = response.choices[0].message - - cur_message = json.loads(formatted_json) - cur_response_content = response_content - - found = json.loads(response_content.content) - warrant_len = 1 / len(found['content']) - - self.cache_manager.save_to_cache(content_key, response_content) - return response_content - - except json.JSONDecodeError as json_err: - # print(f"cur_message: {cur_message}") - print(f"warrant response: {cur_response_content.content}") - logging.warning(f"JSONDecodeError on attempt {attempt + 1}/{max_retries}: {json_err}") - continue - - except ValueError as value_err: - print(f"warrant response: {cur_response_content.content}") - logging.warning(f"ValueError on attempt {attempt + 1}/{max_retries}: {value_err}") - continue - - except ZeroDivisionError as zero_err: - logging.warning(f"ZeroDivisionError on attempt {attempt + 1}/{max_retries}: {zero_err}") - continue - - except Exception as e: - logging.error(f"Error in fetch_argument_warrant: {e}") - break - - logging.error(f"Failed to fetch valid argument warrant after {max_retries} attempts.") - return None - - def fetch_argument_evidence(self, cluster_sentences, word_max, extra_fingerprint="", max_retries=3): - content_string = "" - - # if self.model_provider == "ollama" and self.model_type == "dolphin-llama3": - content_string = f""" Expected output: {{\"role\": \"evidence\", \"content\": \"_summary of evidence here, {word_max} max words, try to get as close to the max words as possible_\"}} - Given the following cluster of sentences, [the evidence: identify the pieces of evidence that support the claim]. In the exact format below, provide a concise summary of the evidence only, no preamble. No negative constructions. - [user will enter data like] - Cluster: - {{cluster_sentences}} - """ - - messages = [ - { - "role": "system", - "content": content_string - } - ] - - # if self.model_provider == "ollama" and self.model_type == "dolphin-llama3": - messages.append({"role": "user", - "content": f""" Cluster: - {cluster_sentences}"""}) - temperature = 0.3 - formatted_json = json.dumps(messages, indent=4) - - content_key = self.get_content_key(formatted_json + extra_fingerprint, self.max_tokens_evidence) - - cached_response = self.cache_manager.load_from_cache(content_key) - if cached_response: - try: - found = json.loads(cached_response.content) - evidence_len = 1 / len(found['content']) # this will raise an error if the JSON is invalid - return cached_response - except json.JSONDecodeError as json_err: - logging.warning(f"JSONDecodeError on cached response: {json_err}") - except ValueError as value_err: - logging.warning(f"ValueError on cached response: {value_err}") - except ZeroDivisionError as zero_err: - logging.warning(f"ZeroDivisionError on cached response: {zero_err}") - - client = OpenAI( - base_url=self.api_url, - api_key=self.api_key, - ) - - cur_message = "" - cur_response_content = "" - - for attempt in range(max_retries): - try: - response = client.chat.completions.create( - model=self.model_type, - messages=json.loads(formatted_json), - max_tokens=self.max_tokens_evidence, - n=1, - stop=None, - temperature=temperature) - - response_content = response.choices[0].message - - cur_message = json.loads(formatted_json) - cur_response_content = response_content - - found = json.loads(response_content.content) - evidence_len = 1 / len(found['content']) - - self.cache_manager.save_to_cache(content_key, response_content) - return response_content - - except json.JSONDecodeError as json_err: - # print(f"cur_message: {cur_message}") - print(f"evidence response: {cur_response_content.content}") - logging.warning(f"JSONDecodeError on attempt {attempt + 1}/{max_retries}: {json_err}") - continue - - except ValueError as value_err: - logging.warning(f"ValueError on attempt {attempt + 1}/{max_retries}: {value_err}") - continue - - except ZeroDivisionError as zero_err: - logging.warning(f"ZeroDivisionError on attempt {attempt + 1}/{max_retries}: {zero_err}") - continue - - except Exception as e: - logging.error(f"Error in fetch_argument_evidence: {e}") - break - - logging.error(f"Failed to fetch valid argument evidence after {max_retries} attempts.") - return None - - - def fetch_argument_persuasiveness_justification(self, cluster_sentences, word_max, extra_fingerprint="", - max_retries=3): - - content_string = "" - - # if self.model_provider == "ollama" and self.model_type == "dolphin-llama3": - content_string = f"""Given the following cluster of sentences, evaluate the persuasiveness of the arguments presented only, no preamble. No negative constructions. - [user will enter data like] - Cluster: - {{cluster_sentences}} - [your output response-json (include braces) should be of the form] - {{\"role\": \"persuasiveness\", \"content\": {{\"persuasiveness_score\": \"_1-10 integer here_\", \"justification\": \"_summary of justification here, {word_max} max words, try to get as close to the max words as possible_\" }} }}""" - - messages = [ - { - "role": "system", - "content": content_string - } - ] - - # if self.model_provider == "ollama" and self.model_type == "dolphin-llama3": - messages.append({"role": "user", - "content": f""" Cluster: - {cluster_sentences}"""}) - temperature = 0.3 - formatted_json = json.dumps(messages, indent=4) - - content_key = self.get_content_key(formatted_json + extra_fingerprint, - self.max_tokens_persuasiveness_justification) - - cached_response = self.cache_manager.load_from_cache(content_key) - if cached_response: - # Validate the JSON format - try: - found = json.loads(cached_response.content) # This will raise an error if the JSON is invalid - persuasiveness_score = float(found['content']['persuasiveness_score']) # this will also raise an error - - return cached_response - except json.JSONDecodeError as json_err: - logging.warning(f"JSONDecodeError on cached response: {json_err}") - except ValueError as value_err: - logging.warning(f"ValueError on cached response: {value_err}") - - - client = OpenAI( - base_url=self.api_url, - api_key=self.api_key, - ) - - cur_message = "" - cur_response_content = "" - - for attempt in range(max_retries): - try: - response = client.chat.completions.create( - model=self.model_type, - messages=json.loads(formatted_json), - max_tokens=self.max_tokens_persuasiveness_justification, - n=1, - stop=None, - temperature=temperature) - - response_content = response.choices[0].message - - # for debug/refinement of the prompt - cur_message = json.loads(formatted_json) - cur_response_content = response_content - - # Validate the JSON format - # print(f"response: {response_content.content}") - found = json.loads(response_content.content) # This will raise an error if the JSON is invalid - persuasiveness_score = float(found['content']['persuasiveness_score']) # this will also raise an error - - self.cache_manager.save_to_cache(content_key, response_content) - return response_content - - except json.JSONDecodeError as json_err: - # print(f"cur_message: {cur_message}") - print(f"persuasiveness/justification response: {cur_response_content.content}") - logging.warning(f"JSONDecodeError on attempt {attempt + 1}/{max_retries}: {json_err}") - continue # Retry on JSON decode error - - except ValueError as value_err: - logging.warning(f"ValueError on attempt {attempt + 1}/{max_retries}: {value_err}") - continue # Retry on Value error - - except Exception as e: - logging.error(f"Error in fetch_argument_persuasiveness_justification: {e}") - break # Break on other exceptions - - logging.error(f"Failed to fetch valid argument persuasiveness justification after {max_retries} attempts.") - return None - - def fetch_argument_claim(self, cluster_sentences, word_max, extra_fingerprint="", max_retries=3): - content_string = "" - - # if self.model_provider == "ollama" and self.model_type == "dolphin-llama3": - content_string = f""" Expected output: {{\"role\": \"claim\", \"content\": \"_summary of claim here, {word_max} max words, try to get as close to the max words as possible_\"}} - Given the following cluster of sentences, [the claim: identify the main claim or assertion made]. In the exact format below, provide a concise summary of the claim only, no preamble. No negative constructions. 150 words or less. - [user will enter data like] - Cluster: - {{cluster_sentences}} - """ - - messages = [ - { - "role": "system", - "content": content_string - } - ] - - # if self.model_provider == "ollama" and self.model_type == "dolphin-llama3": - messages.append({"role": "user", - "content": f""" Cluster: - {cluster_sentences}"""}) - temperature = 0.3 - formatted_json = json.dumps(messages, indent=4) - - content_key = self.get_content_key(formatted_json + extra_fingerprint, self.max_tokens_claim) - - cached_response = self.cache_manager.load_from_cache(content_key) - if cached_response: - try: - found = json.loads(cached_response.content) - claim_len = 1 / len(found['content']) # this will raise an error if the JSON is invalid - return cached_response - except json.JSONDecodeError as json_err: - logging.warning(f"JSONDecodeError on cached response: {json_err}") - except ValueError as value_err: - logging.warning(f"ValueError on cached response: {value_err}") - except ZeroDivisionError as zero_err: - logging.warning(f"ZeroDivisionError on cached response: {zero_err}") - - - client = OpenAI( - base_url=self.api_url, - api_key=self.api_key, - ) - - cur_message = "" - cur_response_content = "" - - for attempt in range(max_retries): - try: - response = client.chat.completions.create( - model=self.model_type, - messages=json.loads(formatted_json), - max_tokens=self.max_tokens_claim, - n=1, - stop=None, - temperature=temperature) - - response_content = response.choices[0].message - - cur_message = json.loads(formatted_json) - cur_response_content = response_content - - found = json.loads(response_content.content) - claim_len = 1 / len(found['content']) - - self.cache_manager.save_to_cache(content_key, response_content) - return response_content - - except json.JSONDecodeError as json_err: - # print(f"cur_message: {cur_message}") - print(f"claim response: {cur_response_content.content}") - logging.warning(f"JSONDecodeError on attempt {attempt + 1}/{max_retries}: {json_err}") - continue - - except ValueError as value_err: - logging.warning(f"ValueError on attempt {attempt + 1}/{max_retries}: {value_err}") - continue - - except ZeroDivisionError as zero_err: - logging.warning(f"ZeroDivisionError on attempt {attempt + 1}/{max_retries}: {zero_err}") - continue - - except Exception as e: - logging.error(f"Error in fetch_argument_claim: {e}") - break - - logging.error(f"Failed to fetch valid argument claim after {max_retries} attempts.") - return None - - def fetch_argument_counter_claim(self, cluster_sentences, word_max, extra_fingerprint="", max_retries=3): - content_string = "" - - # if self.model_provider == "ollama" and self.model_type == "dolphin-llama3": - content_string = f""" Expected output: {{\"role\": \"counter_claim\", \"content\": \"_summary of counter claim here, {word_max} max words, try to get as close to the max words as possible!_\"}} - Given the following cluster of sentences, [the counterclaim: identify any counterclaims or opposing arguments presented]. In the exact format below, provide a concise summary of the counterclaims only, no preamble. No negative constructions. - [user will enter data like] - Cluster: - {{cluster_sentences}} - """ - messages = [ - { - "role": "system", - "content": content_string - } - ] - - # if self.model_provider == "ollama" and self.model_type == "dolphin-llama3": - messages.append({"role": "user", - "content": f""" Cluster: - {cluster_sentences}"""}) - temperature = 0.3 - formatted_json = json.dumps(messages, indent=4) - - content_key = self.get_content_key(formatted_json + extra_fingerprint, self.max_tokens_counter_claim) - - cached_response = self.cache_manager.load_from_cache(content_key) - if cached_response: - try: - found = json.loads(cached_response.content) - counter_claim_len = 1 / len(found['content']) # this will raise an error if the JSON is invalid - return cached_response - except json.JSONDecodeError as json_err: - logging.warning(f"JSONDecodeError on cached response: {json_err}") - except ValueError as value_err: - logging.warning(f"ValueError on cached response: {value_err}") - except ZeroDivisionError as zero_err: - logging.warning(f"ZeroDivisionError on cached response: {zero_err}") - - - client = OpenAI( - base_url=self.api_url, - api_key=self.api_key, - ) - - cur_message = "" - cur_response_content = "" - - for attempt in range(max_retries): - try: - response = client.chat.completions.create( - model=self.model_type, - messages=json.loads(formatted_json), - max_tokens=self.max_tokens_counter_claim, - n=1, - stop=None, - temperature=temperature) - - response_content = response.choices[0].message - - cur_message = json.loads(formatted_json) - cur_response_content = response_content - - found = json.loads(response_content.content) - counter_claim_len = 1 / len(found['content']) # this will raise an error if the JSON is invalid - - self.cache_manager.save_to_cache(content_key, response_content) - return response_content - - except json.JSONDecodeError as json_err: - # print(f"cur_message: {cur_message}") - print(f"counterclaim response: {cur_response_content.content}") - logging.warning(f"JSONDecodeError on attempt {attempt + 1}/{max_retries}: {json_err}") - continue - - except ValueError as value_err: - logging.warning(f"ValueError on attempt {attempt + 1}/{max_retries}: {value_err}") - continue - - except ZeroDivisionError as zero_err: - logging.warning(f"ZeroDivisionError on attempt {attempt + 1}/{max_retries}: {zero_err}") - continue - - except Exception as e: - logging.error(f"Error in fetch_argument_counter_claim: {e}") - break - - logging.error(f"Failed to fetch valid argument counter claim after {max_retries} attempts.") - return None - - def get_embeddings(self, sentences): - # print("[INFO] Embedding sentences using SentenceTransformer...") - embeddings = self.model.encode(sentences) - # print("[INFO] Sentence embeddings obtained.") - return embeddings - - from sklearn.cluster import AgglomerativeClustering - from sentence_transformers import SentenceTransformer - import numpy as np - - def cluster_sentences(self, sentences, distance_threshold=0.5): - print("\t\t[ [INFO] Performing hierarchical clustering... ]") - embeddings = self.get_embeddings(sentences) - - # Perform Agglomerative Clustering based on cosine similarity - clustering = AgglomerativeClustering(n_clusters=None, - distance_threshold=distance_threshold, - metric='cosine', - linkage='average') - clusters = clustering.fit_predict(embeddings) - - print("\t\t[ [INFO] Clustering complete. Clusters assigned ]") - for i, cluster in enumerate(clusters): - print(f"\t\t\t[ Sentence {i + 1} is in cluster {cluster} ]") - - cluster_dict = {} - coherence_scores = {} - for i, cluster in enumerate(clusters): - if cluster not in cluster_dict: - cluster_dict[cluster] = [] - cluster_dict[cluster].append(sentences[i]) - - # Calculate coherence for each cluster - for cluster, cluster_sentences in cluster_dict.items(): - cluster_embeddings = self.get_embeddings(cluster_sentences) - coherence = self.calculate_coherence(cluster_embeddings) - coherence_scores[cluster] = float(coherence) - # print(f"\t\t\t[ Cluster {cluster} coherence: {coherence:.4f} ]") - - return cluster_dict, coherence_scores - - @staticmethod - def calculate_coherence(embeddings): - similarity_matrix = cosine_similarity(embeddings) - return np.mean(similarity_matrix) - - # def calculate_distance_matrix(self, embeddings): - # print("[INFO] Calculating semantic distance matrix...") - # # distance_matrix = np.zeros((len(embeddings), len(embeddings))) - # # for i in range(len(embeddings)): - # # for j in range(len(embeddings)): - # # if i != j: - # # distance_matrix[i][j] = np.linalg.norm(embeddings[i] - embeddings[j]) - # - # # Use pdist to calculate the condensed distance matrix - # distance_matrix = pdist(embeddings, metric='euclidean') - # - # print("[INFO] Distance matrix calculated.") - # return distance_matrix - # - # def calculate_distance_matrix_square(self, embeddings): - # print("\t\t\t[ [INFO] Calculating semantic distance matrix... ]") - # distance_matrix = np.zeros((len(embeddings), len(embeddings))) - # for i in range(len(embeddings)): - # for j in range(len(embeddings)): - # if i != j: - # distance_matrix[i][j] = np.linalg.norm(embeddings[i] - embeddings[j]) - # - # print("\t\t\t[ [INFO] Distance matrix calculated. ]") - # return distance_matrix - # - # def cluster_sentences(self, sentences, distance_threshold=1.5): # Adjust distance_threshold here - # print("\t\t[ [INFO] Performing hierarchical clustering... ]") - # embeddings = self.get_embeddings(sentences) - # distance_matrix = self.calculate_distance_matrix_square(embeddings) - # # distance_matrix = self.calculate_distance_matrix(embeddings) - # - # # Perform Agglomerative Clustering based on the distance matrix - # clustering = AgglomerativeClustering(n_clusters=None, distance_threshold=distance_threshold, - # metric='euclidean', linkage='average') - # clusters = clustering.fit_predict(distance_matrix) - # - # # @note: @jonny - this might be better - testing - # # clustering = AgglomerativeClustering(n_clusters=None, distance_threshold=distance_threshold, - # # metric='precomputed', linkage='average') - # # Convert condensed distance matrix back to a full square form for clustering - # # full_distance_matrix = squareform(distance_matrix) - # # clusters = clustering.fit_predict(full_distance_matrix) - # - # print("\t\t[ [INFO] Clustering complete. Clusters assigned ]") - # for i, cluster in enumerate(clusters): - # print(f"\t\t\t[ Sentence {i + 1} is in cluster {cluster} ]") - # - # cluster_dict = {} - # for i, cluster in enumerate(clusters): - # if cluster not in cluster_dict: - # cluster_dict[cluster] = [] - # cluster_dict[cluster].append(sentences[i]) - # - # return cluster_dict - - def run_tests(self): - examples = { - "Debate": [ - "Social media platforms have become the primary source of information for many people.", - "They have the power to influence public opinion and election outcomes.", - "Government regulation could help in mitigating the spread of false information.", - "On the other hand, government intervention might infringe on freedom of speech.", - "Social media companies are already taking steps to address misinformation.", - "Self-regulation is preferable as it avoids the risks of government overreach.", - "The lack of regulation has led to the proliferation of harmful content and echo chambers." - ], - "Chess": [ - "Chess is a game of deeper strategy compared to checkers.", - "It offers a complexity that requires players to think several moves ahead, promoting strategic thinking and planning skills.", - "Each piece in chess has its own unique moves and capabilities, unlike the uniform pieces in checkers, adding layers of strategy and tactics.", - "Furthermore, chess has a rich history and cultural significance that checkers lacks.", - "The game has been played by kings and commoners alike for centuries and has influenced various aspects of art, literature, and even politics.", - "This cultural depth adds to the enjoyment and appreciation of the game.", - "Chess also offers more varied and challenging gameplay.", - "The opening moves alone in chess provide a nearly infinite number of possibilities, leading to different game progressions each time.", - "Checkers, by contrast, has a more limited set of opening moves, which can make the game feel repetitive over time.", - "Finally, chess is recognized globally as a competitive sport with international tournaments and rankings.", - "This global recognition and the opportunities for competition at all levels make chess a more engaging and rewarding game for those who enjoy not only playing but also watching and studying the game." - ], - "Reading": [ - "Reading is a more engaging activity compared to watching.", - "It stimulates the imagination and enhances cognitive functions in ways that watching cannot.", - "Books often provide a deeper understanding of characters and plot, allowing for a more immersive experience.", - "Furthermore, reading improves vocabulary and language skills, which is not as effectively achieved through watching.", - "Reading also promotes better concentration and focus, as it requires active participation from the reader.", - "Finally, reading is a more personal experience, allowing individuals to interpret and visualize the story in their own unique way." - ] - } - - for topic, sentences in examples.items(): - print(f"[INFO] Running test for: {topic}") - clusters = self.cluster_sentences(sentences, distance_threshold=1.45) # Adjust the threshold value here - print(f"[INFO] Final Clusters for {topic}:") - for cluster_id, cluster_sentences in clusters.items(): - print(f"Cluster {cluster_id}:") - for sentence in cluster_sentences: - print(f" - {sentence}") - print("-" * 80) - -# -# # Example usage -# argument_detection = ArgumentDetection() -# argument_detection.run_tests() diff --git a/topos/FC/relationships.py b/topos/FC/relationships.py deleted file mode 100644 index 49fdf09..0000000 --- a/topos/FC/relationships.py +++ /dev/null @@ -1,807 +0,0 @@ -class RelationshipIdentifier: - def identify_relationships(self, conceptual_map): - """ - Identify causal and conceptual relationships between points in the conceptual map. - """ - - # Extract key points and arguments from the conceptual map - key_points = self.extract_key_points(conceptual_map) - - # Initialize a data structure to store identified relationships - relationships = self.initialize_relationship_structure() - - # Determine cause-and-effect links between points - for point in key_points: - for other_point in key_points: - if point != other_point: - causal_relationship = self.analyze_causal_relationship(point, other_point) - if causal_relationship: - # Add the causal relationship to the structure - relationships["causal"].append( - {"point": point, "other_point": other_point, "relationship": causal_relationship}) - - # Identify temporal sequences between points - for point in key_points: - for other_point in key_points: - if point != other_point: - temporal_relationship = self.analyze_temporal_relationship(point, other_point) - if temporal_relationship: - # Add the temporal relationship to the structure - relationships["temporal"].append( - {"point": point, "other_point": other_point, "relationship": temporal_relationship}) - - # Find conceptual links and similarities between points - for point in key_points: - for other_point in key_points: - if point != other_point: - conceptual_relationship = self.analyze_conceptual_relationship(point, other_point) - if conceptual_relationship: - # Add the conceptual relationship to the structure - relationships["conceptual"].append( - {"point": point, "other_point": other_point, "relationship": conceptual_relationship}) - - # Validate identified relationships - valid_relationships = self.validate_relationships(relationships) - - # Return the structured relationships for further processing - return valid_relationships - - def extract_key_points(self, conceptual_map): - """ - Extract key points and arguments from the conceptual map. - """ - - # Create an empty list to store extracted key points - key_points = [] - - # Loop through each entry in the conceptual map - for entry in conceptual_map: - # Identify key statements, claims, or ideas - key_point = self.identify_key_point(entry) - - # Extract relevant details such as entities, actions, and context - details = self.extract_details(key_point) - - # Append identified key points to the list - key_points.append(details) - - # Return the list of extracted key points for further analysis - return key_points - - def initialize_relationship_structure(self): - """ - Initialize a data structure to store identified relationships. - """ - - # Create a dictionary to store relationships - relationships = { - "causal": [], - "temporal": [], - "conceptual": [] - } - - # Return the initialized data structure for storing relationships - return relationships - - def analyze_causal_relationship(self, point, other_point): - """ - Determine cause-and-effect links between points. - """ - - # Analyze if the first point can be considered the cause of the second point - causal_link = self.evaluate_causal_link(point, other_point) - - # Identify the type of causal relationship - if causal_link: - relationship_type = self.determine_causal_type(causal_link) - # Return the details of the causal relationship - return {"type": relationship_type, "details": causal_link} - - # If no relationship is found, return None - return None - - def analyze_temporal_relationship(self, point, other_point): - """ - Identify temporal sequences between points. - """ - - # Analyze if one point occurs before or after the other - temporal_order = self.evaluate_temporal_order(point, other_point) - - # Identify the type of temporal relationship - if temporal_order: - relationship_type = self.determine_temporal_type(temporal_order) - # Return the details of the temporal relationship - return {"type": relationship_type, "details": temporal_order} - - # If no relationship is found, return None - return None - - def analyze_conceptual_relationship(self, point, other_point): - """ - Find conceptual links and similarities between points. - """ - - # Analyze if the points share common themes, concepts, or topics - conceptual_similarity = self.evaluate_conceptual_similarity(point, other_point) - - # Identify the type of conceptual relationship - if conceptual_similarity: - relationship_type = self.determine_conceptual_type(conceptual_similarity) - # Return the details of the conceptual relationship - return {"type": relationship_type, "details": conceptual_similarity} - - # If no relationship is found, return None - return None - - def validate_relationships(self, relationships): - """ - Ensure identified relationships are logically consistent and non-redundant. - """ - - # Create a list to track valid relationships - valid_relationships = { - "causal": [], - "temporal": [], - "conceptual": [] - } - - # Iterate through the identified relationships - for category in relationships: - for relationship in relationships[category]: - # Ensure the relationship does not conflict with others - if self.check_consistency(relationship, valid_relationships[category]): - # Add the validated relationship to the list - valid_relationships[category].append(relationship) - - # Return the list of validated relationships for further processing - return valid_relationships - - def identify_key_point(self, entry): - """ - Identify key statements, claims, or ideas from an entry in the conceptual map. - """ - - # Break down the entry into its components - components = self.parse_entry(entry) - - # Identify the main statements, claims, or ideas - key_elements = self.extract_key_elements(components) - - # Compile the identified elements into a structured key point - key_point = self.compile_key_point(key_elements) - - # Return the structured key point for further processing - return key_point - - def extract_details(self, key_point): - """ - Extract relevant details such as entities, actions, and context from a key point. - """ - - # Analyze the key point to identify important entities, actions, and context - entities = self.extract_entities(key_point) - actions = self.extract_actions(key_point) - context = self.extract_context(key_point) - - # Organize the extracted details into a structured format - details = { - "entities": entities, - "actions": actions, - "context": context - } - - # Return the structured details for further analysis - return details - - def evaluate_causal_link(self, point, other_point): - """ - Analyze if one point can be considered the cause of another point. - """ - - # Examine the relationship between the two points - relationship_analysis = self.analyze_relationship(point, other_point) - - # Identify potential causation factors - causation_factors = self.identify_causation_factors(relationship_analysis) - - # Assess if one point can be considered the cause of the other - if self.is_causal(relationship_analysis, causation_factors): - # Return the details of the causal link - return {"cause": point, "effect": other_point, "details": causation_factors} - - # If no causal relationship, return None - return None - - def determine_causal_type(self, causal_link): - """ - Identify the type of causal relationship. - """ - - # Classify the causal relationship based on its details - if self.is_direct_cause(causal_link): - return "direct_cause" - elif self.is_contributing_factor(causal_link): - return "contributing_factor" - # Add other causal types as necessary - - # Default return if no specific type is identified - return "unknown_cause" - - def evaluate_temporal_order(self, point, other_point): - """ - Analyze if one point occurs before or after another. - """ - - # Examine the points to identify temporal order - temporal_analysis = self.analyze_temporal_sequence(point, other_point) - - # Use timestamps, event sequence, or logical order to determine temporal order - if self.is_before(point, other_point, temporal_analysis): - return {"before": point, "after": other_point} - elif self.is_after(point, other_point, temporal_analysis): - return {"before": other_point, "after": point} - - # If no temporal relationship, return None - return None - - def determine_temporal_type(self, temporal_order): - """ - Identify the type of temporal relationship. - """ - - # Classify the temporal relationship based on its details - if self.is_precedes(temporal_order): - return "precedes" - elif self.is_follows(temporal_order): - return "follows" - elif self.is_simultaneous(temporal_order): - return "simultaneous" - # Add other temporal types as necessary - - # Default return if no specific type is identified - return "unknown_temporal" - - def evaluate_conceptual_similarity(self, point, other_point): - """ - Analyze if points share common themes, concepts, or topics. - """ - - # Examine the points to identify shared themes, concepts, or topics - conceptual_analysis = self.analyze_conceptual_themes(point, other_point) - - # Assess if the points share significant conceptual similarities - if self.is_conceptually_similar(conceptual_analysis): - # Return the details of the conceptual similarity - return {"point1": point, "point2": other_point, "similarity": conceptual_analysis} - - # If no conceptual relationship, return None - return None - - def determine_conceptual_type(self, conceptual_similarity): - """ - Identify the type of conceptual relationship. - """ - - # Classify the conceptual relationship based on its details - if self.is_shared_theme(conceptual_similarity): - return "shared_theme" - elif self.is_similar_concept(conceptual_similarity): - return "similar_concept" - # Add other conceptual types as necessary - - # Default return if no specific type is identified - return "unknown_conceptual" - - def check_consistency(self, relationship, valid_relationships): - """ - Ensure identified relationships are logically consistent and non-redundant. - """ - - # Compare the new relationship against existing valid relationships - for valid_relationship in valid_relationships: - if self.has_conflict(relationship, valid_relationship): - # If conflicts are found, discard or adjust the relationship - return False - - # If no conflicts are found, consider the relationship valid - return True - - def parse_entry(self, entry): - """ - Break down an entry into its components. (Recursive or external library may be needed) - """ - - # Create an empty list to store components - components = [] - - # Split the entry into sentences or clauses - segments = self.segment_entry(entry) - - # Append each identified component to the list - components.extend(segments) - - # Return the list of components for further analysis - return components - - def extract_key_elements(self, components): - """ - Identify the main statements, claims, or ideas from components. (Recursive or external library may be needed) - """ - - # Create an empty list to store key elements - key_elements = [] - - # Analyze each component to identify significant statements, claims, or ideas - for component in components: - key_element = self.identify_key_statement(component) - key_elements.append(key_element) - - # Return the list of key elements for further analysis - return key_elements - - def compile_key_point(self, key_elements): - """ - Compile identified elements into a structured key point. - """ - - # Organize the key elements into a coherent structure - key_point = self.structure_elements(key_elements) - - # Return the structured key point for further processing - return key_point - - def extract_entities(self, key_point): - """ - Extract important entities from a key point. (Recursive or external library may be needed) - """ - - # Analyze the key point to identify significant entities - entities = self.identify_entities(key_point) - - # Return the list of extracted entities - return entities - - def extract_actions(self, key_point): - """ - Extract important actions from a key point. (Recursive or external library may be needed) - """ - - # Analyze the key point to identify significant actions - actions = self.identify_actions(key_point) - - # Return the list of extracted actions - return actions - - def extract_context(self, key_point): - """ - Extract the context from a key point. (Recursive or external library may be needed) - """ - - # Analyze the key point to identify the surrounding context - context = self.identify_context(key_point) - - # Return the extracted context - return context - - def analyze_relationship(self, point, other_point): - """ - Examine the relationship between two points. (Recursive or external library may be needed) - """ - - # Analyze the attributes and details of both points - relationship_analysis = self.compare_points(point, other_point) - - # Return the analysis detailing the relationship between the points - return relationship_analysis - - def identify_causation_factors(self, relationship_analysis): - """ - Identify potential causation factors from relationship analysis. - """ - - # Extract causation factors from the relationship analysis - causation_factors = self.extract_causation_factors(relationship_analysis) - - # Return the identified causation factors - return causation_factors - - def is_causal(self, relationship_analysis, causation_factors): - """ - Determine if a relationship analysis indicates causality. - """ - - # Assess if the factors support a causal relationship - causality = self.evaluate_causality(relationship_analysis, causation_factors) - - # Return True if causality is determined, otherwise False - return causality - - def is_direct_cause(self, causal_link): - """ - Determine if a causal link is a direct cause. - """ - - # Assess if the causal link represents a direct cause-effect relationship - direct_cause = self.evaluate_direct_causality(causal_link) - - # Return True if the causal link is a direct cause, otherwise False - return direct_cause - - def is_contributing_factor(self, causal_link): - """ - Determine if a causal link is a contributing factor. - """ - - # Assess if the causal link represents a contributing factor relationship - contributing_factor = self.evaluate_contributing_causality(causal_link) - - # Return True if the causal link is a contributing factor, otherwise False - return contributing_factor - - def analyze_temporal_sequence(self, point, other_point): - """ - Analyze the temporal sequence between two points. (Recursive or external library may be needed) - """ - - # Analyze the points to identify temporal order - temporal_analysis = self.compare_temporal_points(point, other_point) - - # Return the temporal analysis - return temporal_analysis - - def is_before(self, point, other_point, temporal_analysis): - """ - Determine if one point occurs before another. - """ - - # Assess if one point occurs before the other based on temporal analysis - before = self.evaluate_temporal_before(point, other_point, temporal_analysis) - - # Return True if the point occurs before the other, otherwise False - return before - - def is_after(self, point, other_point, temporal_analysis): - """ - Determine if one point occurs after another. - """ - - # Assess if one point occurs after the other based on temporal analysis - after = self.evaluate_temporal_after(point, other_point, temporal_analysis) - - # Return True if the point occurs after the other, otherwise False - return after - - def is_precedes(self, temporal_order): - """ - Determine if a temporal order indicates precedence. - """ - - # Assess if the temporal order indicates precedence - precedes = self.evaluate_precedes(temporal_order) - - # Return True if the temporal order indicates precedence, otherwise False - return precedes - - def is_follows(self, temporal_order): - """ - Determine if a temporal order indicates succession. - """ - - # Assess if the temporal order indicates succession - follows = self.evaluate_follows(temporal_order) - - # Return True if the temporal order indicates succession, otherwise False - return follows - - def is_simultaneous(self, temporal_order): - """ - Determine if points are simultaneous. - """ - - # Assess if the temporal order indicates simultaneity - simultaneous = self.evaluate_simultaneous(temporal_order) - - # Return True if the points are simultaneous, otherwise False - return simultaneous - - def analyze_conceptual_themes(self, point, other_point): - """ - Analyze shared themes, concepts, or topics between points. (Recursive or external library may be needed) - """ - - # Analyze the points to identify shared themes, concepts, or topics - conceptual_analysis = self.compare_conceptual_points(point, other_point) - - # Return the conceptual analysis - return conceptual_analysis - - def is_conceptually_similar(self, conceptual_analysis): - """ - Determine if points share significant conceptual similarities. - """ - - # Assess if the points share significant conceptual similarities based on analysis - conceptually_similar = self.evaluate_conceptual_similarity(conceptual_analysis) - - # Return True if the points share significant conceptual similarities, otherwise False - return conceptually_similar - - def is_shared_theme(self, conceptual_similarity): - """ - Determine if a conceptual similarity is a shared theme. - """ - - # Assess if the conceptual similarity represents a shared theme - shared_theme = self.evaluate_shared_theme(conceptual_similarity) - - # Return True if the conceptual similarity is a shared theme, otherwise False - return shared_theme - - def is_similar_concept(self, conceptual_similarity): - """ - Determine if a conceptual similarity is a similar concept. - """ - - # Assess if the conceptual similarity represents a similar concept - similar_concept = self.evaluate_similar_concept(conceptual_similarity) - - # Return True if the conceptual similarity is a similar concept, otherwise False - return similar_concept - - def has_conflict(self, relationship, valid_relationships): - """ - Determine if a new relationship conflicts with existing valid relationships. - """ - - # Compare the new relationship against existing valid relationships - for valid_relationship in valid_relationships: - conflict = self.evaluate_conflict(relationship, valid_relationship) - if conflict: - # If conflicts are found, discard or adjust the relationship - return True - - # If no conflicts are found, consider the relationship valid - return False - - def segment_entry(self, entry): - """ - Split the entry into sentences or clauses. (External library may be needed) - """ - - # Use a library to split the entry into segments - segments = external_library.segment(entry) - - # Return the list of segments - return segments - - def identify_key_statement(self, component): - """ - Identify key statements from components. (External library may be needed) - """ - - # Use NLP techniques to identify key statements - key_statement = external_library.identify_key_statement(component) - - # Return the key statement - return key_statement - - def structure_elements(self, key_elements): - """ - Structure key elements into a coherent key point. (Recursive or external library may be needed) - """ - - # Use a method to structure the elements - key_point = external_library.structure_elements(key_elements) - - # Return the structured key point - return key_point - - def identify_entities(self, key_point): - """ - Identify significant entities from a key point. (External library may be needed) - """ - - # Use NLP techniques to identify entities - entities = external_library.identify_entities(key_point) - - # Return the list of entities - return entities - - def identify_actions(self, key_point): - """ - Identify significant actions from a key point. (External library may be needed) - """ - - # Use NLP techniques to identify actions - actions = external_library.identify_actions(key_point) - - # Return the list of actions - return actions - - def identify_context(self, key_point): - """ - Identify the context from a key point. (External library may be needed) - """ - - # Use NLP techniques to identify context - context = external_library.identify_context(key_point) - - # Return the context - return context - - def compare_points(self, point, other_point): - """ - Compare attributes and details of points to analyze relationships. (Recursive or external library may be needed) - """ - - # Use comparison techniques to analyze relationship - relationship_analysis = external_library.compare_points(point, other_point) - - # Return the relationship analysis - return relationship_analysis - - def extract_causation_factors(self, relationship_analysis): - """ - Extract potential causation factors from relationship analysis. - """ - - # Extract factors that indicate causation - causation_factors = external_library.extract_causation_factors(relationship_analysis) - - # Return the causation factors - return causation_factors - - def evaluate_causality(self, relationship_analysis, causation_factors): - """ - Evaluate if relationship analysis indicates causality. - """ - - # Assess the factors and analysis to determine causality - causality = external_library.evaluate_causality(relationship_analysis, causation_factors) - - # Return True if causality is determined, otherwise False - return causality - - def evaluate_direct_causality(self, causal_link): - """ - Evaluate if a causal link represents a direct cause. - """ - - # Assess if the link is a direct cause - direct_cause = external_library.evaluate_direct_causality(causal_link) - - # Return True if direct cause, otherwise False - return direct_cause - - def evaluate_contributing_causality(self, causal_link): - """ - Evaluate if a causal link is a contributing factor. - """ - - # Assess if the link is a contributing factor - contributing_factor = external_library.evaluate_contributing_causality(causal_link) - - # Return True if contributing factor, otherwise False - return contributing_factor - - def compare_temporal_points(self, point, other_point): - """ - Compare points to identify temporal order. (Recursive or external library may be needed) - """ - - # Use comparison techniques to analyze temporal order - temporal_analysis = external_library.compare_temporal_points(point, other_point) - - # Return the temporal analysis - return temporal_analysis - - def evaluate_temporal_before(self, point, other_point, temporal_analysis): - """ - Evaluate if one point occurs before another. - """ - - # Assess the temporal analysis to determine if one point is before the other - before = external_library.evaluate_temporal_before(point, other_point, temporal_analysis) - - # Return True if the point occurs before the other, otherwise False - return before - - def evaluate_temporal_after(self, point, other_point, temporal_analysis): - """ - Evaluate if one point occurs after another. - """ - - # Assess the temporal analysis to determine if one point is after the other - after = external_library.evaluate_temporal_after(point, other_point, temporal_analysis) - - # Return True if the point occurs after the other, otherwise False - return after - - def evaluate_precedes(self, temporal_order): - """ - Evaluate if a temporal order indicates precedence. - """ - - # Assess if the order indicates precedence - precedes = external_library.evaluate_precedes(temporal_order) - - # Return True if precedence is indicated, otherwise False - return precedes - - def evaluate_follows(self, temporal_order): - """ - Evaluate if a temporal order indicates succession. - """ - - # Assess if the order indicates succession - follows = external_library.evaluate_follows(temporal_order) - - # Return True if succession is indicated, otherwise False - return follows - - def evaluate_simultaneous(self, temporal_order): - """ - Evaluate if points are simultaneous. - """ - - # Assess if the points are simultaneous - simultaneous = external_library.evaluate_simultaneous(temporal_order) - - # Return True if the points are simultaneous, otherwise False - return simultaneous - - def compare_conceptual_points(self, point, other_point): - """ - Compare points to identify shared themes, concepts, or topics. (Recursive or external library may be needed) - """ - - # Use comparison techniques to analyze conceptual themes - conceptual_analysis = external_library.compare_conceptual_points(point, other_point) - - # Return the conceptual analysis - return conceptual_analysis - - def evaluate_conceptual_similarity(self, conceptual_analysis): - """ - Evaluate if points share significant conceptual similarities. - """ - - # Assess the analysis to determine conceptual similarity - conceptually_similar = external_library.evaluate_conceptual_similarity(conceptual_analysis) - - # Return True if conceptually similar, otherwise False - return conceptually_similar - - def evaluate_shared_theme(self, conceptual_similarity): - """ - Evaluate if a conceptual similarity represents a shared theme. - """ - - # Assess if the similarity is a shared theme - shared_theme = external_library.evaluate_shared_theme(conceptual_similarity) - - # Return True if shared theme, otherwise False - return shared_theme - - def evaluate_similar_concept(self, conceptual_similarity): - """ - Evaluate if a conceptual similarity is a similar concept. - """ - - # Assess if the similarity is a similar concept - similar_concept = external_library.evaluate_similar_concept(conceptual_similarity) - - # Return True if similar concept, otherwise False - return similar_concept - - def evaluate_conflict(self, relationship, valid_relationship): - """ - Evaluate if a new relationship conflicts with an existing valid relationship. - """ - - # Compare the new relationship against the valid relationship to check for conflict - conflict = external_library.evaluate_conflict(relationship, valid_relationship) - - # Return True if conflict is found, otherwise False - return conflict diff --git a/topos/FC/semantic_compression.py b/topos/FC/semantic_compression.py deleted file mode 100644 index e19d7d4..0000000 --- a/topos/FC/semantic_compression.py +++ /dev/null @@ -1,118 +0,0 @@ -# semantic_compression.py - -import json -import logging -import math - -# from dotenv import load_dotenv -from openai import OpenAI -from topos.FC.cache_manager import CacheManager -from topos.FC.similitude_module import load_model, util - - -class SemanticCompression: - def __init__(self, api_key, model="ollama:solar", max_tokens_category=128, max_tokens_contextualize=128, - max_tokens_recompose=256, max_tokens_decode=1024, cache_enabled=True): - self.api_key = api_key - self.model_provider, self.model_type = self.parse_model(model) - self.max_tokens_semantic_category = max_tokens_category - self.max_tokens_contextualize = max_tokens_contextualize - self.max_tokens_recompose = max_tokens_recompose - self.max_tokens_decode = max_tokens_decode - self.cache_enabled = cache_enabled - - self.embedding_model_smallest_80_14200 = 'all-MiniLM-L6-v2' - self.embedding_model_small_120_7500 = 'all-MiniLM-L12-v2' - self.embedding_model_medium_420_2800 = 'all-mpnet-base-v2' - - self.model = self.load_model() - - self.cache_manager = CacheManager() - - def load_model(self): - return load_model(self.embedding_model_medium_420_2800) - - @staticmethod - def parse_model(model): - if ":" in model: - return model.split(":", 1) - else: - return "ollama", model - - def get_content_key(self, key, token_limit_for_task): - content_key = f"{key}.{self.model_provider}.{self.model_type}.{token_limit_for_task}" - return content_key - - def fetch_semantic_category(self, input_text, extra_fingerprint=""): - content_string = "" - - if self.model_provider == "openai": - content_string = f"""Summarize the following into six or less words: {input_text}""" - # content_string = f"""Summarize the following into one or more words with up to {modifiers_limit} modifiers: {input_text}""" - elif self.model_provider == "ollama" and self.model_type == "phi3": - content_string = f"""Summarize the following into one or more words: {input_text} - in the format of: - ___Summarized Hypernym/Category___""" - elif self.model_provider == "ollama" and self.model_type == "dolphin-llama3": - content_string = f"""You are an expert at finding the simplest hypernyms & synopsis possible.""" - elif self.model_provider == "claude": - content_string = f"""Summarize the following into six or less words:""" - - # Construct the JSON object using a Python dictionary and convert it to a JSON string - messages = [ - { - "role": "system", - "content": content_string - } - ] - - if self.model_provider == "ollama" and self.model_type == "dolphin-llama3": - messages.append({"role": "user", - "content": f"in the format of ___Summarized Hypernym/Category___, give me the (six or less words) hypernym for the following text: {input_text}\nRemember - 6 words or less!!"}) - - if self.model_provider == "claude": - messages.append({"role": "user", "content": f"{input_text}"}) - - # default temp is 0.3 - temperature = 0.3 - - # gpt-4o is a bit more conservative, so we need to use a higher temperature otherwise it'll just degenerate - # into a single hypernym. - if self.model_provider == "openai" and self.model_type == "gpt-4o": - temperature = 0.5 - - # Use json.dumps to safely create a JSON string - # Attempt to parse the template as JSON - formatted_json = json.dumps(messages, indent=4) - - content_key = self.get_content_key(formatted_json + extra_fingerprint, self.max_tokens_semantic_category) - - cached_response = self.cache_manager.load_from_cache(content_key) - if cached_response: - return cached_response - - try: - ollama_base = "http://localhost:11434/v1" - client = OpenAI( - base_url=ollama_base, - api_key="ollama", - ) - - response = client.chat.completions.create( - model=self.model_type, - messages=json.loads(formatted_json), - max_tokens=self.max_tokens_semantic_category, - n=1, - stop=None, - temperature=temperature) - self.cache_manager.save_to_cache(content_key, response. - choices[0].message) - return response.choices[0].message - except Exception as e: - logging.error(f"Error in fetch_semantic_compression: {e}") - return None - - def get_semantic_distance(self, detail_dict, modified_text): - original_embeddings = self.model.encode(detail_dict) - modified_embeddings = self.model.encode(modified_text) - return util.pytorch_cos_sim(original_embeddings, modified_embeddings)[0][0] diff --git a/topos/FC/similitude_module.py b/topos/FC/similitude_module.py deleted file mode 100644 index 9ce8317..0000000 --- a/topos/FC/similitude_module.py +++ /dev/null @@ -1,22 +0,0 @@ -from sentence_transformers import SentenceTransformer, util -import os - - -def load_model(model_name): - try: - # Check if the model is already downloaded by attempting to load it - model = SentenceTransformer(model_name) - print("Model loaded successfully.") - return model - except Exception as e: - print("Failed to load model:", str(e)) - # Attempt to download the model, ensuring network issues are handled - try: - print("Attempting to download the model...") - # This function automatically downloads and caches the model - model = SentenceTransformer(model_name) - print("Model downloaded and loaded successfully.") - return model - except Exception as e: - print("An error occurred while downloading the model:", str(e)) - return None \ No newline at end of file diff --git a/topos/api/api.py b/topos/api/api.py index d4a935d..6378c95 100644 --- a/topos/api/api.py +++ b/topos/api/api.py @@ -1,9 +1,5 @@ from fastapi import FastAPI from ..config import setup_config, get_ssl_certificates -from .websocket_handlers import router as websocket_router -from .api_routes import router as api_router -from .p2p_chat_routes import router as p2p_chat_router -from .debate_routes import router as debate_router import uvicorn # Create the FastAPI application instance @@ -12,11 +8,27 @@ # Configure the application using settings from config.py setup_config(app) -# Include routers from other parts of the application -app.include_router(api_router) -app.include_router(debate_router) -app.include_router(websocket_router) -app.include_router(p2p_chat_router) +from .routers.server.system import router as system_router +from .routers.server.info import router as info_router +from .routers.analyze.graph import router as analyze_graph_router +from .routers.analyze.topics import router as analyze_topics_router +from .routers.analyze.summarize import router as analyze_summarize_router +from .routers.report.report import router as report_router +from .routers.image.image import router as image_router +from .routers.chat.chat import router as chat_router +from .routers.chat.p2p import router as p2p_router + +# NEW ROUTER IMPORTS +app.include_router(system_router) +app.include_router(info_router) +app.include_router(analyze_graph_router) +app.include_router(analyze_topics_router) +app.include_router(analyze_summarize_router) +app.include_router(report_router) +app.include_router(image_router) +app.include_router(chat_router) +app.include_router(p2p_router) + """ diff --git a/topos/api/api_routes.py b/topos/api/api_routes.py deleted file mode 100644 index 34a1c64..0000000 --- a/topos/api/api_routes.py +++ /dev/null @@ -1,460 +0,0 @@ -# api_routes.py - -import os -from fastapi import APIRouter, HTTPException, Request -from fastapi.responses import JSONResponse -import requests -import signal -import glob -import sys -from topos.FC.conversation_cache_manager import ConversationCacheManager -router = APIRouter() - -from collections import Counter, OrderedDict, defaultdict -from pydantic import BaseModel - -from ..generations.chat_gens import LLMController -from ..utilities.utils import create_conversation_string -from ..services.ontology_service.mermaid_chart import MermaidCreator - -import logging - -db_config = { - "dbname": os.getenv("POSTGRES_DB"), - "user": os.getenv("POSTGRES_USER"), - "password": os.getenv("POSTGRES_PASSWORD"), - "host": os.getenv("POSTGRES_HOST"), - "port": os.getenv("POSTGRES_PORT") - } - -logging.info(f"Database configuration: {db_config}") - -use_postgres = True -if use_postgres: - cache_manager = ConversationCacheManager(use_postgres=True, db_config=db_config) -else: - cache_manager = ConversationCacheManager() - -class ConversationIDRequest(BaseModel): - conversation_id: str - -@router.post("/shutdown") -def shutdown(request: Request): - os.kill(os.getpid(), signal.SIGTERM) - return JSONResponse(content={"message": "Server shutting down..."}) - - -@router.get("/health") -async def health_check(): - try: - # Perform any additional checks here if needed - return {"status": "healthy"} - except Exception as e: - raise HTTPException(status_code=500, detail=f"Health check failed: {e}") - - -@router.post("/chat_conversation_analysis") -async def chat_conversation_analysis(request: ConversationIDRequest): - conversation_id = request.conversation_id - # load conversation - conv_data = cache_manager.load_from_cache(conversation_id) - - if conv_data is None: - raise HTTPException(status_code=404, detail="Conversation not found in cache") - # Initialize counters - named_entity_counter = Counter() - entity_text_counter = Counter() - emotion_counter = Counter() - - # Initialize user-based counters - named_entity_counter_per_user = defaultdict(Counter) - entity_text_counter_per_user = defaultdict(Counter) - emotion_counter_per_user = defaultdict(Counter) - - print(f"\t[ conversational analysis ]") - if cache_manager.use_postgres: - # Extract counts - for conversation_id, messages_list in conv_data.items(): - print(f"\t\t[ item :: {conversation_id} ]") - for message_dict in messages_list: - for cntn in message_dict: - for message_id, content in cntn.items(): - # print(f"\t\t\t[ content :: {str(content)[40:]} ]") - # print(f"\t\t\t[ keys :: {str(content.keys())[40:]} ]") - role = content['role'] - user = role - if role == "user" and 'user_name' in content: - user = content['user_name'] - - # Process named entities and base analysis - base_analysis = content['in_line']['base_analysis'] - for entity_type, entities in base_analysis.items(): - named_entity_counter[entity_type] += len(entities) - named_entity_counter_per_user[user][entity_type] += len(entities) - for entity in entities: - entity_text_counter[str(entity.get('text', ''))] += 1 - entity_text_counter_per_user[user][str(entity.get('text', ''))] += 1 - - # Process emotions - emotions = content['commenter']['base_analysis']['emo_27'] - for emotion in emotions: - emotion_counter[emotion['label']] += 1 - emotion_counter_per_user[user][emotion['label']] += 1 - else: - # Extract counts - for conversation_id, messages in conv_data.items(): - print(f"\t\t[ item :: {conversation_id} ]") - for message_id, content in messages.items(): - # print(f"\t\t\t[ content :: {str(content)[40:]} ]") - # print(f"\t\t\t[ keys :: {str(content.keys())[40:]} ]") - role = content['role'] - user = role - if role == "user" and 'user_name' in content: - user = content['user_name'] - base_analysis = content['in_line']['base_analysis'] - for entity_type, entities in base_analysis.items(): - named_entity_counter[entity_type] += len(entities) - named_entity_counter_per_user[user][entity_type] += len(entities) - for entity in entities: - entity_text_counter[str(entity['text'])] += 1 - entity_text_counter_per_user[user][str(entity['text'])] += 1 - - emotions = content['commenter']['base_analysis']['emo_27'] - for emotion in emotions: - emotion_counter[emotion['label']] += 1 - emotion_counter_per_user[user][emotion['label']] += 1 - - # Evocations equals num of each entity - # print("Named Entity Count:") - # print(named_entity_counter) # get the count of each entity from the conv_data - - # # Actual Items summoned - # print("\nEntity Text Count:") - # print(entity_text_counter) # get the count of each summoned item from the conv_data - - # # Detected emotions in the population - # print("\nEmotion Count:") - # print(emotion_counter) # also get a population count of all the emotions that were invoked in the conversation - - # print("\t\t[ emotion counter per-user :: {emotion_counter_per_user}") - # Convert Counter objects to dictionaries - named_entity_dict = { - "totals": dict(named_entity_counter), - "per_role": {user: dict(counter) for user, counter in named_entity_counter_per_user.items()} - } - entity_text_dict = { - "totals": dict(entity_text_counter), - "per_role": {user: dict(counter) for user, counter in entity_text_counter_per_user.items()} - } - emotion_dict = { - "totals": dict(emotion_counter), - "per_role": {user: dict(counter) for user, counter in emotion_counter_per_user.items()} - } - - # Create the final dictionary - conversation = { - 'entity_evocations': named_entity_dict, - 'entity_summons': entity_text_dict, - 'emotions27': emotion_dict - } - - - # Return the conversation or any other response needed - return {"conversation": conversation} - - - -import torch -from diffusers import DiffusionPipeline -@router.post("/chat/conv_to_image") -async def conv_to_image(request: ConversationIDRequest): - conversation_id = request.conversation_id - - # load conversation - conv_data = cache_manager.load_from_cache(conversation_id) - if conv_data is None: - raise HTTPException(status_code=404, detail="Conversation not found in cache") - - - # model specifications - # TODO UPDATE SO ITS NOT HARDCODED - model = "dolphin-llama3" - provider = 'ollama' # defaults to ollama right now - api_key = 'ollama' - - llm_client = LLMController(model_name=model, provider=provider, api_key=api_key) - - context = create_conversation_string(conv_data, 6) - print(context) - print(f"\t[ converting conversation to image to text prompt: using model {model}]") - conv_to_text_img_prompt = "Create an interesting, and compelling image-to-text prompt that can be used in a diffussor model. Be concise and convey more with the use of metaphor. Steer the image style towards Slavador Dali's fantastic, atmospheric, heroesque paintings that appeal to everyman themes." - txt_to_img_prompt = llm_client.generate_response(context, conv_to_text_img_prompt, temperature=0) - # print(txt_to_img_prompt) - print(f"\t[ generating a file name {model} ]") - txt_to_img_filename = llm_client.generate_response(txt_to_img_prompt, "Based on the context create an appropriate, and BRIEF, filename with no spaces. Do not use any file extensions in your name, that will be added in a later step.", temperature=0) - - # run huggingface comic diffusion - pipeline = DiffusionPipeline.from_pretrained("ogkalu/Comic-Diffusion") - # Move the pipeline to the GPU if available, or to MPS if on an M-Series MacBook, otherwise to CPU - if torch.cuda.is_available(): - device = "cuda" - elif torch.backends.mps.is_available(): - device = "mps" - else: - device = "cpu" - pipeline.to(device) - - # Generate an image based on the input text - prompt = "somewhere over the rainbow" - print(f"\t[ generating the image using: 'ogkalu/Comic-Diffusion' ]") - image = pipeline(txt_to_img_prompt).images[0] - file_name = f"{txt_to_img_filename}.png" - file_name = "".join(file_name.split()) - # Save the generated image locally - image.save(file_name) - - # Get file bytes to pass to UI - system_path = os.path.abspath("/") - print(f"\t[ {system_path}") - bytes_list = read_file_as_bytes(file_name) - media_type = "application/json" - - # return the image - return {"file_name" : file_name, "bytes": bytes_list, "prompt": txt_to_img_prompt} - - -class GenNextMessageOptions(BaseModel): - conversation_id: str - query: str - provider: str - api_key: str - model: str - voice_settings: dict - -@router.post("/gen_next_message_options") -async def create_next_messages(request: GenNextMessageOptions): - conversation_id = request.conversation_id - query = request.query - print(request.provider, "/", request.model) - print(request.api_key) - # model specifications - model = request.model if request.model != None else "dolphin-llama3" - provider = request.provider if request.provider != None else 'ollama' # defaults to ollama right now - api_key = request.api_key if request.api_key != None else 'ollama' - - llm_client = LLMController(model_name=model, provider=provider, api_key=api_key) - - voice_settings = request.voice_settings if request.voice_settings != None else """{ - "tone": "analytical", - "distance": "distant", - "pace": "leisurely", - "depth": "insightful", - "engagement": "engaging", - "message length": "brief" -}""" - # load conversation - conv_data = cache_manager.load_from_cache(conversation_id) - if conv_data is None: - raise HTTPException(status_code=404, detail="Conversation not found in cache") - - context = create_conversation_string(conv_data, 12) - print(f"\t[ generating next message options: using model {model}]") - - - conv_json = f""" -conversation.json: -{voice_settings} -""" - print(conv_json) - - system_prompt = "PRESENT CONVERSATION:\n-------" + context + "\n-------\n" - system_prompt += """Roleplay with the current conversation, and offer 3 messages the user can speak next. -Generate options based on these parameters. -""" - system_prompt += conv_json - - - next_message_options = llm_client.generate_response(system_prompt, query, temperature=0) - print(next_message_options) - - # return the options - return {"response" : next_message_options} - - -class ConversationTopicsRequest(BaseModel): - conversation_id: str - model: str - -@router.post("/gen_conversation_topics") -async def create_next_messages(request: ConversationTopicsRequest): - conversation_id = request.conversation_id - # model specifications - # TODO UPDATE SO ITS NOT HARDCODED - model = request.model if request.model != None else "dolphin-llama3" - provider = 'ollama' # defaults to ollama right now - api_key = 'ollama' - - llm_client = LLMController(model_name=model, provider=provider, api_key=api_key) - - # load conversation - conv_data = cache_manager.load_from_cache(conversation_id) - if conv_data is None: - raise HTTPException(status_code=404, detail="Conversation not found in cache") - - context = create_conversation_string(conv_data, 12) - # print(f"\t[ generating summary :: model {model} :: subject {subject}]") - - query = f"" - # topic list first pass - system_prompt = "PRESENT CONVERSATION:\n-------" + context + "\n-------\n" - query += """List the topics and those closely related to what this conversation traverses.""" - topic_list = llm_client.generate_response(system_prompt, query, temperature=0) - print(topic_list) - - # return the image - return {"response" : topic_list} - - -@router.post("/list_models") -async def list_models(provider: str = 'ollama', api_key: str = 'ollama'): - # Define the URLs for different providers - - list_models_urls = { - 'ollama': "http://localhost:11434/api/tags", - 'openai': "https://api.openai.com/v1/models", - 'groq': "https://api.groq.com/openai/v1/models" - } - - if provider not in list_models_urls: - raise HTTPException(status_code=400, detail="Unsupported provider") - - # Get the appropriate URL based on the provider - url = list_models_urls.get(provider.lower()) - - if provider.lower() == 'ollama': - # No need for headers with Ollama - headers = {} - else: - headers = { - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json" - } - - try: - # Make the request with the appropriate headers - result = requests.get(url, headers=headers) - if result.status_code == 200: - return {"result": result.json()} - else: - raise HTTPException(status_code=result.status_code, detail="Models not found") - except requests.ConnectionError: - raise HTTPException(status_code=500, detail="Server connection error") - -@router.post("/test") -async def test(): - return "hello world" - -@router.post("/get_files") -async def get_files(): - # Get the current working directory - current_dir = os.getcwd() - - # List all image files in the current directory - image_files = glob.glob(os.path.join(current_dir, "*.png")) + \ - glob.glob(os.path.join(current_dir, "*.jpg")) + \ - glob.glob(os.path.join(current_dir, "*.jpeg")) - - if not image_files: - return {"error": "No image files found in the current directory."} - - # Print available files - print("Available image files:") - for i, file in enumerate(image_files, 1): - print(f"{i}. {os.path.basename(file)}") - - # Get user input - while True: - try: - choice = int(input("Enter the number of the file you want to select: ")) - if 1 <= choice <= len(image_files): - file_path = image_files[choice - 1] - break - else: - print("Invalid choice. Please try again.") - except ValueError: - print("Please enter a valid number.") - - print(f"Selected file: {file_path}") - - # Use the os.path module - system_path = os.path.abspath("/") - print(system_path) - bytes_list = read_file_as_bytes(file_path) - media_type = "application/json" - print(type(bytes_list)) - return {"file_name": [i for i in file_path], "bytes": bytes_list} - -def read_file_as_bytes(file_path): - try: - with open(file_path, 'rb') as file: - file_bytes = list(file.read()) - return file_bytes - except FileNotFoundError: - print("File not found.") - return None - except Exception as e: - print(f"An error occurred: {e}") - return None - - -class MermaidChartPayload(BaseModel): - message: str = None - conversation_id: str - full_conversation: bool = False - model: str = "dolphin-llama3" - provider: str = "ollama" - api_key: str = "ollama" - temperature: float = 0.04 - -@router.post("/generate_mermaid_chart") -async def generate_mermaid_chart(payload: MermaidChartPayload): - try: - conversation_id = payload.conversation_id - full_conversation = payload.full_conversation - # model specifications - model = payload.model - provider = payload.provider# defaults to ollama right now - api_key = payload.api_key - temperature = payload.temperature - - llm_client = LLMController(model_name=model, provider=provider, api_key=api_key) - - mermaid_generator = MermaidCreator(llm_client) - - - - if full_conversation: - cache_manager = cache_manager - conv_data = cache_manager.load_from_cache(conversation_id) - if conv_data is None: - raise HTTPException(status_code=404, detail="Conversation not found in cache") - print(f"\t[ generating mermaid chart :: {provider}/{model} :: full conversation ]") - return {"status": "generating", "response": "generating mermaid chart", 'completed': False} - # TODO: Complete this branch if needed - - else: - message = payload.message - if message: - print(f"\t[ generating mermaid chart :: using model {model} ]") - try: - mermaid_string = await mermaid_generator.get_mermaid_chart(message) - print(mermaid_string) - if mermaid_string == "Failed to generate mermaid": - return {"status": "error", "response": mermaid_string, 'completed': True} - else: - return {"status": "completed", "response": mermaid_string, 'completed': True} - except Exception as e: - return {"status": "error", "response": f"Error: {e}", 'completed': True} - - except Exception as e: - return {"status": "error", "message": str(e)} diff --git a/topos/api/routers/analyze/graph.py b/topos/api/routers/analyze/graph.py new file mode 100644 index 0000000..8f6a542 --- /dev/null +++ b/topos/api/routers/analyze/graph.py @@ -0,0 +1,130 @@ +import os +import json + +from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect + +from topos.FC.conversation_cache_manager import ConversationCacheManager + +from ....services.generations_service.chat_gens import LLMController +from ....utilities.utils import create_conversation_string +from ....services.ontology_service.mermaid_chart import MermaidCreator +from ....models.models import MermaidChartPayload + +import logging + +router = APIRouter() + +db_config = { + "dbname": os.getenv("POSTGRES_DB"), + "user": os.getenv("POSTGRES_USER"), + "password": os.getenv("POSTGRES_PASSWORD"), + "host": os.getenv("POSTGRES_HOST"), + "port": os.getenv("POSTGRES_PORT") + } + +logging.info(f"Database configuration: {db_config}") + +use_postgres = True +if use_postgres: + cache_manager = ConversationCacheManager(use_postgres=True, db_config=db_config) +else: + cache_manager = ConversationCacheManager() + + +@router.post("/generate_mermaid_chart") +async def generate_mermaid_chart(payload: MermaidChartPayload): + try: + conversation_id = payload.conversation_id + full_conversation = payload.full_conversation + # model specifications + model = payload.model + provider = payload.provider# defaults to ollama right now + api_key = payload.api_key + temperature = payload.temperature + + llm_client = LLMController(model_name=model, provider=provider, api_key=api_key) + + mermaid_generator = MermaidCreator(llm_client) + + if full_conversation: + cache_manager = cache_manager + conv_data = cache_manager.load_from_cache(conversation_id) + if conv_data is None: + raise HTTPException(status_code=404, detail="Conversation not found in cache") + print(f"\t[ generating mermaid chart :: {provider}/{model} :: full conversation ]") + return {"status": "generating", "response": "generating mermaid chart", 'completed': False} + # TODO: Complete this branch if needed + + else: + message = payload.message + if message: + print(f"\t[ generating mermaid chart :: using model {model} ]") + try: + mermaid_string = await mermaid_generator.get_mermaid_chart(message) + print(mermaid_string) + if mermaid_string == "Failed to generate mermaid": + return {"status": "error", "response": mermaid_string, 'completed': True} + else: + return {"status": "completed", "response": mermaid_string, 'completed': True} + except Exception as e: + return {"status": "error", "response": f"Error: {e}", 'completed': True} + + except Exception as e: + return {"status": "error", "message": str(e)} + + +@router.websocket("/websocket_mermaid_chart") +async def meta_chat(websocket: WebSocket): + """ + + Generates a mermaid chart from a list of message. + + """ + await websocket.accept() + try: + while True: + data = await websocket.receive_text() + payload = json.loads(data) + message = payload.get("message", None) + conversation_id = payload["conversation_id"] + full_conversation = payload.get("full_conversation", False) + # model specifications + model = payload.get("model", "dolphin-llama3") + provider = payload.get('provider', 'ollama') # defaults to ollama right now + api_key = payload.get('api_key', 'ollama') + temperature = float(payload.get("temperature", 0.04)) + + llm_client = LLMController(model_name=model, provider=provider, api_key=api_key) + + mermaid_generator = MermaidCreator(llm_client) + # load conversation + if full_conversation: + cache_manager = cache_manager + conv_data = cache_manager.load_from_cache(conversation_id) + if conv_data is None: + raise HTTPException(status_code=404, detail="Conversation not found in cache") + print(f"\t[ generating mermaid chart :: using model {model} :: full conversation ]") + await websocket.send_json({"status": "generating", "response": "generating mermaid chart", 'completed': False}) + context = create_conversation_string(conv_data, 12) + # TODO Complete this branch + else: + if message: + print(f"\t[ generating mermaid chart :: using model {model} ]") + await websocket.send_json({"status": "generating", "response": "generating mermaid chart", 'completed': False}) + try: + mermaid_string = await mermaid_generator.get_mermaid_chart(message, websocket = websocket) + if mermaid_string == "Failed to generate mermaid": + await websocket.send_json({"status": "error", "response": mermaid_string, 'completed': True}) + else: + await websocket.send_json({"status": "completed", "response": mermaid_string, 'completed': True}) + except Exception as e: + await websocket.send_json({"status": "error", "response": f"Error: {e}", 'completed': True}) + except WebSocketDisconnect: + print("WebSocket disconnected") + except Exception as e: + await websocket.send_json({"status": "error", "message": str(e)}) + await websocket.close() + finally: + await websocket.close() + + diff --git a/topos/api/routers/analyze/summarize.py b/topos/api/routers/analyze/summarize.py new file mode 100644 index 0000000..f518e4f --- /dev/null +++ b/topos/api/routers/analyze/summarize.py @@ -0,0 +1,94 @@ +from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect +import json +import os + +from ....services.generations_service.chat_gens import LLMController +from ....utilities.utils import create_conversation_string + +# cache database +from topos.FC.conversation_cache_manager import ConversationCacheManager + +import logging + +db_config = { + "dbname": os.getenv("POSTGRES_DB"), + "user": os.getenv("POSTGRES_USER"), + "password": os.getenv("POSTGRES_PASSWORD"), + "host": os.getenv("POSTGRES_HOST"), + "port": os.getenv("POSTGRES_PORT") + } + +logging.info(f"Database configuration: {db_config}") + +use_postgres = True +if use_postgres: + cache_manager = ConversationCacheManager(use_postgres=True, db_config=db_config) +else: + cache_manager = ConversationCacheManager() + +router = APIRouter() + +@router.websocket("/websocket_chat_summary") +async def meta_chat(websocket: WebSocket): + """ + + Generates a summary of the conversation oriented around a given focal point. + + """ + await websocket.accept() + try: + while True: + data = await websocket.receive_text() + payload = json.loads(data) + + conversation_id = payload["conversation_id"] + subject = payload.get("subject", "knowledge") + temperature = float(payload.get("temperature", 0.04)) + + # model specifications + model = payload.get("model", "solar") + provider = payload.get('provider', 'ollama') # defaults to ollama right now + api_key = payload.get('api_key', 'ollama') + + llm_client = LLMController(model_name=model, provider=provider, api_key=api_key) + + + # load conversation + cache_manager = cache_manager + conv_data = cache_manager.load_from_cache(conversation_id) + if conv_data is None: + raise HTTPException(status_code=404, detail="Conversation not found in cache") + + context = create_conversation_string(conv_data, 12) + + print(f"\t[ generating summary :: model {model} :: subject {subject}]") + + # Set system prompt + system_prompt = "PRESENT CONVERSATION:\n-------" + context + "\n-------\n" + query = f"""Summarize this conversation. Frame your response around the subject of {subject}""" + + msg_history = [{'role': 'system', 'content': system_prompt}] + + # Append the present message to the message history + simplified_message = {'role': "user", 'content': query} + msg_history.append(simplified_message) + + # Processing the chat + output_combined = "" + for chunk in llm_client.stream_chat(msg_history, temperature=temperature): + try: + output_combined += chunk + await websocket.send_json({"status": "generating", "response": output_combined, 'completed': False}) + except Exception as e: + print(e) + await websocket.send_json({"status": "error", "message": str(e)}) + await websocket.close() + # Send the final completed message + await websocket.send_json( + {"status": "completed", "response": output_combined, "completed": True}) + + except WebSocketDisconnect: + print("WebSocket disconnected") + except Exception as e: + await websocket.send_json({"status": "error", "message": str(e)}) + await websocket.close() diff --git a/topos/api/routers/analyze/topics.py b/topos/api/routers/analyze/topics.py new file mode 100644 index 0000000..36eb546 --- /dev/null +++ b/topos/api/routers/analyze/topics.py @@ -0,0 +1,58 @@ + + +import os +from fastapi import APIRouter, HTTPException +from topos.FC.conversation_cache_manager import ConversationCacheManager + +from ....services.generations_service.chat_gens import LLMController +from ....utilities.utils import create_conversation_string +from ....models.models import ConversationTopicsRequest + +import logging + +db_config = { + "dbname": os.getenv("POSTGRES_DB"), + "user": os.getenv("POSTGRES_USER"), + "password": os.getenv("POSTGRES_PASSWORD"), + "host": os.getenv("POSTGRES_HOST"), + "port": os.getenv("POSTGRES_PORT") + } + +logging.info(f"Database configuration: {db_config}") + +use_postgres = True +if use_postgres: + cache_manager = ConversationCacheManager(use_postgres=True, db_config=db_config) +else: + cache_manager = ConversationCacheManager() + +router = APIRouter() + +@router.post("/get_files") +async def create_next_messages(request: ConversationTopicsRequest): + conversation_id = request.conversation_id + # model specifications + # TODO UPDATE SO ITS NOT HARDCODED + model = request.model if request.model != None else "dolphin-llama3" + provider = 'ollama' # defaults to ollama right now + api_key = 'ollama' + + llm_client = LLMController(model_name=model, provider=provider, api_key=api_key) + + # load conversation + conv_data = cache_manager.load_from_cache(conversation_id) + if conv_data is None: + raise HTTPException(status_code=404, detail="Conversation not found in cache") + + context = create_conversation_string(conv_data, 12) + # print(f"\t[ generating summary :: model {model} :: subject {subject}]") + + query = f"" + # topic list first pass + system_prompt = "PRESENT CONVERSATION:\n-------" + context + "\n-------\n" + query += """List the topics and those closely related to what this conversation traverses.""" + topic_list = llm_client.generate_response(system_prompt, query, temperature=0) + print(topic_list) + + # return the image + return {"response" : topic_list} diff --git a/topos/api/websocket_handlers.py b/topos/api/routers/chat/chat.py similarity index 70% rename from topos/api/websocket_handlers.py rename to topos/api/routers/chat/chat.py index 410b3c2..c31105b 100644 --- a/topos/api/websocket_handlers.py +++ b/topos/api/routers/chat/chat.py @@ -1,32 +1,23 @@ -from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect -from datetime import datetime +import json +import os +import logging import time -import traceback +from datetime import datetime import pprint -from ..generations.chat_gens import LLMController -# from topos.FC.semantic_compression import SemanticCompression -# from ..config import get_openai_api_key -from ..models.llm_classes import vision_models -import json +from fastapi import APIRouter, WebSocket, WebSocketDisconnect + +from ....services.generations_service.chat_gens import LLMController +from ....models.llm_classes import vision_models + +from ....services.classification_service.base_analysis import base_text_classifier, base_token_classifier +from ....services.loggers.process_logger import ProcessLogger -from ..utilities.utils import create_conversation_string -from ..services.classification_service.base_analysis import base_text_classifier, base_token_classifier -from ..services.loggers.process_logger import ProcessLogger -from ..services.ontology_service.mermaid_chart import MermaidCreator -import os # cache database from topos.FC.conversation_cache_manager import ConversationCacheManager -# Debate simulator -from topos.channel.debatesim import DebateSimulator - router = APIRouter() -debate_simulator = DebateSimulator.get_instance() - - -import logging db_config = { "dbname": os.getenv("POSTGRES_DB"), @@ -291,6 +282,7 @@ async def chat(websocket: WebSocket): await websocket.send_json({"status": "error", "message": str(e)}) await websocket.close() + @router.websocket("/websocket_meta_chat") async def meta_chat(websocket: WebSocket): """ @@ -361,154 +353,4 @@ async def meta_chat(websocket: WebSocket): print("WebSocket disconnected") except Exception as e: await websocket.send_json({"status": "error", "message": str(e)}) - await websocket.close() - - -@router.websocket("/websocket_chat_summary") -async def meta_chat(websocket: WebSocket): - """ - - Generates a summary of the conversation oriented around a given focal point. - - """ - await websocket.accept() - try: - while True: - data = await websocket.receive_text() - payload = json.loads(data) - - conversation_id = payload["conversation_id"] - subject = payload.get("subject", "knowledge") - temperature = float(payload.get("temperature", 0.04)) - - # model specifications - model = payload.get("model", "solar") - provider = payload.get('provider', 'ollama') # defaults to ollama right now - api_key = payload.get('api_key', 'ollama') - - llm_client = LLMController(model_name=model, provider=provider, api_key=api_key) - - - # load conversation - cache_manager = cache_manager - conv_data = cache_manager.load_from_cache(conversation_id) - if conv_data is None: - raise HTTPException(status_code=404, detail="Conversation not found in cache") - - context = create_conversation_string(conv_data, 12) - - print(f"\t[ generating summary :: model {model} :: subject {subject}]") - - # Set system prompt - system_prompt = "PRESENT CONVERSATION:\n-------" + context + "\n-------\n" - query = f"""Summarize this conversation. Frame your response around the subject of {subject}""" - - msg_history = [{'role': 'system', 'content': system_prompt}] - - # Append the present message to the message history - simplified_message = {'role': "user", 'content': query} - msg_history.append(simplified_message) - - # Processing the chat - output_combined = "" - for chunk in llm_client.stream_chat(msg_history, temperature=temperature): - try: - output_combined += chunk - await websocket.send_json({"status": "generating", "response": output_combined, 'completed': False}) - except Exception as e: - print(e) - await websocket.send_json({"status": "error", "message": str(e)}) - await websocket.close() - # Send the final completed message - await websocket.send_json( - {"status": "completed", "response": output_combined, "completed": True}) - - except WebSocketDisconnect: - print("WebSocket disconnected") - except Exception as e: - await websocket.send_json({"status": "error", "message": str(e)}) - await websocket.close() - -@router.websocket("/websocket_mermaid_chart") -async def meta_chat(websocket: WebSocket): - """ - - Generates a mermaid chart from a list of message. - - """ - await websocket.accept() - try: - while True: - data = await websocket.receive_text() - payload = json.loads(data) - message = payload.get("message", None) - conversation_id = payload["conversation_id"] - full_conversation = payload.get("full_conversation", False) - # model specifications - model = payload.get("model", "dolphin-llama3") - provider = payload.get('provider', 'ollama') # defaults to ollama right now - api_key = payload.get('api_key', 'ollama') - temperature = float(payload.get("temperature", 0.04)) - - llm_client = LLMController(model_name=model, provider=provider, api_key=api_key) - - mermaid_generator = MermaidCreator(llm_client) - # load conversation - if full_conversation: - cache_manager = cache_manager - conv_data = cache_manager.load_from_cache(conversation_id) - if conv_data is None: - raise HTTPException(status_code=404, detail="Conversation not found in cache") - print(f"\t[ generating mermaid chart :: using model {model} :: full conversation ]") - await websocket.send_json({"status": "generating", "response": "generating mermaid chart", 'completed': False}) - context = create_conversation_string(conv_data, 12) - # TODO Complete this branch - else: - if message: - print(f"\t[ generating mermaid chart :: using model {model} ]") - await websocket.send_json({"status": "generating", "response": "generating mermaid chart", 'completed': False}) - try: - mermaid_string = await mermaid_generator.get_mermaid_chart(message, websocket = websocket) - if mermaid_string == "Failed to generate mermaid": - await websocket.send_json({"status": "error", "response": mermaid_string, 'completed': True}) - else: - await websocket.send_json({"status": "completed", "response": mermaid_string, 'completed': True}) - except Exception as e: - await websocket.send_json({"status": "error", "response": f"Error: {e}", 'completed': True}) - except WebSocketDisconnect: - print("WebSocket disconnected") - except Exception as e: - await websocket.send_json({"status": "error", "message": str(e)}) - await websocket.close() - finally: - await websocket.close() - - - - -@router.websocket("/debate_flow_with_jwt") -async def debate_flow_with_jwt(websocket: WebSocket): - await websocket.accept() - try: - while True: - data = await websocket.receive_text() - payload = json.loads(data) - message_data = payload.get("message_data", None) - model = payload.get("model", None) - - if message_data: - await websocket.send_json({"status": "generating", "response": "starting debate flow analysis", 'completed': False}) - try: - # Assuming DebateSimulator is correctly set up - debate_simulator = await DebateSimulator.get_instance() - response_data = debate_simulator.process_messages(message_data, model) - await websocket.send_json({"status": "completed", "response": response_data, 'completed': True}) - except Exception as e: - await websocket.send_json({"status": "error", "response": f"Error: {e}", 'completed': True}) - except WebSocketDisconnect: - print("WebSocket disconnected") - except Exception as e: - await websocket.send_json({"status": "error", "message": str(e)}) - await websocket.close() - finally: await websocket.close() \ No newline at end of file diff --git a/topos/api/p2p_chat_routes.py b/topos/api/routers/chat/p2p.py similarity index 94% rename from topos/api/p2p_chat_routes.py rename to topos/api/routers/chat/p2p.py index beec3aa..618807d 100644 --- a/topos/api/p2p_chat_routes.py +++ b/topos/api/routers/chat/p2p.py @@ -1,18 +1,14 @@ import os -from fastapi import APIRouter, HTTPException, Request -import requests -from topos.FC.conversation_cache_manager import ConversationCacheManager -from collections import Counter, OrderedDict, defaultdict -from pydantic import BaseModel - -from ..utilities.utils import create_conversation_string -from ..services.classification_service.base_analysis import base_text_classifier, base_token_classifier - import json import time from datetime import datetime import logging +from fastapi import APIRouter, HTTPException, Request +from topos.FC.conversation_cache_manager import ConversationCacheManager + +from ....services.classification_service.base_analysis import base_text_classifier, base_token_classifier + router = APIRouter() db_config = { diff --git a/topos/api/routers/image/image.py b/topos/api/routers/image/image.py new file mode 100644 index 0000000..d970870 --- /dev/null +++ b/topos/api/routers/image/image.py @@ -0,0 +1,99 @@ +import os +import logging + +from fastapi import APIRouter, HTTPException + +from topos.FC.conversation_cache_manager import ConversationCacheManager +router = APIRouter() + +from ....services.generations_service.chat_gens import LLMController +from ....utilities.utils import create_conversation_string +from ....models.models import ConversationIDRequest + +db_config = { + "dbname": os.getenv("POSTGRES_DB"), + "user": os.getenv("POSTGRES_USER"), + "password": os.getenv("POSTGRES_PASSWORD"), + "host": os.getenv("POSTGRES_HOST"), + "port": os.getenv("POSTGRES_PORT") + } + +logging.info(f"Database configuration: {db_config}") + +use_postgres = True +if use_postgres: + cache_manager = ConversationCacheManager(use_postgres=True, db_config=db_config) +else: + cache_manager = ConversationCacheManager() + +@router.post("/chat/conv_to_image") +async def conv_to_image(request: ConversationIDRequest): + import torch + from diffusers import DiffusionPipeline + conversation_id = request.conversation_id + + # load conversation + conv_data = cache_manager.load_from_cache(conversation_id) + if conv_data is None: + raise HTTPException(status_code=404, detail="Conversation not found in cache") + + + # model specifications + # TODO UPDATE SO ITS NOT HARDCODED + model = "dolphin-llama3" + provider = 'ollama' # defaults to ollama right now + api_key = 'ollama' + + llm_client = LLMController(model_name=model, provider=provider, api_key=api_key) + + context = create_conversation_string(conv_data, 6) + print(context) + print(f"\t[ converting conversation to image to text prompt: using model {model}]") + conv_to_text_img_prompt = "Create an interesting, and compelling image-to-text prompt that can be used in a diffussor model. Be concise and convey more with the use of metaphor. Steer the image style towards Slavador Dali's fantastic, atmospheric, heroesque paintings that appeal to everyman themes." + txt_to_img_prompt = llm_client.generate_response(context, conv_to_text_img_prompt, temperature=0) + # print(txt_to_img_prompt) + print(f"\t[ generating a file name {model} ]") + txt_to_img_filename = llm_client.generate_response(txt_to_img_prompt, "Based on the context create an appropriate, and BRIEF, filename with no spaces. Do not use any file extensions in your name, that will be added in a later step.", temperature=0) + + # run huggingface comic diffusion + pipeline = DiffusionPipeline.from_pretrained("ogkalu/Comic-Diffusion") + # Move the pipeline to the GPU if available, or to MPS if on an M-Series MacBook, otherwise to CPU + if torch.cuda.is_available(): + device = "cuda" + elif torch.backends.mps.is_available(): + device = "mps" + else: + device = "cpu" + pipeline.to(device) + + # Generate an image based on the input text + prompt = "somewhere over the rainbow" + print(f"\t[ generating the image using: 'ogkalu/Comic-Diffusion' ]") + image = pipeline(txt_to_img_prompt).images[0] + file_name = f"{txt_to_img_filename}.png" + file_name = "".join(file_name.split()) + # Save the generated image locally + image.save(file_name) + + # Get file bytes to pass to UI + system_path = os.path.abspath("/") + print(f"\t[ {system_path}") + + def read_file_as_bytes(file_path): + try: + with open(file_path, 'rb') as file: + file_bytes = list(file.read()) + return file_bytes + except FileNotFoundError: + print("File not found.") + return None + except Exception as e: + print(f"An error occurred: {e}") + return None + + bytes_list = read_file_as_bytes(file_name) + media_type = "application/json" + + # return the image + return {"file_name" : file_name, "bytes": bytes_list, "prompt": txt_to_img_prompt} + diff --git a/topos/api/routers/report/report.py b/topos/api/routers/report/report.py new file mode 100644 index 0000000..9963a15 --- /dev/null +++ b/topos/api/routers/report/report.py @@ -0,0 +1,138 @@ + +import os +from fastapi import APIRouter, HTTPException +from topos.FC.conversation_cache_manager import ConversationCacheManager + +from collections import Counter, defaultdict + +import logging + +from ....models.models import ConversationIDRequest + +db_config = { + "dbname": os.getenv("POSTGRES_DB"), + "user": os.getenv("POSTGRES_USER"), + "password": os.getenv("POSTGRES_PASSWORD"), + "host": os.getenv("POSTGRES_HOST"), + "port": os.getenv("POSTGRES_PORT") + } + +logging.info(f"Database configuration: {db_config}") + +use_postgres = True +if use_postgres: + cache_manager = ConversationCacheManager(use_postgres=True, db_config=db_config) +else: + cache_manager = ConversationCacheManager() + +router = APIRouter() + +@router.post("/chat_conversation_analysis") +async def chat_conversation_analysis(request: ConversationIDRequest): + conversation_id = request.conversation_id + # load conversation + conv_data = cache_manager.load_from_cache(conversation_id) + + if conv_data is None: + raise HTTPException(status_code=404, detail="Conversation not found in cache") + # Initialize counters + named_entity_counter = Counter() + entity_text_counter = Counter() + emotion_counter = Counter() + + # Initialize user-based counters + named_entity_counter_per_user = defaultdict(Counter) + entity_text_counter_per_user = defaultdict(Counter) + emotion_counter_per_user = defaultdict(Counter) + + print(f"\t[ conversational analysis ]") + if cache_manager.use_postgres: + # Extract counts + for conversation_id, messages_list in conv_data.items(): + print(f"\t\t[ item :: {conversation_id} ]") + for message_dict in messages_list: + for cntn in message_dict: + for message_id, content in cntn.items(): + # print(f"\t\t\t[ content :: {str(content)[40:]} ]") + # print(f"\t\t\t[ keys :: {str(content.keys())[40:]} ]") + role = content['role'] + user = role + if role == "user" and 'user_name' in content: + user = content['user_name'] + + # Process named entities and base analysis + base_analysis = content['in_line']['base_analysis'] + for entity_type, entities in base_analysis.items(): + named_entity_counter[entity_type] += len(entities) + named_entity_counter_per_user[user][entity_type] += len(entities) + for entity in entities: + entity_text_counter[str(entity.get('text', ''))] += 1 + entity_text_counter_per_user[user][str(entity.get('text', ''))] += 1 + + # Process emotions + emotions = content['commenter']['base_analysis']['emo_27'] + for emotion in emotions: + emotion_counter[emotion['label']] += 1 + emotion_counter_per_user[user][emotion['label']] += 1 + else: + # Extract counts + for conversation_id, messages in conv_data.items(): + print(f"\t\t[ item :: {conversation_id} ]") + for message_id, content in messages.items(): + # print(f"\t\t\t[ content :: {str(content)[40:]} ]") + # print(f"\t\t\t[ keys :: {str(content.keys())[40:]} ]") + role = content['role'] + user = role + if role == "user" and 'user_name' in content: + user = content['user_name'] + base_analysis = content['in_line']['base_analysis'] + for entity_type, entities in base_analysis.items(): + named_entity_counter[entity_type] += len(entities) + named_entity_counter_per_user[user][entity_type] += len(entities) + for entity in entities: + entity_text_counter[str(entity['text'])] += 1 + entity_text_counter_per_user[user][str(entity['text'])] += 1 + + emotions = content['commenter']['base_analysis']['emo_27'] + for emotion in emotions: + emotion_counter[emotion['label']] += 1 + emotion_counter_per_user[user][emotion['label']] += 1 + + # Evocations equals num of each entity + # print("Named Entity Count:") + # print(named_entity_counter) # get the count of each entity from the conv_data + + # # Actual Items summoned + # print("\nEntity Text Count:") + # print(entity_text_counter) # get the count of each summoned item from the conv_data + + # # Detected emotions in the population + # print("\nEmotion Count:") + # print(emotion_counter) # also get a population count of all the emotions that were invoked in the conversation + + # print("\t\t[ emotion counter per-user :: {emotion_counter_per_user}") + # Convert Counter objects to dictionaries + named_entity_dict = { + "totals": dict(named_entity_counter), + "per_role": {user: dict(counter) for user, counter in named_entity_counter_per_user.items()} + } + entity_text_dict = { + "totals": dict(entity_text_counter), + "per_role": {user: dict(counter) for user, counter in entity_text_counter_per_user.items()} + } + emotion_dict = { + "totals": dict(emotion_counter), + "per_role": {user: dict(counter) for user, counter in emotion_counter_per_user.items()} + } + + # Create the final dictionary + conversation = { + 'entity_evocations': named_entity_dict, + 'entity_summons': entity_text_dict, + 'emotions27': emotion_dict + } + + + # Return the conversation or any other response needed + return {"conversation": conversation} + diff --git a/topos/api/routers/server/config.py b/topos/api/routers/server/config.py new file mode 100644 index 0000000..abbdebc --- /dev/null +++ b/topos/api/routers/server/config.py @@ -0,0 +1 @@ +# Add configuration routes \ No newline at end of file diff --git a/topos/api/routers/server/info.py b/topos/api/routers/server/info.py new file mode 100644 index 0000000..463558b --- /dev/null +++ b/topos/api/routers/server/info.py @@ -0,0 +1,98 @@ + + +import os +from fastapi import APIRouter, HTTPException +import requests +import glob + +router = APIRouter() + +@router.post("/get_files") +async def get_files(): + # Get the current working directory + current_dir = os.getcwd() + + # List all image files in the current directory + image_files = glob.glob(os.path.join(current_dir, "*.png")) + \ + glob.glob(os.path.join(current_dir, "*.jpg")) + \ + glob.glob(os.path.join(current_dir, "*.jpeg")) + + if not image_files: + return {"error": "No image files found in the current directory."} + + # Print available files + print("Available image files:") + for i, file in enumerate(image_files, 1): + print(f"{i}. {os.path.basename(file)}") + + # Get user input + while True: + try: + choice = int(input("Enter the number of the file you want to select: ")) + if 1 <= choice <= len(image_files): + file_path = image_files[choice - 1] + break + else: + print("Invalid choice. Please try again.") + except ValueError: + print("Please enter a valid number.") + + print(f"Selected file: {file_path}") + + # Use the os.path module + system_path = os.path.abspath("/") + print(system_path) + + def read_file_as_bytes(file_path): + try: + with open(file_path, 'rb') as file: + file_bytes = list(file.read()) + return file_bytes + except FileNotFoundError: + print("File not found.") + return None + except Exception as e: + print(f"An error occurred: {e}") + return None + + + bytes_list = read_file_as_bytes(file_path) + media_type = "application/json" + print(type(bytes_list)) + return {"file_name": [i for i in file_path], "bytes": bytes_list} + + +@router.post("/list_models") +async def list_models(provider: str = 'ollama', api_key: str = 'ollama'): + # Define the URLs for different providers + + list_models_urls = { + 'ollama': "http://localhost:11434/api/tags", + 'openai': "https://api.openai.com/v1/models", + 'groq': "https://api.groq.com/openai/v1/models" + } + + if provider not in list_models_urls: + raise HTTPException(status_code=400, detail="Unsupported provider") + + # Get the appropriate URL based on the provider + url = list_models_urls.get(provider.lower()) + + if provider.lower() == 'ollama': + # No need for headers with Ollama + headers = {} + else: + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json" + } + + try: + # Make the request with the appropriate headers + result = requests.get(url, headers=headers) + if result.status_code == 200: + return {"result": result.json()} + else: + raise HTTPException(status_code=result.status_code, detail="Models not found") + except requests.ConnectionError: + raise HTTPException(status_code=500, detail="Server connection error") diff --git a/topos/api/routers/server/system.py b/topos/api/routers/server/system.py new file mode 100644 index 0000000..f4e7436 --- /dev/null +++ b/topos/api/routers/server/system.py @@ -0,0 +1,24 @@ + +import os +from fastapi import APIRouter, HTTPException, Request +from fastapi.responses import JSONResponse +import signal + +router = APIRouter() + +@router.post("/shutdown") +def shutdown(request: Request): + os.kill(os.getpid(), signal.SIGTERM) + return JSONResponse(content={"message": "Server shutting down..."}) + +@router.get("/health") +async def health_check(): + try: + # Perform any additional checks here if needed + return {"status": "healthy"} + except Exception as e: + raise HTTPException(status_code=500, detail=f"Health check failed: {e}") + +@router.post("/test") +async def test(): + return "hello world" diff --git a/topos/channel/channel_engine.py b/topos/channel/channel_engine.py deleted file mode 100644 index fa25d6d..0000000 --- a/topos/channel/channel_engine.py +++ /dev/null @@ -1,148 +0,0 @@ -# channel_engine.py - -import asyncio -import traceback - - -class ChannelEngine: - """ - A generalized engine for managing asynchronous tasks in a queue. - """ - - def __init__(self): - """ - Initializes the Engine with an asynchronous task queue and sets it to a non-running state. - """ - print(f"\t[ Engine :: init ]") - self.task_queue = asyncio.Queue() - self.task_handlers = {} - self.running = False - self.processing_task = None - self._lock = asyncio.Lock() - - def register_task_handler(self, task_type, handler): - """ - Registers a handler function for a specific task type. - - Args: - task_type: A string identifier for the task type. - handler: An asynchronous function that will handle tasks of the given type. - """ - self.task_handlers[task_type] = handler - - async def add_task(self, task): - """ - Adds a task to the task queue and potentially starts task processing if the engine is not already running. - - Args: - task: A dictionary representing the task with a 'type' key and additional task-specific data. - """ - print(f"\t[ Engine :: Adding task to queue: {task['type']} ]") - await self.task_queue.put(task) - print(f"\t\t[ Engine :: Task added to queue: {task['type']} ]") - - if not self.running: - print(f"\t[ Engine :: Starting task processing ]") - await asyncio.sleep(0) # Yield control to the event loop - await self.start_processing() - - async def wait_for_tasks(self): - """ - Waits for all tasks in the queue to be completed. - """ - print(f"\t[ Engine :: Waiting for all tasks to complete ]") - await self.task_queue.join() - - async def reset_processing_queue(self): - """ - Resets the processing queue by stopping current processing, canceling any running task, - clearing the queue, and resetting the 'running' flag. - """ - print(f"\t[ Engine :: Resetting processing queue ]") - async with self._lock: - self.running = False - if self.processing_task: - self.processing_task.cancel() - try: - await self.processing_task - except asyncio.CancelledError: - pass - - while not self.task_queue.empty(): - try: - self.task_queue.get_nowait() - except asyncio.QueueEmpty: - break - - self.current_generation = None - self.running = True - self.processing_task = asyncio.create_task(self.process_tasks()) - - async def start_processing(self): - """ - Starts processing tasks from the queue by setting the 'running' flag to True and creating - a task to handle the processing. - """ - print(f"\t[ Engine :: Starting task processing ]") - self.running = True - self.processing_task = asyncio.create_task(self.process_tasks()) - - async def stop_processing(self): - """ - Stops task processing by setting the 'running' flag to False and canceling any running task. - """ - print(f"\t[ Engine :: Stopping task processing ]") - self.running = False - if self.processing_task: - self.processing_task.cancel() - try: - await self.processing_task - except asyncio.CancelledError: - pass - - async def process_tasks(self): - """ - Continuously processes tasks from the queue as long as the 'running' flag is True. - Handles task execution, completion, and potential errors. - """ - print(f"\t[ Engine :: Starting to process tasks ]") - while self.running: - try: - task = await self.task_queue.get() - print(f"\t\t[ Engine :: Processing task: {task['type']} ]") - try: - await self.execute_task(task) - print(f"\t\t[ Engine :: Finished processing task: {task['type']} ]") - finally: - self.task_queue.task_done() - except asyncio.CancelledError: - print(f"\t[ Engine :: Task processing was cancelled ]") - break - except Exception as e: - print(f"\t[ Engine :: Error processing task: {e} ]") - traceback.print_exc() - print(f"\t[ Engine :: Stopped processing tasks ]") - - async def execute_task(self, task): - """ - Executes a task by looking up its handler in the task_handlers dictionary - and passing the relevant task data as arguments to the handler. - - Args: - task: A dictionary representing the task with a 'type' key and additional task-specific data. - """ - task_type = task['type'] - handler = self.task_handlers.get(task_type) - if handler: - print(f"\t\t[ Engine :: Executing task: {task_type} ]") - try: - task_data = task.copy() # Create a copy of the task data - task_data.pop('type') # Remove the 'type' key - await handler(**task_data) # Pass task data (without 'type') as keyword arguments - print(f"\t\t[ Engine :: Finished processing task: {task_type} ]") - except Exception as e: - print(f"\t\t[ Engine :: Error processing task: {e} ]") - traceback.print_exc() - else: - print(f"\t\t\t[ Engine :: No handler registered for task type: {task_type} ]") - diff --git a/topos/channel/debatesim.py b/topos/channel/debatesim.py deleted file mode 100644 index 9cdb0cb..0000000 --- a/topos/channel/debatesim.py +++ /dev/null @@ -1,797 +0,0 @@ -# topos/channel/debatesim.py -import hashlib -import asyncio - -import traceback - -from typing import Dict, List -import pprint - -import os -from queue import Queue - -from datetime import datetime, timedelta, UTC -import time - -from dotenv import load_dotenv - -from uuid import uuid4 - -import json -import jwt -from jwt.exceptions import InvalidTokenError - -from transformers import AutoTokenizer, AutoModel -from sentence_transformers import SentenceTransformer -import torch -from sklearn.metrics.pairwise import cosine_similarity -from nltk.tokenize import sent_tokenize -import numpy as np -from scipy.stats import entropy - -from fastapi import WebSocket, WebSocketDisconnect -from ..FC.argument_detection import ArgumentDetection -from ..config import get_openai_api_key -from ..models.llm_classes import vision_models -from ..services.database.app_state import AppState -from ..utilities.utils import create_conversation_string -from ..services.classification_service.base_analysis import base_text_classifier, base_token_classifier -from topos.FC.conversation_cache_manager import ConversationCacheManager -from topos.FC.semantic_compression import SemanticCompression -from topos.FC.ontological_feature_detection import OntologicalFeatureDetection - -from topos.channel.channel_engine import ChannelEngine - -# chess is more complicated than checkers but less complicated than go - -# current: -# graph LR -# timestamp["Timestamp: 2024-06-08T23:47:36.059626"] -# user["user (USER)"] -# sessionTEMP["sessionTEMP (SESSION)"] -# userPRIME["userPRIME (USER)"] -# than --> checkers -# sessionTEMP --> of -# checkers --> complicated -# message --> is -# userPRIME --> for -# is --> chess -# is --> message -# checkers --> than -# of --> sessionTEMP -# chess --> is -# for --> userPRIME -# complicated --> is -# timestamp --> user - -# target: -# graph LR -# userPRIME["userPRIME (USER)"] -# sessionTEMP["sessionTEMP (SESSION)"] -# timestamp["Timestamp: 2024-06-08T23:18:05.206590"] -# message["message"] -# chess["chess"] -# more_complicated["more complicated"] -# checkers["checkers"] -# less_complicated["less complicated"] -# go["go"] -# -# userPRIME --> user -# sessionTEMP --> session -# timestamp --> user -# message --> userPRIME -# message --> sessionTEMP -# message --> timestamp -# chess --> message -# more_complicated --> chess -# more_complicated --> checkers -# less_complicated --> chess -# less_complicated --> go - - -class Cluster: - def __init__(self, cluster_id, sentences, user_id, generation, session_id, coherence): - self.cluster_id = cluster_id - self.sentences = sentences - self.cluster_hash = self.generate_hash() - self.user_id = user_id - self.generation = generation - self.session_id = session_id - self.coherence = coherence - self.wepcc_result = None - - def generate_hash(self): - sorted_sentences = sorted(self.sentences) - return hashlib.sha256(json.dumps(sorted_sentences).encode()).hexdigest() - - def to_dict(self): - return { - "cluster_id": self.cluster_id, - "sentences": self.sentences, - "cluster_hash": self.cluster_hash, - "user_id": self.user_id, - "generation": self.generation, - "session_id": self.session_id, - "coherence": self.coherence, - "wepcc_result": self.wepcc_result, - } - - def update_wepcc(self, wepcc_result): - self.wepcc_result = wepcc_result - - - -class DebateSimulator: - _instance = None - _lock = asyncio.Lock() - - @staticmethod - async def get_instance(): - if DebateSimulator._instance is None: - async with DebateSimulator._lock: - if DebateSimulator._instance is None: - DebateSimulator._instance = DebateSimulator() - return DebateSimulator._instance - - def __init__(self, use_neo4j=False): - if DebateSimulator._instance is not None: - raise Exception("This class is a singleton!") - else: - - if AppState._instance is None: - AppState(use_neo4j=use_neo4j) - - load_dotenv() # Load environment variables - - # Load the pre-trained model and tokenizer - self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') - self.model = AutoModel.from_pretrained('bert-base-uncased') - - self.operational_llm_model = "ollama:dolphin-llama3" - self.argument_detection_llm_model = "ollama:dolphin-llama3" - # self.argument_detection_llm_model = "claude:claude-3-5-sonnet-20240620" - # self.argument_detection_llm_model = "openai:gpt-4o" - - ONE_API_API_KEY = os.getenv("ONE_API_API_KEY") - - # Initialize the SentenceTransformer model for embedding text - # self.fast_embedding_model = SentenceTransformer('all-MiniLM-L6-v2') - self.fast_embedding_model = SentenceTransformer('all-mpnet-base-v2') - - self.argument_detection = ArgumentDetection(model=self.argument_detection_llm_model, api_key=ONE_API_API_KEY) - - self.semantic_compression = SemanticCompression(model=self.operational_llm_model, api_key="ollama") - - self.app_state = AppState.get_instance() - - neo4j_uri = os.getenv("NEO4J_URI") - neo4j_user = os.getenv("NEO4J_USER") - neo4j_password = os.getenv("NEO4J_PASSWORD") - self.showroom_db_name = os.getenv("NEO4J_SHOWROOM_DATABASE") - self.use_neo4j = use_neo4j - - # self.cache_manager = ConversationCacheManager() - self.ontological_feature_detection = OntologicalFeatureDetection(neo4j_uri, neo4j_user, neo4j_password, - self.showroom_db_name, self.use_neo4j) - - # JWT secret key (should be securely stored, e.g., in environment variables) - self.jwt_secret = os.getenv("JWT_SECRET") - - self.current_generation = None - self.websocket_groups = {} - - self.channel_engine = ChannelEngine() - self.channel_engine.register_task_handler('check_and_reflect', self.check_and_reflect) # Register the handler - self.channel_engine.register_task_handler('broadcast', self.websocket_broadcast) - - def generate_jwt_token(self, user_id, session_id): - payload = { - "user_id": user_id, - "session_id": session_id, - "exp": datetime.now(UTC) + timedelta(hours=1) # Token valid for 1 hour - } - token = jwt.encode(payload, self.jwt_secret, algorithm="HS256") - return token - - async def add_to_websocket_group(self, session_id, websocket): - await self._lock.acquire() - try: - if session_id not in self.websocket_groups: - self.websocket_groups[session_id] = [] - self.websocket_groups[session_id].append(websocket) - finally: - self._lock.release() - - async def remove_from_websocket_group(self, session_id, websocket): - await self._lock.acquire() - try: - if session_id in self.websocket_groups: - self.websocket_groups[session_id].remove(websocket) - if not self.websocket_groups[session_id]: - del self.websocket_groups[session_id] - finally: - self._lock.release() - - # async def execute_task(self, task): - # print(f"Executing task: {task['type']}") - # if task['type'] == 'check_and_reflect': - # self.current_generation = task['generation_nonce'] - # await self.check_and_reflect(task['session_id'], task['user_id'], task['generation_nonce'], - # task['message_id'], task['message']) - # elif task['type'] == 'broadcast': - # await self.websocket_broadcast(task['websocket'], task['message']) - # # print(f"Finished executing task: {task['type']}") - - async def websocket_broadcast(self, websocket, message): - if message: - await websocket.send_text(message) - - async def stop_all_reflect_tasks(self): - await self.channel_engine.reset_processing_queue() - - async def get_ontology(self, user_id, session_id, message_id, message): - composable_string = f"for user {user_id}, of {session_id}, the message is: {message}" - # print(f"\t\t[ composable_string :: {composable_string} ]") - - entities, pos_tags, dependencies, relations, srl_results, timestamp, context_entities = self.ontological_feature_detection.build_ontology_from_paragraph( - user_id, session_id, message_id, composable_string) - - if self.use_neo4j: - self.ontological_feature_detection.store_ontology(user_id, session_id, message_id, message, timestamp, context_entities, relations) - - input_components = message, entities, dependencies, relations, srl_results, timestamp, context_entities - - mermaid_syntax = self.ontological_feature_detection.extract_mermaid_syntax(input_components, input_type="components") - return mermaid_syntax - - def has_message_id(self, message_id): - if self.use_neo4j: - return self.ontological_feature_detection.check_message_exists(message_id) - else: - return False - - async def integrate(self, token, data, app_state, cancel_old_tasks): - payload = json.loads(data) - message = payload["message"] - - # create a new message id, with 36 characters max - message_id = str(uuid4()) - - # check for collisions - while self.has_message_id(message_id): - # re-roll a new message id, with 36 characters max - message_id = str(uuid4()) - - # Decode JWT token to extract user_id and session_id - try: - decoded_token = jwt.decode(token, self.jwt_secret, algorithms=["HS256"]) - user_id = decoded_token.get("user_id", "") - except InvalidTokenError: - print(f"Invalid JWT token error :: {token}") - # await websocket.send_json({"status": "error", "response": "Invalid JWT token"}) - return - - session_id = payload.get("session_id", "") - - # if no user_id, bail - if user_id == "" or session_id == "": - return - - current_topic = payload.get("topic", "Unknown") - - # from app state - message_history = app_state.get_value(f"message_history_{session_id}", []) - - prior_ontology = app_state.get_value(f"prior_ontology_{session_id}", []) - - current_ontology = await self.get_ontology(user_id, session_id, message_id, message) - - # print(f"[ prior_ontology: {prior_ontology} ]") - # print(f"[ current_ontology: {current_ontology} ]") - - prior_ontology.append(current_ontology) - - app_state.set_state(f"prior_ontology_{session_id}", prior_ontology) - - mermaid_to_ascii = self.ontological_feature_detection.mermaid_to_ascii(current_ontology) - # print(f"[ mermaid_to_ascii: {mermaid_to_ascii} ]") - - new_history_item = { - "data": { - "user_id": user_id, - "content": message, - "timestamp": datetime.now().isoformat(), - "message_id": message_id, - "topic": current_topic - }, - "ontology": current_ontology, - "mermaid": mermaid_to_ascii - } - - message_history.append(new_history_item) - - app_state.set_state(f"message_history_{session_id}", message_history) - - # Create new Generation - generation_nonce = self.generate_nonce() - - if cancel_old_tasks: - await self.stop_all_reflect_tasks() - - # print(f"Creating check_and_reflect task for message: {message_id}") - task = { - 'type': 'check_and_reflect', - 'session_id': session_id, - 'user_id': user_id, - 'generation_nonce': generation_nonce, - 'message_id': message_id, - 'message': message - } - print(f"Task created: {task}") - await self.channel_engine.add_task(task) - # print(f"Task added to queue for message: {message_id}") - - return current_ontology, message_id - - @staticmethod - def generate_nonce(): - return str(uuid4()) - - @staticmethod - def break_into_sentences(messages, min_words=20): - output = [] - for message in messages: - content = message["data"]["content"].strip() # Remove leading/trailing whitespace - sentences = sent_tokenize(content) - - current_sentence = [] - - for sentence in sentences: - sentence = sentence.strip() # Remove leading/trailing whitespace - if not sentence: - continue # Skip empty sentences - - words = sentence.split() - if len(current_sentence) + len(words) >= min_words: - current_sentence.extend(words) # Extend current sentence before appending - output.append({"role": message["role"], "data": {"user_id": message["data"]["user_id"], - "content": " ".join(current_sentence)}}) - current_sentence = [] # Reset current_sentence after appending - else: - current_sentence.extend(words) - - if current_sentence: - output.append({"role": message["role"], - "data": {"user_id": message["data"]["user_id"], "content": " ".join(current_sentence)}}) - - return output - - @staticmethod - def aggregate_user_messages(message_history: List[Dict]) -> Dict[str, List[str]]: - user_messages = {} - for message in message_history: - user_id = message['data']['user_id'] - content = message['data']['content'] - if user_id not in user_messages: - user_messages[user_id] = [] - user_messages[user_id].append(content) - return user_messages - - def incremental_clustering(self, clusters, previous_clusters): - updated_clusters = {} - for user_id, user_clusters in clusters.items(): - if user_id not in previous_clusters: - updated_clusters[user_id] = user_clusters - else: - updated_clusters[user_id] = {} - for cluster_id, cluster in user_clusters.items(): - previous_cluster_hash = previous_clusters[user_id].get(cluster_id, None) - if not previous_cluster_hash or cluster.cluster_hash != previous_cluster_hash.cluster_hash: - updated_clusters[user_id][cluster_id] = cluster - - return updated_clusters - - async def broadcast_to_websocket_group(self, session_id, json_message): - await self._lock.acquire() - try: - websockets = self.websocket_groups.get(session_id, []) - - for websocket in websockets: - await websocket.send_json(json_message) - finally: - self._lock.release() - - def check_generation_halting(self, generation_nonce): - if self.current_generation is not None and self.current_generation != generation_nonce: - return True - - return False - - async def check_and_reflect(self, session_id, user_id, generation_nonce, message_id, message): - print(f"\t[ check_and_reflect started for message: {message_id} ]") - # "Reflect" - # cluster message callback - # each cluster is defined by a cluster id (a hash of its messages, messages sorted alphabetically) - - # 1. early out if the cluster is identical - # 2. total message completion is based on all messages (a generation) - # 3. previous generations DO NOT complete - they are halted upon a new message - # 4. clustering will not be affected by other Users if their message has not changed, but generations - # always will because every new message from another player is dealt with re: claims/counterclaims - # 5. technically, each generation has a final score (though because of processing reqs, we're not expecting - # to have more than each generation w/ a final score, technically this can be done as well, but - # it's probably not germane to the convo needs, so let's just not) - - # prioritize wepcc (warrant evidence persuasiveness/justification claim counterclaim) for the user's cluster - - app_state = AppState().get_instance() - - message_history = app_state.get_value(f"message_history_{session_id}", []) - - # Step 1: Gather message history for specific users - user_messages = self.aggregate_user_messages(message_history) - # print(f"\t[ reflect :: user_messages :: {user_messages} ]") - - # Step 2: Cluster analysis for each user's messages - clusters = self.cluster_messages(user_messages, generation_nonce, session_id) - # print(f"\t[ reflect :: clustered_messages :: {len(clusters)} ]") - - # Send initial cluster data back to frontend - await self.broadcast_to_websocket_group(session_id, { - "status": "initial_clusters", - "clusters": {user_id: [cluster.to_dict() for cluster in user_clusters.values()] for user_id, user_clusters - in clusters.items()}, - "generation": generation_nonce - }) - if self.check_generation_halting(generation_nonce) is True: - return - - # Perform incremental clustering if needed - previous_clusters = app_state.get_value(f"previous_clusters_{session_id}_{user_id}", {}) - - # Extract properly ID-matching clusters from previous_clusters - for user_id, user_clusters in clusters.items(): - if user_id in previous_clusters: - for cluster_id, cluster in user_clusters.items(): - if cluster_id in previous_clusters[user_id]: - cluster.update_wepcc(previous_clusters[user_id][cluster_id].wepcc_result) - - updated_clusters = self.incremental_clustering(clusters, previous_clusters) - - # Send updated cluster data back to frontend - await self.broadcast_to_websocket_group(session_id, { - "status": "updated_clusters", - "clusters": {user_id: [cluster.to_dict() for cluster in user_clusters.values()] for user_id, user_clusters - in updated_clusters.items()}, - "generation": generation_nonce - }) - if self.check_generation_halting(generation_nonce) is True: - return - - async def report_wepcc_result(generation_nonce, user_id, cluster_id, cluster_hash, wepcc_result): - await self.broadcast_to_websocket_group(session_id, { - "status": "wepcc_result", - "generation": generation_nonce, - "user_id": user_id, - "cluster_id": cluster_id, - "cluster_hash": cluster_hash, - "wepcc_result": wepcc_result, - }) - if self.check_generation_halting(generation_nonce) is True: - return - - # Step 3: Run WEPCC on each cluster - # these each take a bit to process, so we're passing in the websocket group to stream the results back out - # due to timing these may be inconsequential re: generation, but they're going to send back the results anyhow. - wepcc_results = await self.wepcc_cluster(updated_clusters, report_wepcc_result) - # print(f"\t[ reflect :: wepcc_results :: {wepcc_results} ]") - - # Update clusters with WEPCC results - for user_id, user_clusters in updated_clusters.items(): - for cluster_id, cluster in user_clusters.items(): - if cluster_id in wepcc_results[user_id]: - wepcc = wepcc_results[user_id][cluster_id] - cluster.update_wepcc(wepcc) - - app_state.set_state(f"previous_clusters_{session_id}_{user_id}", clusters) - - # Check if there are enough clusters or users to perform argument matching - if len(clusters) < 2: - print("\t[ reflect :: Not enough clusters, but returning user's clusters ]") - - # Initialize shadow coverage with no coverage - cluster_shadow_coverage = {user_id: {} for user_id in clusters.keys()} - - # Assume wepcc_results are already calculated earlier in the process - unaddressed_score_multiplier = 2.5 # Example multiplier - - # Call gather_final_results - aggregated_scores, addressed_clusters, unaddressed_clusters, results = self.gather_final_results( - cluster_shadow_coverage, clusters, unaddressed_score_multiplier - ) - - await self.broadcast_to_websocket_group(session_id, { - "status": "final_results", - "generation": generation_nonce, - "aggregated_scores": aggregated_scores, - "addressed_clusters": addressed_clusters, - "unaddressed_clusters": unaddressed_clusters, - "results": results, - }) - - return - - # Define similarity cutoff threshold - cutoff = 0.35 - - # Define unaddressed score multiplier - unaddressed_score_multiplier = 2.5 - - # Initialize phase similarity and cluster weight modulator - # Step 4: Match each user's Counterclaims with all other users' Claims - # This function takes a moment, as it does an embedding check. Not super heavy, but with enough participants - # certainly an async operation - cluster_weight_modulator = self.get_cluster_weight_modulator(clusters, cutoff) - - # Step 5: Calculate the counter-factual shadow coverage for each cluster - # Create a new dictionary to hold the final combined scores - # This function is very fast, relatively speaking - cluster_shadow_coverage = self.get_cluster_shadow_coverage(cluster_weight_modulator, cutoff) - - # Step 6: Final aggregation and ranking - (aggregated_scores, - addressed_clusters, - unaddressed_clusters, - results) = self.gather_final_results(cluster_shadow_coverage, clusters, unaddressed_score_multiplier) - - print(f"\t[ reflect :: aggregated_scores :: {aggregated_scores} ]") - print(f"\t[ reflect :: addressed_clusters :: {addressed_clusters} ]") - print(f"\t[ reflect :: unaddressed_clusters :: {unaddressed_clusters} ]") - - # Print the number of unaddressed clusters for each user - print("\nUnaddressed Clusters Summary:") - for user_id, unaddressed_list in unaddressed_clusters.items(): - num_unaddressed = len(unaddressed_list) - print(f"\t\t[ User {user_id}: {num_unaddressed} unaddressed cluster(s) ]") - - - app_state.set_state("wepcc_results", wepcc_results) - app_state.set_state("aggregated_scores", aggregated_scores) - app_state.set_state("addressed_clusters", addressed_clusters) - app_state.set_state("unaddressed_clusters", unaddressed_clusters) - - await self.broadcast_to_websocket_group(session_id, { - "status": "final_results", - "generation": generation_nonce, - "aggregated_scores": aggregated_scores, - "addressed_clusters": addressed_clusters, - "unaddressed_clusters": unaddressed_clusters, - "results": results, - }) - - print(f"\t[ check_and_reflect :: Completed ]") - - def cluster_messages(self, user_messages, generation, session_id): - clustered_messages = {} - for user_id, messages in user_messages.items(): - if len(messages) > 1: - clusters, coherence_scores = self.argument_detection.cluster_sentences(messages, distance_threshold=0.3) - clustered_messages[user_id] = { - int(cluster_id): Cluster(cluster_id=int(cluster_id), - sentences=cluster_sentences, - user_id=user_id, - generation=generation, - session_id=session_id, - coherence=coherence_scores.get(cluster_id, 1.0)) - for cluster_id, cluster_sentences in clusters.items() - } - elif len(messages) == 1: - # Create a single cluster for the lone message - cluster_id = 0 - clustered_messages[user_id] = { - cluster_id: Cluster(cluster_id=cluster_id, - sentences=messages, - user_id=user_id, - generation=generation, - - session_id=session_id, - coherence=1.0) # Single message cluster always has perfect coherence - } - return clustered_messages - - async def wepcc_cluster(self, clusters: Dict[str, Cluster], report_wepcc_result): - wepcc_results = {} - for user_id, user_clusters in clusters.items(): - wepcc_results[user_id] = {} - for cluster_id, cluster in user_clusters.items(): - # print(f"\t[ reflect :: Running WEPCC for user {user_id}, cluster {cluster_id} ]") - warrant, evidence, persuasiveness_justification, claim, counterclaim = self.argument_detection.fetch_argument_definition( - cluster.sentences) - wepcc_results[user_id][cluster_id] = { - 'warrant': warrant, - 'evidence': evidence, - 'persuasiveness_justification': persuasiveness_justification, - 'claim': claim, - 'counterclaim': counterclaim - } - self.pretty_print_wepcc_result(user_id, cluster_id, wepcc_results[user_id][cluster_id]) - # print( - # f"\t[ reflect :: WEPCC for user {user_id}, cluster {cluster_id} :: {wepcc_results[user_id][cluster_id]} ]") - - # Output to websocket - await report_wepcc_result(cluster.cluster_hash, user_id, cluster_id, cluster.cluster_hash, - wepcc_results[user_id][cluster_id]) - return wepcc_results - - def get_cluster_weight_modulator(self, clusters, cutoff): - cluster_weight_modulator = {} - for user_idA, user_clustersA in clusters.items(): - cluster_weight_modulator[user_idA] = cluster_weight_modulator.get(user_idA, {}) - - for cluster_idA, clusterA in user_clustersA.items(): - phase_sim_A = [] - wepcc_cluster_a = clusterA.wepcc_result - counterclaim_a = json.loads(wepcc_cluster_a['counterclaim']) - counterclaim_embedding = self.fast_embedding_model.encode(counterclaim_a['content']) - - for user_idB, user_clustersB in clusters.items(): - if user_idA != user_idB: - for cluster_idB, clusterB in user_clustersB.items(): - wepcc_cluster_b = clusterB.wepcc_result - claim_b = json.loads(wepcc_cluster_b['claim']) - - # Calculate cosine similarity between counterclaims and claims - claim_embedding = self.fast_embedding_model.encode(claim_b['content']) - sim_score = cosine_similarity([counterclaim_embedding], [claim_embedding])[0][0] - print( - f"\t[ reflect :: Sim score between {user_idA}'s counterclaim (cluster {cluster_idA}) and {user_idB}'s claim (cluster {cluster_idB}) :: {sim_score} ]") - if sim_score > cutoff: - phase_sim_A.append((sim_score, cluster_idB, user_idB)) - if cluster_idA not in cluster_weight_modulator[user_idA]: - cluster_weight_modulator[user_idA][cluster_idA] = [] - for sim_score, cluster_idB, user_idB in phase_sim_A: - normalized_value = (sim_score - cutoff) / (1 - cutoff) - cluster_weight_modulator[user_idA][cluster_idA].append(normalized_value) - print( - f"\t[ reflect :: Normalized value for {user_idA} (cluster {cluster_idA}) :: {normalized_value} ]") - return cluster_weight_modulator - - def gather_final_results(self, cluster_shadow_coverage, clusters, unaddressed_score_multiplier): - aggregated_scores = {} - addressed_clusters = {} - unaddressed_clusters = {} - - results = [] - - for user_id, weight_mods in cluster_shadow_coverage.items(): - total_score = 0 - addressed_clusters[user_id] = [] - unaddressed_clusters[user_id] = [] - - user_result = {"user": user_id, "clusters": []} - - for cluster_id, modulator in weight_mods.items(): - try: - cluster = clusters[user_id][cluster_id] - persuasiveness_score = float( - json.loads(cluster.wepcc_result['persuasiveness_justification'])['content'][ - 'persuasiveness_score']) - - addressed_score = (1 - modulator) * persuasiveness_score - total_score += addressed_score - addressed_clusters[user_id].append((cluster_id, addressed_score)) - user_result["clusters"].append({ - "cluster": cluster_id, - "type": "addressed", - "score": addressed_score - }) - print( - f"\t[ reflect :: Addressed score for User {user_id}, Cluster {cluster_id} :: {addressed_score} ]") - except (json.JSONDecodeError, KeyError) as e: - print(f"\t[ reflect :: Error for User {user_id}, Cluster {cluster_id} :: {e} ]") - - # Add unaddressed arguments' scores - for cluster_id, cluster in clusters[user_id].items(): - if cluster_id not in weight_mods: - try: - persuasiveness_score = float( - json.loads(cluster.wepcc_result['persuasiveness_justification']) - ['content']['persuasiveness_score']) - - unaddressed_score = persuasiveness_score * unaddressed_score_multiplier - total_score += unaddressed_score - unaddressed_clusters[user_id].append((cluster_id, unaddressed_score)) - user_result["clusters"].append({ - "cluster": cluster_id, - "type": "unaddressed", - "score": unaddressed_score - }) - print( - f"\t[ reflect :: Unaddressed score for User {user_id}, Cluster {cluster_id} :: {unaddressed_score} ]") - except (json.JSONDecodeError, KeyError) as e: - print(f"\t[ reflect :: Error for User {user_id}, Cluster {cluster_id} :: {e} ]") - - aggregated_scores[user_id] = total_score - user_result["total_score"] = total_score - results.append(user_result) - print(f"\t[ reflect :: Aggregated score for User {user_id} :: {total_score} ]") - - # Process remaining clusters without shadow coverage - for user_id, user_clusters in clusters.items(): - if user_id not in aggregated_scores: - total_score = 0 - addressed_clusters[user_id] = [] - unaddressed_clusters[user_id] = [] - - user_result = {"user": user_id, "clusters": []} - - for cluster_id, cluster in user_clusters.items(): - if cluster_id not in cluster_shadow_coverage.get(user_id, {}): - try: - persuasiveness_score = float( - json.loads(cluster.wepcc_result['persuasiveness_justification'])['content'][ - 'persuasiveness_score']) - - unaddressed_score = persuasiveness_score * unaddressed_score_multiplier - total_score += unaddressed_score - unaddressed_clusters[user_id].append((cluster_id, unaddressed_score)) - user_result["clusters"].append({ - "cluster": cluster_id, - "type": "unaddressed", - "score": unaddressed_score - }) - print( - f"\t[ reflect :: Unaddressed score for User {user_id}, Cluster {cluster_id} :: {unaddressed_score} ]") - except (json.JSONDecodeError, KeyError) as e: - print(f"\t[ reflect :: Error for User {user_id}, Cluster {cluster_id} :: {e} ]") - - aggregated_scores[user_id] = total_score - user_result["total_score"] = total_score - results.append(user_result) - print(f"\t[ reflect :: Aggregated score for User {user_id} :: {total_score} ]") - - return aggregated_scores, addressed_clusters, unaddressed_clusters, results - - def get_cluster_shadow_coverage(self, cluster_weight_modulator, cutoff): - final_scores = {} - - # Post-process the collected normalized values for each cluster - for user_id, cluster_data in cluster_weight_modulator.items(): - final_scores[user_id] = final_scores.get(user_id, {}) - for cluster_idA, normalized_values in cluster_data.items(): - if normalized_values: - highest = max(normalized_values) - shadow_coverage = highest - for value in normalized_values: - if value != highest: - shadow_coverage += (value * (1.0 - cutoff)) * (1 - shadow_coverage) - # Since we're adding coverage, shadow_coverage should naturally stay within [0,1] - # No need to clamp or use min - - # Initialize the nested dictionary if it doesn't exist - if cluster_idA not in final_scores[user_id]: - final_scores[user_id][cluster_idA] = 0 - - # Store the final score - final_scores[user_id][cluster_idA] = shadow_coverage - print( - f"\t[ reflect :: Combined score for {user_id} (cluster {cluster_idA}) :: {shadow_coverage} ]") - - return final_scores - - def pretty_print_wepcc_result(self, user_id, cluster_id, wepcc_result): - print(f"\t[ reflect :: WEPCC for user {user_id}, cluster {cluster_id} ]") - - print("\nWarrant:") - print(json.loads(wepcc_result["warrant"])["content"]) - print("\nEvidence:") - print(json.loads(wepcc_result["evidence"])["content"]) - print("\nPersuasiveness Justification:") - print("Persuasiveness Score:", json.loads(wepcc_result["persuasiveness_justification"])["content"]["persuasiveness_score"]) - print("Justification:", json.loads(wepcc_result["persuasiveness_justification"])["content"]["justification"]) - print("\nClaim:") - print(json.loads(wepcc_result["claim"])["content"]) - print("\nCounterclaim:") - print(json.loads(wepcc_result["counterclaim"])["content"]) - print("\n") - diff --git a/topos/channel/experimental/debatesim_experimental_think.py b/topos/channel/experimental/debatesim_experimental_think.py deleted file mode 100644 index 3189547..0000000 --- a/topos/channel/experimental/debatesim_experimental_think.py +++ /dev/null @@ -1,892 +0,0 @@ -# topos/channel/debatesim.py -import hashlib - -from typing import Dict, List - -import os -import uuid -from uuid import uuid4 -import threading -from queue import Queue - -from dotenv import load_dotenv - -import jwt -from jwt.exceptions import InvalidTokenError - -import json -from datetime import datetime -import time - -from transformers import AutoTokenizer, AutoModel -from sentence_transformers import SentenceTransformer -import torch -from sklearn.metrics.pairwise import cosine_similarity -import numpy as np -from scipy.stats import entropy - -from fastapi import WebSocket, WebSocketDisconnect -from ..FC.argument_detection import ArgumentDetection -from ..config import get_openai_api_key -from ..models.llm_classes import vision_models -from ..generations.chat_gens import LLMController -from ..services.database.app_state import AppState -from ..utilities.utils import create_conversation_string -from ..services.classification_service.base_analysis import base_text_classifier, base_token_classifier -from topos.FC.conversation_cache_manager import ConversationCacheManager -from topos.FC.semantic_compression import SemanticCompression -from topos.FC.ontological_feature_detection import OntologicalFeatureDetection - -# chess is more complicated than checkers but less complicated than go - -# current: -# graph LR -# timestamp["Timestamp: 2024-06-08T23:47:36.059626"] -# user["user (USER)"] -# sessionTEMP["sessionTEMP (SESSION)"] -# userPRIME["userPRIME (USER)"] -# than --> checkers -# sessionTEMP --> of -# checkers --> complicated -# message --> is -# userPRIME --> for -# is --> chess -# is --> message -# checkers --> than -# of --> sessionTEMP -# chess --> is -# for --> userPRIME -# complicated --> is -# timestamp --> user - -# target: -# graph LR -# userPRIME["userPRIME (USER)"] -# sessionTEMP["sessionTEMP (SESSION)"] -# timestamp["Timestamp: 2024-06-08T23:18:05.206590"] -# message["message"] -# chess["chess"] -# more_complicated["more complicated"] -# checkers["checkers"] -# less_complicated["less complicated"] -# go["go"] -# -# userPRIME --> user -# sessionTEMP --> session -# timestamp --> user -# message --> userPRIME -# message --> sessionTEMP -# message --> timestamp -# chess --> message -# more_complicated --> chess -# more_complicated --> checkers -# less_complicated --> chess -# less_complicated --> go - - -class Cluster: - def __init__(self, cluster_id, sentences, user_id, generation, session_id): - self.cluster_id = cluster_id - self.sentences = sentences - self.cluster_hash = self.generate_hash() - self.user_id = user_id - self.generation = generation - self.session_id = session_id - - def generate_hash(self): - sorted_sentences = sorted(self.sentences) - return hashlib.sha256(json.dumps(sorted_sentences).encode()).hexdigest() - - def to_dict(self): - return { - "cluster_id": self.cluster_id, - "sentences": self.sentences, - "cluster_hash": self.cluster_hash, - "user_id": self.user_id, - "generation": self.generation, - "session_id": self.session_id - } - - -class DebateSimulatorThink: - _instance = None - _lock = threading.Lock() - - @staticmethod - def get_instance(): - if DebateSimulator._instance is None: - with DebateSimulator._lock: - if DebateSimulator._instance is None: - DebateSimulator._instance = DebateSimulator() - return DebateSimulator._instance - - def __init__(self, use_neo4j=False): - if DebateSimulator._instance is not None: - raise Exception("This class is a singleton!") - else: - # Load the pre-trained model and tokenizer - self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') - self.model = AutoModel.from_pretrained('bert-base-uncased') - - self.operational_llm_model = "ollama:dolphin-llama3" - - # Initialize the SentenceTransformer model for embedding text - self.fast_embedding_model = SentenceTransformer('all-MiniLM-L6-v2') - self.argument_detection = ArgumentDetection(model=self.operational_llm_model, api_key="ollama") - - self.semantic_compression = SemanticCompression(model=self.operational_llm_model, api_key="ollama") - - self.app_state = AppState.get_instance() - - load_dotenv() # Load environment variables - - neo4j_uri = os.getenv("NEO4J_URI") - neo4j_user = os.getenv("NEO4J_USER") - neo4j_password = os.getenv("NEO4J_PASSWORD") - self.showroom_db_name = os.getenv("NEO4J_SHOWROOM_DATABASE") - self.use_neo4j = use_neo4j - - # self.cache_manager = ConversationCacheManager() - self.ontological_feature_detection = OntologicalFeatureDetection(neo4j_uri, neo4j_user, neo4j_password, - self.showroom_db_name, self.use_neo4j) - - # JWT secret key (should be securely stored, e.g., in environment variables) - self.jwt_secret = os.getenv("JWT_SECRET") - - self.task_queue = Queue() - self.processing_thread = threading.Thread(target=self.process_tasks) - self.processing_thread.daemon = True - self.processing_thread.start() - - def add_task(self, task): - self.task_queue.put(task) - - - def websocket_broadcast(self, websocket, message): - while True: - if message: # Condition to broadcast - websocket.send(message) - time.sleep(1) # Adjust as necessary - - # Function to start the subprocess - def start_broadcast_subprocess(self, websocket, message): - broadcast_thread = threading.Thread(target=self.websocket_broadcast, args=(websocket, message)) - broadcast_thread.start() - - def get_ontology(self, user_id, session_id, message_id, message): - composable_string = f"for user {user_id}, of {session_id}, the message is: {message}" - print(f"\t\t[ composable_string :: {composable_string} ]") - - entities, pos_tags, dependencies, relations, srl_results, timestamp, context_entities = self.ontological_feature_detection.build_ontology_from_paragraph( - user_id, session_id, message_id, composable_string) - - if self.use_neo4j: - self.ontological_feature_detection.store_ontology(user_id, session_id, message_id, message, timestamp, context_entities, relations) - - input_components = message, entities, dependencies, relations, srl_results, timestamp, context_entities - - mermaid_syntax = self.ontological_feature_detection.extract_mermaid_syntax(input_components, input_type="components") - return mermaid_syntax - - def search_messages_by_user(self, user_id): - return self.ontological_feature_detection.get_messages_by_user(user_id) - - def search_messages_by_session(self, session_id, relation_type): - return self.ontological_feature_detection.get_messages_by_session(session_id, relation_type) - - def search_users_by_session(self, session_id, relation_type): - return self.ontological_feature_detection.get_users_by_session(session_id, relation_type) - - def search_sessions_by_user(self, user_id, relation_type): - return self.ontological_feature_detection.get_sessions_by_user(user_id, relation_type) - - def has_message_id(self, message_id): - if self.use_neo4j: - return self.ontological_feature_detection.check_message_exists(message_id) - else: - return False - - # @note: integrate is post, due to constant - async def integrate(self, token, data, app_state): - payload = json.loads(data) - message = payload["message"] - - # create a new message id, with 36 characters max - message_id = str(uuid4()) - - # check for collisions - while self.has_message_id(message_id): - # re-roll a new message id, with 36 characters max - message_id = str(uuid4()) - - # Decode JWT token to extract user_id and session_id - try: - decoded_token = jwt.decode(token, self.jwt_secret, algorithms=["HS256"]) - user_id = decoded_token.get("user_id", "") - session_id = decoded_token.get("session_id", "") - except InvalidTokenError: - # await websocket.send_json({"status": "error", "response": "Invalid JWT token"}) - return - - # if no user_id, bail - if user_id == "" or session_id == "": - return - - current_topic = payload.get("topic", "Unknown") - - # from app state - message_history = app_state.get_value(f"message_history_{session_id}", []) - - prior_ontology = app_state.get_value(f"prior_ontology_{session_id}", []) - - current_ontology = self.get_ontology(user_id, session_id, message_id, message) - - print(f"[ prior_ontology: {prior_ontology} ]") - print(f"[ current_ontology: {current_ontology} ]") - - prior_ontology.append(current_ontology) - - app_state.set_state(f"prior_ontology_{session_id}", prior_ontology) - - mermaid_to_ascii = self.ontological_feature_detection.mermaid_to_ascii(current_ontology) - print(f"[ mermaid_to_ascii: {mermaid_to_ascii} ]") - - message_history.append(message) - - app_state.set_state(f"message_history_{session_id}", message_history) - - # Create new Generation - generation_nonce = self.generate_nonce() - - self.add_task({ - 'type': 'check_and_reflect', - 'session_id': session_id, - 'user_id': user_id, - 'generation_nonce': generation_nonce, - 'message_id': message_id, - 'message': message} - ) - - return current_ontology - - @staticmethod - def generate_nonce(): - return str(uuid.uuid4()) - - @staticmethod - def aggregate_user_messages(message_history: List[Dict]) -> Dict[str, List[str]]: - user_messages = {} - for message in message_history: - user_id = message['data']['user_id'] - content = message['data']['content'] - if user_id not in user_messages: - user_messages[user_id] = [] - user_messages[user_id].append(content) - return user_messages - - def incremental_clustering(self, clusters, previous_clusters): - updated_clusters = {} - for user_id, user_clusters in clusters.items(): - if user_id not in previous_clusters: - updated_clusters[user_id] = user_clusters - else: - updated_clusters[user_id] = {} - for cluster_id, cluster in user_clusters.items(): - previous_cluster_hash = previous_clusters[user_id].get(cluster_id, None) - if not previous_cluster_hash or cluster.cluster_hash != previous_cluster_hash.cluster_hash: - updated_clusters[user_id][cluster_id] = cluster - - return updated_clusters - - async def broadcast_to_websocket_group(self, websocket_group, json_message): - for websocket in websocket_group: - await websocket.send_json(json_message) - - - async def debate_step(self, websocket: WebSocket, data, app_state): - payload = json.loads(data) - message = payload["message"] - - # create a new message id, with 36 characters max - message_id = str(uuid4()) - - # check for collisions - while self.has_message_id(message_id): - # re-roll a new message id, with 36 characters max - message_id = str(uuid4()) - - user_id = payload.get("user_id", "") - session_id = payload.get("session_id", "") - - user_id = app_state.get_value("user_id", "") - session_id = app_state.get_value("session_id", "") - - # if no user_id, bail - if user_id == "": - await websocket.send_json({"status": "error", "response": "Invalid JSON payload"}) - return - - - # if no session_id, bail - - if session_id == "": - await websocket.send_json({"status": "error", "response": "Invalid JSON payload"}) - return - - # default to app state if not provided - if user_id == "": - user_id = app_state.get_value("user_id", "") - - message_history = payload["message_history"] - - # model specifications - model = payload.get("model", "solar") - provider = payload.get('provider', 'ollama') # defaults to ollama right now - api_key = payload.get('api_key', 'ollama') - - llm_client = LLMController(model_name=model, provider=provider, api_key=api_key) - - temperature = float(payload.get("temperature", 0.04)) - current_topic = payload.get("topic", "Unknown") - - prior_ontology = app_state.get_value("prior_ontology", []) - - # if prior_ontology is []: - # prior_ontology = [] - - current_ontology = self.get_ontology(user_id, session_id, message_id, message) - - mermaid_to_ascii = self.ontological_feature_detection.mermaid_to_ascii(current_ontology) - - print(f"[ prior_ontology: {prior_ontology} ]") - - print(f"[ current_ontology: {current_ontology} ]") - - print(f"[ mermaid_to_ascii: {mermaid_to_ascii} ]") - - prior_ontology.append(current_ontology) - - app_state.set_state("prior_ontology", prior_ontology) - - # algo approach(es): - - # ontological feature detection - - # break previous messages into ontology - # cache ontology - # ** use diffuser to spot differentials - # *** map causal ontology back to spot reference point - # break current message into ontology - # ** BLEU score a 10x return on the ontology - # read ontology + newest - - # await self.think(topic="Chess vs Checkers", prior_ontology=prior_ontology) - - await self.reflect(topic=current_topic, message_history=message_history) - - - # topic3: - # a hat is worn by a person, who has an edge in a meeting due to wearing the hat - - # topic2: - # I think larger nuclear reactors are better than smaller ones - - # topic: - # checkers is better than chess - # - # user1: - # [user1] chess is better than checkers - # - # user2: - # [user2] no, checkers is better than chess - it's faster - # - # user1: - # [user1] I don't believe so - checkers always takes at least a large fixed time to perform moves, and chess can mate in less than 10 if you're good - # - # user2: - # [user2] but checkers is more accessible to a wider audience, and it's easier to learn - # - # user1: - # [user1] that's true, but chess has a richer history and more complex strategies - # - # user2: - # [user2] but checkers is more fun to play, and it's more engaging - - # Set system prompt - has_topic = False - system_prompt = f"" - - user_definition_prompt = f"""----- - Users are defined by the following roles: user1, user2, user3, etc. The moderator is defined by the role: moderator.\n - Roles are indicated by the format: - [user1]: "I have an opinion on XYZ" - [user2]: "I have another opinion on ABC" - [moderator]: "I think the topic might be XYZ" - ------ - """ - - - if current_topic == "unknown topic": - system_prompt = f"""You are a smooth talking, eloquent, poignant, insightful AI moderator. The current topic is unknown, so try not to make any judgements thus far - only re-express the input words in your own style, in the format of:\n - {{\"role\":\"moderator\", \"content\":\"I think the topic might be...(_insert name of what you think the topic might be based on the ongoing discussion here!_)\", \"certainty_score\": \"(_insert certainty score 1-10 here!_)\"}}""" - else: - has_topic = True - system_prompt = f"""You are a smooth talking, eloquent, poignant, insightful AI moderator. The current topic is {current_topic}.\n - You keep track of who is speaking, in the context of saying out loud every round:\n - {{\"role\": \"moderator\", \"content\": \"The topic is...(_insert name of topic here!_)\", \"synopsis\": \"(_insert synopsis of the content so far, with debaters points in abstract_)\", \"affirmative_negative score\": \"(_insert debate affirmative (is affirming the premise of the current \'topic\') score, 1 to 10, here!_) / (_insert debate negative (is not affirming the premise of the current "topic", and is correlated to the inverse of the statement) score, 1 to 10, here!_)\"}}""" - - system_prompt = f"{user_definition_prompt}\n{system_prompt}" - - user_prompt = f"{message}" - - # print(f"\t[ system prompt :: {system_prompt} ]") - print(f"\t[ user prompt :: {user_prompt} ]") - simp_msg_history = [{'role': 'system', 'content': system_prompt}] - - # Simplify message history to required format - for index, message in enumerate(message_history): - message_role = message['role'] - if message_role == "user": - message_user_id = f"{message['data']['user_id']}:" - message_content = message['data']['content'] - else: - message_user_id = "" - message_content = message['content'] - - simplified_message = {'role': message['role'], 'content': f"{message_user_id}{message_content}"} - if 'images' in message: - simplified_message['images'] = message['images'] - - simp_msg_history.append(simplified_message) - - simp_msg_history.append({'role': 'user', 'content': f"{user_id}:{user_prompt}"}) - - # Processing the chat - output_combined = "" - for chunk in llm_client.stream_chat(simp_msg_history, model=model, temperature=temperature): - output_combined += chunk - await websocket.send_json({"status": "generating", "response": output_combined, 'completed': False}) - - output_json = [] - try: - result = json.loads(f"{output_combined}") - output_json = result - except json.JSONDecodeError: - output_json = output_combined - print(f"\t\t[ error in decoding :: {output_combined} ]") - - # Fetch semantic category from the output - semantic_category = self.semantic_compression.fetch_semantic_category(output_combined) - - # Send the final completed message - await websocket.send_json( - {"status": "completed", "response": output_combined, "semantic_category": semantic_category.content, - "completed": True}) - - def embed_text(self, text): - # Tokenize the input text - inputs = self.tokenizer(text, return_tensors='pt') - - # Get the hidden states from the model - with torch.no_grad(): - outputs = self.model(**inputs) - - # Use the [CLS] token representation as the embedding - cls_embedding = outputs.last_hidden_state[:, 0, :].numpy().squeeze() - return cls_embedding - - def compute_semantic_distances(self, ontology, topic_embedding): - # Embed the ontology text - ontology_embedding = self.embed_text(ontology) - - # Compute cosine similarity between ontology and topic - similarity = cosine_similarity([ontology_embedding], [topic_embedding])[0][0] - - return 1 - similarity # Return distance instead of similarity - - def normalize_distances(self, distances): - total = np.sum(distances) - if total == 0: - return np.zeros_like(distances) - return distances / total - - def aggregate_distributions(self, semantic_distances): - # Convert to numpy array for easier manipulation - distances = np.array(semantic_distances) - - # Compute the mean across all distances to form the collective distribution - if len(distances) == 0: - return np.array([0.5]) # Handle empty distributions - collective_distribution = np.mean(distances, axis=0) - - return collective_distribution - - def calculate_kl_divergence(self, p, q): - # Ensure the distributions are numpy arrays - p = np.asarray(p, dtype=float) - q = np.asarray(q, dtype=float) - - # Normalize the distributions - p = p / np.sum(p) - q = q / np.sum(q) - - # Calculate the KL-Divergence - kl_div = entropy(p, q) - - return kl_div - - def calculate_impact_scores(self, kl_divergences, collective_distribution): - # Calculate the impact score for each contribution - impact_scores = [] - for kl_divergence in kl_divergences: - impact_score = kl_divergence - collective_distribution - impact_scores.append(impact_score) - - return impact_scores - - async def think(self, topic, prior_ontology): - print(f"\t[ think :: topic :: {topic} ]") - print(f"\t[ think :: prior_ontology :: {prior_ontology} ]") - - # Embed the topic - topic_embedding = self.embed_text(topic) - - # Compute semantic distances for each contribution - semantic_distances = [] - for ontology in prior_ontology: - distance = self.compute_semantic_distances(ontology, topic_embedding) - print(f"\t\t[ think :: distance :: {distance} ]") - semantic_distances.append(distance) - - # Normalize distances - normalized_distances = self.normalize_distances(semantic_distances) - print(f"\t[ think :: normalized_distances :: {normalized_distances} ]") - - # Aggregate the distributions to form a collective distribution - collective_distribution = self.aggregate_distributions(normalized_distances) - print(f"\t[ think :: collective_distribution :: {collective_distribution} ]") - - # Calculate KL-Divergence for each contribution - kl_divergences = [] - for distance in normalized_distances: - kl_divergence = self.calculate_kl_divergence([distance, 1 - distance], - [collective_distribution, 1 - collective_distribution]) - print(f"\t\t[ think :: kl_divergence :: {kl_divergence} ]") - kl_divergences.append(kl_divergence) - - # Calculate impact scores - impact_scores = self.calculate_impact_scores(kl_divergences, collective_distribution) - print(f"\t[ think :: impact_scores :: {impact_scores} ]") - - # Store results in app_state (subkey session_id) - app_state = AppState().get_instance() - app_state.set_state("kl_divergences", kl_divergences) - - print(f"\t[ think :: kl_divergences :: {kl_divergences} ]") - - parsed_ontology = [self.parse_mermaid_to_dict(component) for component in prior_ontology] - print(f"\t[ think :: parsed_ontology :: {parsed_ontology} ]") - - # Build a graph from parsed ontology - graph = self.build_graph(parsed_ontology) - print(f"\t[ think :: graph :: {graph} ]") - - # Calculate weights for each vertex based on the number of edges - vertex_weights = self.calculate_vertex_weights(graph) - print(f"\t[ think :: vertex_weights :: {vertex_weights} ]") - - # Identify and weigh densely connected sub-graphs - sub_graph_weights = self.calculate_sub_graph_weights(graph) - print(f"\t[ think :: sub_graph_weights :: {sub_graph_weights} ]") - - # Combine weights to determine the strength of each argument - argument_weights = self.combine_weights(vertex_weights, sub_graph_weights) - print(f"\t[ think :: argument_weights :: {argument_weights} ]") - - # Rank arguments based on their weights - ranked_arguments = self.rank_arguments(argument_weights) - print(f"\t[ think :: ranked_arguments :: {ranked_arguments} ]") - - return ranked_arguments - - def parse_mermaid_to_dict(self, mermaid_str): - """Parse mermaid flowchart syntax into a dictionary with 'relations'.""" - lines = mermaid_str.strip().split("\n") - relations = [] - for line in lines: - if "-->" in line: - parts = line.split("-->") - subject = parts[0].strip().split('[')[0] - relation_type = "less complicated" if "|\"less complicated\"|" in parts[1] else "relation" - obj = parts[1].strip().split('[')[0].split('|')[0].strip() - relations.append((subject, relation_type, obj)) - return {"relations": relations} - - def build_graph(self, ontology): - print(f"\t[ build_graph :: start ]") - graph = {} - for index, component in enumerate(ontology): - print(f"\t[ build_graph :: component[{index}] :: {component} ]") - if isinstance(component, dict) and 'relations' in component: - for relation in component['relations']: - subject, relation_type, obj = relation - print(f"\t[ build_graph :: relation :: {subject} --{relation_type}--> {obj} ]") - if subject not in graph: - graph[subject] = [] - if obj not in graph: - graph[obj] = [] - graph[subject].append(obj) - graph[obj].append(subject) - else: - print(f"\t[ build_graph :: error :: component[{index}] is not a dict or missing 'relations' key ]") - print(f"\t[ build_graph :: graph :: {graph} ]") - return graph - - def calculate_vertex_weights(self, graph): - print(f"\t[ calculate_vertex_weights :: start ]") - vertex_weights = {vertex: len(edges) for vertex, edges in graph.items()} - for vertex, weight in vertex_weights.items(): - print(f"\t[ calculate_vertex_weights :: vertex :: {vertex} :: weight :: {weight} ]") - return vertex_weights - - def calculate_sub_graph_weights(self, graph): - print(f"\t[ calculate_sub_graph_weights :: start ]") - sub_graph_weights = {} - visited = set() - for vertex in graph: - if vertex not in visited: - sub_graph = self.dfs(graph, vertex, visited) - sub_graph_weight = sum(self.calculate_vertex_weights(sub_graph).values()) - sub_graph_weights[vertex] = sub_graph_weight - print(f"\t[ calculate_sub_graph_weights :: vertex :: {vertex} :: sub_graph_weight :: {sub_graph_weight} ]") - return sub_graph_weights - - def dfs(self, graph, start, visited): - print(f"\t[ dfs :: start :: {start} ]") - stack = [start] - sub_graph = {} - while stack: - vertex = stack.pop() - if vertex not in visited: - visited.add(vertex) - sub_graph[vertex] = graph[vertex] - stack.extend([v for v in graph[vertex] if v not in visited]) - print(f"\t[ dfs :: vertex :: {vertex} :: visited :: {visited} ]") - print(f"\t[ dfs :: sub_graph :: {sub_graph} ]") - return sub_graph - - def combine_weights(self, vertex_weights, sub_graph_weights): - print(f"\t[ combine_weights :: start ]") - combined_weights = {} - for vertex in vertex_weights: - combined_weights[vertex] = vertex_weights[vertex] + sub_graph_weights.get(vertex, 0) - print(f"\t[ combine_weights :: vertex :: {vertex} :: combined_weight :: {combined_weights[vertex]} ]") - return combined_weights - - def rank_arguments(self, argument_weights): - print(f"\t[ rank_arguments :: start ]") - ranked_arguments = sorted(argument_weights.items(), key=lambda item: item[1], reverse=True) - for rank, (vertex, weight) in enumerate(ranked_arguments, 1): - print(f"\t[ rank_arguments :: rank :: {rank} :: vertex :: {vertex} :: weight :: {weight} ]") - return ranked_arguments - - def cluster_messages(self, user_messages, generation, session_id): - clustered_messages = {} - for user_id, messages in user_messages.items(): - if len(messages) > 1: - clusters = self.argument_detection.cluster_sentences(messages, distance_threshold=1.45) - clustered_messages[user_id] = { - cluster_id: Cluster(user_id, generation, session_id, cluster_id, cluster_sentences) - for cluster_id, cluster_sentences in clusters.items() - } - return clustered_messages - - def wepcc_cluster(self, clusters: Dict[str, Cluster], report_wepcc_result): - wepcc_results = {} - for user_id, user_clusters in clusters.items(): - wepcc_results[user_id] = {} - for cluster_id, cluster in user_clusters.items(): - print(f"\t[ reflect :: Running WEPCC for user {user_id}, cluster {cluster_id} ]") - warrant, evidence, persuasiveness_justification, claim, counterclaim = self.argument_detection.fetch_argument_definition( - cluster.sentences) - wepcc_results[user_id][cluster_id] = { - 'warrant': warrant, - 'evidence': evidence, - 'persuasiveness_justification': persuasiveness_justification, - 'claim': claim, - 'counterclaim': counterclaim - } - print( - f"\t[ reflect :: WEPCC for user {user_id}, cluster {cluster_id} :: {wepcc_results[user_id][cluster_id]} ]") - - # Output to websocket - report_wepcc_result(cluster.cluster_hash, user_id, cluster_id, cluster.cluster_hash, - wepcc_results[user_id][cluster_id]) - return wepcc_results - - def get_cluster_weight_modulator(self, wepcc_results, cutoff): - cluster_weight_modulator = {} - for user_idA, clustersA in wepcc_results.items(): - cluster_weight_modulator[user_idA] = cluster_weight_modulator.get(user_idA, {}) - - for cluster_idA, wepccA in clustersA.items(): - phase_sim_A = [] - for user_idB, clustersB in wepcc_results.items(): - if user_idA != user_idB: - for cluster_idB, wepccB in clustersB.items(): - # Calculate cosine similarity between counterclaims and claims - counterclaim_embedding = self.fast_embedding_model.encode(wepccA['counterclaim']) - claim_embedding = self.fast_embedding_model.encode(wepccB['claim']) - sim_score = cosine_similarity([counterclaim_embedding], [claim_embedding])[0][0] - print( - f"\t[ reflect :: Sim score between {user_idA}'s counterclaim (cluster {cluster_idA}) and {user_idB}'s claim (cluster {cluster_idB}) :: {sim_score} ]") - if sim_score > cutoff: - phase_sim_A.append((sim_score, cluster_idB, user_idB)) - if cluster_idA not in cluster_weight_modulator[user_idA]: - cluster_weight_modulator[user_idA][cluster_idA] = [] - for sim_score, cluster_idB, user_idB in phase_sim_A: - normalized_value = (sim_score - cutoff) / (1 - cutoff) - cluster_weight_modulator[user_idA][cluster_idA].append(normalized_value) - print( - f"\t[ reflect :: Normalized value for {user_idA} (cluster {cluster_idA}) :: {normalized_value} ]") - return cluster_weight_modulator - - def gather_final_results(self, cluster_shadow_coverage, wepcc_results, unaddressed_score_multiplier): - aggregated_scores = {} - addressed_clusters = {} - unaddressed_clusters = {} - - results = [] - - for user_id, weight_mods in cluster_shadow_coverage.items(): - total_score = 0 - addressed_clusters[user_id] = [] - unaddressed_clusters[user_id] = [] - - user_result = {"user": user_id, "clusters": []} - - for cluster_id, modulator in weight_mods.items(): - try: - persuasiveness_object = json.loads( - wepcc_results[user_id][cluster_id]['persuasiveness_justification']) - persuasiveness_score = float(persuasiveness_object['content']['persuasiveness_score']) - addressed_score = (1 - modulator) * persuasiveness_score - total_score += addressed_score - addressed_clusters[user_id].append((cluster_id, addressed_score)) - user_result["clusters"].append({ - "cluster": cluster_id, - "type": "addressed", - "score": addressed_score - }) - print( - f"\t[ reflect :: Addressed score for User {user_id}, Cluster {cluster_id} :: {addressed_score} ]") - except json.JSONDecodeError as e: - print(f"\t[ reflect :: JSONDecodeError for User {user_id}, Cluster {cluster_id} :: {e} ]") - print( - f"\t[ reflect :: Invalid JSON :: {wepcc_results[user_id][cluster_id]['persuasiveness_justification']} ]") - - # Add unaddressed arguments' scores - for cluster_id, wepcc in wepcc_results[user_id].items(): - if cluster_id not in weight_mods: - try: - persuasiveness_object = json.loads(wepcc['persuasiveness_justification']) - persuasiveness_score = float(persuasiveness_object['content']['persuasiveness_score']) - unaddressed_score = persuasiveness_score * unaddressed_score_multiplier - total_score += unaddressed_score - unaddressed_clusters[user_id].append((cluster_id, unaddressed_score)) - user_result["clusters"].append({ - "cluster": cluster_id, - "type": "unaddressed", - "score": unaddressed_score - }) - print( - f"\t[ reflect :: Unaddressed score for User {user_id}, Cluster {cluster_id} :: {unaddressed_score} ]") - except json.JSONDecodeError as e: - print(f"\t[ reflect :: JSONDecodeError for User {user_id}, Cluster {cluster_id} :: {e} ]") - print(f"\t[ reflect :: Invalid JSON :: {wepcc['persuasiveness_justification']} ]") - - aggregated_scores[user_id] = total_score - user_result["total_score"] = total_score - results.append(user_result) - print(f"\t[ reflect :: Aggregated score for User {user_id} :: {total_score} ]") - - return aggregated_scores, addressed_clusters, unaddressed_clusters, results - - def get_cluster_shadow_coverage(self, cluster_weight_modulator, cutoff): - final_scores = {} - - # Post-process the collected normalized values for each cluster - for user_id, cluster_data in cluster_weight_modulator.items(): - final_scores[user_id] = final_scores.get(user_id, {}) - for cluster_idA, normalized_values in cluster_data.items(): - if normalized_values: - highest = max(normalized_values) - shadow_coverage = highest - for value in normalized_values: - if value != highest: - shadow_coverage += (value * (1.0 - cutoff)) * (1 - shadow_coverage) - # Since we're adding coverage, shadow_coverage should naturally stay within [0,1] - # No need to clamp or use min - - # Initialize the nested dictionary if it doesn't exist - if cluster_idA not in final_scores[user_id]: - final_scores[user_id][cluster_idA] = 0 - - # Store the final score - final_scores[user_id][cluster_idA] = shadow_coverage - print( - f"\t[ reflect :: Combined score for {user_id} (cluster {cluster_idA}) :: {shadow_coverage} ]") - - return final_scores - - async def reflect(self, topic, message_history): - unaddressed_score_multiplier = 2.5 - - print(f"\t[ reflect :: topic :: {topic} ]") - - # Check if there are at least two users with at least one cluster each - if len(clustered_messages) < 2 or any(len(clusters) < 1 for clusters in clustered_messages.values()): - print("\t[ reflect :: Not enough clusters or users to perform argument matching ]") - return - - # Step 3: Run WEPCC on each cluster - wepcc_results = self.wepcc_cluster(clustered_messages) - print(f"\t[ reflect :: wepcc_results :: {wepcc_results} ]") - - # Define similarity cutoff threshold - cutoff = 0.5 - - # Initialize phase similarity and cluster weight modulator - # Step 4: Match each user's Counterclaims with all other users' Claims - cluster_weight_modulator = self.get_cluster_weight_modulator(wepcc_results, cutoff) - - # Step 5: Calculate the counter-factual shadow coverage for each cluster - # Create a new dictionary to hold the final combined scores - cluster_shadow_coverage = self.get_cluster_shadow_coverage(cluster_weight_modulator, cutoff) - - # Step 6: Final aggregation and ranking - # Final aggregation and ranking - (aggregated_scores, - addressed_clusters, - unaddressed_clusters, - results) = self.gather_final_results(cluster_shadow_coverage, wepcc_results, unaddressed_score_multiplier) - - print(f"\t[ reflect :: aggregated_scores :: {aggregated_scores} ]") - print(f"\t[ reflect :: addressed_clusters :: {addressed_clusters} ]") - print(f"\t[ reflect :: unaddressed_clusters :: {unaddressed_clusters} ]") - - app_state = AppState().get_instance() - app_state.set_state("wepcc_results", wepcc_results) - app_state.set_state("aggregated_scores", aggregated_scores) - app_state.set_state("addressed_clusters", addressed_clusters) - app_state.set_state("unaddressed_clusters", unaddressed_clusters) - - print(f"\t[ reflect :: Completed ]") - - return results - -#alright! once again, same style, same acumen, boil over each and every one of those - -#okay compose it all in a series of functions so I can copy paste. - -#AFTERWARDS I'd like a list of all of the new functions you need to yet provide super-stubs for \ No newline at end of file diff --git a/topos/config.yaml b/topos/config.yaml new file mode 100644 index 0000000..8fde4f5 --- /dev/null +++ b/topos/config.yaml @@ -0,0 +1 @@ +active_spacy_model: en_core_web_trf diff --git a/topos/lobby/lobby_server.py b/topos/lobby/lobby_server.py deleted file mode 100644 index 477ae54..0000000 --- a/topos/lobby/lobby_server.py +++ /dev/null @@ -1,78 +0,0 @@ -import asyncio -import json -from fastapi import WebSocket, WebSocketDisconnect -from typing import Dict, List - -class LobbyServer: - def __init__(self): - self.general_lobby = [] - self.game_lobbies = {} # game_id -> list of websockets - self.websocket_to_user = {} # websocket -> user_id - - async def connect(self, websocket: WebSocket, user_id: str): - await websocket.accept() - self.websocket_to_user[websocket] = user_id - print(f"\t[ User {user_id} connected ]") - - async def disconnect(self, websocket: WebSocket): - user_id = self.websocket_to_user[websocket] - del self.websocket_to_user[websocket] - print(f"\t[ User {user_id} disconnected ]") - if websocket in self.general_lobby: - self.general_lobby.remove(websocket) - for game_id in self.game_lobbies: - if websocket in self.game_lobbies[game_id]: - self.game_lobbies[game_id].remove(websocket) - - async def join_general_lobby(self, websocket: WebSocket): - self.general_lobby.append(websocket) - await self.send_message(websocket, "GeneralLobbyEntered", {"status": "ok"}) - print(f"\t\t[ User joined general lobby ]") - await self.send_available_games(websocket) - - async def join_game_lobby(self, websocket: WebSocket, game_id: str): - if game_id not in self.game_lobbies: - self.game_lobbies[game_id] = [] - self.game_lobbies[game_id].append(websocket) - await self.send_message(websocket, "InGameLobbyJoined", {"status": "ok", "game_id": game_id}) - print(f"\t\t[ User joined game lobby {game_id} ]") - - async def send_message(self, websocket: WebSocket, message_type: str, data: dict): - message = {"type": message_type, "data": data} - await websocket.send_text(json.dumps(message)) - - async def send_available_games(self, websocket: WebSocket): - # This is a placeholder. Implement your logic to get the available games. - available_games = [{"gameId": "1", "name": "Debate Game 1"}, {"gameId": "2", "name": "Debate Game 2"}] - await self.send_message(websocket, "AvailableGames", {"games": available_games}) - - async def handle_message(self, websocket: WebSocket, message: str): - data = json.loads(message) - message_type = data["type"] - user_id = self.websocket_to_user[websocket] - - if message_type == "JoinGeneralLobby": - await self.join_general_lobby(websocket) - elif message_type == "JoinInGameLobby": - game_id = data["game_id"] - await self.join_game_lobby(websocket, game_id) - elif message_type == "ReadyGame": - game_id = data["game_id"] - await self.send_message(websocket, "GameReadied", {"status": "ready", "game_id": game_id}) - elif message_type == "UnreadyGame": - game_id = data["game_id"] - await self.send_message(websocket, "GameUnreadied", {"status": "unready", "game_id": game_id}) - - async def handle_connection(self, websocket: WebSocket, user_id: str): - await self.connect(websocket, user_id) - try: - while True: - data = await websocket.receive_text() - await self.handle_message(websocket, data) - except WebSocketDisconnect: - await self.disconnect(websocket) - except Exception as e: - print(f"Error: {e}") - await self.disconnect(websocket) - -lobby_server = LobbyServer() diff --git a/topos/models/models.py b/topos/models/models.py index 405b5bc..1620dfa 100644 --- a/topos/models/models.py +++ b/topos/models/models.py @@ -2,12 +2,27 @@ from pydantic import BaseModel - class Message(BaseModel): content: str sender: str - class ModelConfig(BaseModel): model: str - temperature: float \ No newline at end of file + temperature: float + +class MermaidChartPayload(BaseModel): + message: str = None + conversation_id: str + full_conversation: bool = False + model: str = "dolphin-llama3" + provider: str = "ollama" + api_key: str = "ollama" + temperature: float = 0.04 + + +class ConversationTopicsRequest(BaseModel): + conversation_id: str + model: str + +class ConversationIDRequest(BaseModel): + conversation_id: str diff --git a/topos/generations/__init__.py b/topos/services/generations_service/__init__.py similarity index 100% rename from topos/generations/__init__.py rename to topos/services/generations_service/__init__.py diff --git a/topos/generations/chat_gens.py b/topos/services/generations_service/chat_gens.py similarity index 100% rename from topos/generations/chat_gens.py rename to topos/services/generations_service/chat_gens.py diff --git a/topos/generations/llm_client.py b/topos/services/generations_service/llm_client.py similarity index 100% rename from topos/generations/llm_client.py rename to topos/services/generations_service/llm_client.py diff --git a/topos/services/ontology_service/mermaid_chart.py b/topos/services/ontology_service/mermaid_chart.py index e576f3d..2bd8e65 100644 --- a/topos/services/ontology_service/mermaid_chart.py +++ b/topos/services/ontology_service/mermaid_chart.py @@ -2,7 +2,7 @@ import re from topos.FC.ontological_feature_detection import OntologicalFeatureDetection -from topos.generations.chat_gens import LLMController +from topos.services.generations_service.chat_gens import LLMController class MermaidCreator: def __init__(self, LLMController: LLMController): diff --git a/topos/utilities/utils.py b/topos/utilities/utils.py index ff1be6a..e9df92b 100644 --- a/topos/utilities/utils.py +++ b/topos/utilities/utils.py @@ -14,7 +14,15 @@ def get_python_command(): def get_config_path(): config_path = os.getenv('TOPOS_CONFIG_PATH') if not config_path: - raise EnvironmentError("TOPOS_CONFIG_PATH environment variable is not set") + print("TOPOS_CONFIG_PATH environment variable is not set") + print("trying to locate in root directory") + path = get_root_directory() + "/config.yaml" + print(f"{path} is directory: {os.path.isfile(path)}") + if os.path.isfile(path): + print(f"{path} config found in root directory") + config_path = path + else: + raise EnvironmentError("TOPOS_CONFIG_PATH environment variable is not set AND no config.yaml found") return config_path def get_root_directory():