feat: Loading analytics from new Postgres Setup (WIP)
jonnyjohnson1 committed Oct 26, 2024
1 parent 3f7cf9c commit d45c87b
Showing 6 changed files with 202 additions and 167 deletions.
6 changes: 3 additions & 3 deletions default.nix
@@ -44,7 +44,7 @@ in pkgs.mkShell {
     psql -d $POSTGRES_DB <<SQL | tee -a $LOGFILE
     -- Create the conversation table
-    CREATE TABLE IF NOT EXISTS conversation_table (
+    CREATE TABLE IF NOT EXISTS conversation (
         message_id VARCHAR PRIMARY KEY,
         conv_id VARCHAR NOT NULL,
         userid VARCHAR NOT NULL,
@@ -55,7 +55,7 @@ in pkgs.mkShell {
     );
     -- Create the utterance_token_info table
-    CREATE TABLE IF NOT EXISTS utterance_token_info_table (
+    CREATE TABLE IF NOT EXISTS utterance_token_info (
         message_id VARCHAR PRIMARY KEY,
         conv_id VARCHAR NOT NULL,
         userid VARCHAR NOT NULL,
@@ -66,7 +66,7 @@ in pkgs.mkShell {
     );
     -- Create the utterance_text_info table
-    CREATE TABLE IF NOT EXISTS utterance_text_info_table (
+    CREATE TABLE IF NOT EXISTS utterance_text_info (
         message_id VARCHAR PRIMARY KEY,
         conv_id VARCHAR NOT NULL,
         userid VARCHAR NOT NULL,
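The rename drops the redundant _table suffix from each table. For reference, a minimal sketch of a loader in the spirit of cache_manager.load_utterance_token_info, which the report.py diff below calls: it assumes psycopg2, infers the column order from the tuple unpacking in report.py (only the first three columns are visible in this diff), and uses placeholder connection settings.

import psycopg2

def load_utterance_token_info(conversation_id: str):
    # Placeholder connection settings -- not taken from this repo.
    conn = psycopg2.connect(dbname="topos", user="postgres")
    try:
        with conn.cursor() as cur:
            # Column order is assumed from the unpacking in report.py:
            # message_id, conv_id, userid, name, role, timestamp, ents
            cur.execute(
                "SELECT message_id, conv_id, userid, name, role, timestamp, ents "
                "FROM utterance_token_info WHERE conv_id = %s",
                (conversation_id,),
            )
            return cur.fetchall()
    finally:
        conn.close()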
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "topos"
-version = "0.2.2"
+version = "0.2.3"
 description = "The official Python client for Topos."
 authors = ["Dialogues <[email protected]>"]
 license = "MIT"
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = topos
-version = 0.2.1
+version = 0.2.3
 author = Jonny Johnson
 author_email = [email protected]
 description = For interacting with Topos tooling
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='topos',
-    version='0.2.1',
+    version='0.2.3',
     packages=find_packages(),
     entry_points={
         'console_scripts': [
137 changes: 56 additions & 81 deletions topos/api/routers/report/report.py
@@ -30,88 +30,64 @@
 @router.post("/chat_conversation_analysis")
 async def chat_conversation_analysis(request: ConversationIDRequest):
     conversation_id = request.conversation_id
-    # load conversation
-    conv_data = cache_manager.load_from_cache(conversation_id)
-
-    if conv_data is None:
-        raise HTTPException(status_code=404, detail="Conversation not found in cache")
-    # Initialize counters
-    named_entity_counter = Counter()
-    entity_text_counter = Counter()
-    emotion_counter = Counter()
-
-    # Initialize user-based counters
-    named_entity_counter_per_user = defaultdict(Counter)
-    entity_text_counter_per_user = defaultdict(Counter)
-    emotion_counter_per_user = defaultdict(Counter)
-
-    print(f"\t[ conversational analysis ]")
-
+    # Connect to the PostgreSQL database
     if cache_manager.use_postgres:
-        # Extract counts
-        for conversation_id, messages_list in conv_data.items():
-            print(f"\t\t[ item :: {conversation_id} ]")
-            for message_dict in messages_list:
-                for cntn in message_dict:
-                    for message_id, content in cntn.items():
-                        # print(f"\t\t\t[ content :: {str(content)[40:]} ]")
-                        # print(f"\t\t\t[ keys :: {str(content.keys())[40:]} ]")
-                        role = content['role']
-                        user = role
-                        if role == "user" and 'user_name' in content:
-                            user = content['user_name']
-
-                        # Process named entities and base analysis
-                        base_analysis = content['in_line']['base_analysis']
-                        for entity_type, entities in base_analysis.items():
-                            named_entity_counter[entity_type] += len(entities)
-                            named_entity_counter_per_user[user][entity_type] += len(entities)
-                            for entity in entities:
-                                entity_text_counter[str(entity.get('text', ''))] += 1
-                                entity_text_counter_per_user[user][str(entity.get('text', ''))] += 1
-
-                        # Process emotions
-                        emotions = content['commenter']['base_analysis']['emo_27']
-                        for emotion in emotions:
-                            emotion_counter[emotion['label']] += 1
-                            emotion_counter_per_user[user][emotion['label']] += 1
+        try:
+            # Query to load token classification data (utterance_token_info_table)
+            token_data = cache_manager.load_utterance_token_info(conversation_id)
+
+            # Query to load text classification data (utterance_text_info_table)
+            text_data = cache_manager.load_utterance_text_info(conversation_id)
+
+            if not token_data and not text_data:
+                raise HTTPException(status_code=404, detail="Conversation not found in cache")
+
+        except Exception as e:
+            logging.error(f"Failed to retrieve data from PostgreSQL: {e}", exc_info=True)
+            raise HTTPException(status_code=500, detail="Failed to retrieve data from cache")
+
+        # Initialize counters
+        named_entity_counter = Counter()
+        entity_text_counter = Counter()
+        emotion_counter = Counter()
+
+        # Initialize user-based counters
+        named_entity_counter_per_user = defaultdict(Counter)
+        entity_text_counter_per_user = defaultdict(Counter)
+        emotion_counter_per_user = defaultdict(Counter)
+
+        print(f"\t[ conversational analysis ]")
+        # Extract counts from token data
+        for token_row in token_data:
+            message_id, conv_id, userid, name, role, timestamp, ents = token_row
+            user = name or role  # use name if available, otherwise role
+            # Process named entities and base analysis
+            for entity in ents:
+                entity_list = ents[entity]
+                for ent in entity_list:
+                    entity_type = ent.get('label')
+                    entity_text = ent.get('text', '')
+                    named_entity_counter[entity_type] += 1
+                    named_entity_counter_per_user[user][entity_type] += 1
+                    entity_text_counter[entity_text] += 1
+                    entity_text_counter_per_user[user][entity_text] += 1
+
+        # Extract counts from text data
+        for text_row in text_data:
+            message_id, conv_id, userid, name, role, timestamp, moderator, mod_label, tern_sent, tern_label, emo_27, emo_27_label = text_row
+            user = name if name != "unknown" else role  # use name if available, otherwise role
+
+            # Process emotions
+            for emotion in emo_27:
+                emotion_label = emotion['label']
+                emotion_counter[emotion_label] += 1
+                emotion_counter_per_user[user][emotion_label] += 1
 
     else:
-        # Extract counts
-        for conversation_id, messages in conv_data.items():
-            print(f"\t\t[ item :: {conversation_id} ]")
-            for message_id, content in messages.items():
-                # print(f"\t\t\t[ content :: {str(content)[40:]} ]")
-                # print(f"\t\t\t[ keys :: {str(content.keys())[40:]} ]")
-                role = content['role']
-                user = role
-                if role == "user" and 'user_name' in content:
-                    user = content['user_name']
-                base_analysis = content['in_line']['base_analysis']
-                for entity_type, entities in base_analysis.items():
-                    named_entity_counter[entity_type] += len(entities)
-                    named_entity_counter_per_user[user][entity_type] += len(entities)
-                    for entity in entities:
-                        entity_text_counter[str(entity['text'])] += 1
-                        entity_text_counter_per_user[user][str(entity['text'])] += 1
-
-                emotions = content['commenter']['base_analysis']['emo_27']
-                for emotion in emotions:
-                    emotion_counter[emotion['label']] += 1
-                    emotion_counter_per_user[user][emotion['label']] += 1
-
-    # Evocations equals num of each entity
-    # print("Named Entity Count:")
-    # print(named_entity_counter) # get the count of each entity from the conv_data
-
-    # # Actual Items summoned
-    # print("\nEntity Text Count:")
-    # print(entity_text_counter) # get the count of each summoned item from the conv_data
-
-    # # Detected emotions in the population
-    # print("\nEmotion Count:")
-    # print(emotion_counter) # also get a population count of all the emotions that were invoked in the conversation
-
-    # print("\t\t[ emotion counter per-user :: {emotion_counter_per_user}")
-    # Convert Counter objects to dictionaries
+        # Non-Postgres handling if needed, otherwise raise an exception
+        raise HTTPException(status_code=501, detail="PostgreSQL is the only supported cache manager.")
 
     named_entity_dict = {
         "totals": dict(named_entity_counter),
         "per_role": {user: dict(counter) for user, counter in named_entity_counter_per_user.items()}
@@ -132,7 +132,6 @@ async def chat_conversation_analysis(request: ConversationIDRequest):
         'emotions27': emotion_dict
     }
 
-
     # Return the conversation or any other response needed
    return {"conversation": conversation}

