diff --git a/config.yaml b/config.yaml index 8fde4f5..8b9c5bd 100644 --- a/config.yaml +++ b/config.yaml @@ -1 +1 @@ -active_spacy_model: en_core_web_trf +active_spacy_model: en_core_web_sm diff --git a/default.nix b/default.nix index ab998b8..f80f9e9 100644 --- a/default.nix +++ b/default.nix @@ -42,23 +42,43 @@ in pkgs.mkShell { # Set up the test database, role, and tables psql -d $POSTGRES_DB < + # utterance_text_info_table: message_id, conv_id, userid, name, role, timestamp, 'moderator' , mod_label , tern_sent , tern_label , emo_27 , emo_27_label + + def _save_to_postgres(self, conv_id, new_data): if self.conn is None: logging.error("PostgreSQL connection is not initialized") return @@ -239,17 +260,79 @@ def _save_to_postgres(self, conv_id, new_data): try: logging.debug(f"Attempting to save data for conv_id: {conv_id}") with self.conn.cursor() as cur: - cur.execute(""" - INSERT INTO conversation_cache (conv_id, message_data) - VALUES (%s, %s::jsonb) - ON CONFLICT (conv_id) DO UPDATE - SET message_data = conversation_cache.message_data || EXCLUDED.message_data - """, (conv_id, json.dumps([new_data], default=serialize_datetime))) + for message_id, message_data in new_data.items(): + role = message_data['role'] + timestamp = message_data['timestamp'] + message = message_data['message'] + userid = "unknown" # Assuming you get this from elsewhere + name = "unknown" # Assuming you get this from elsewhere + + # Insert conversation data + cur.execute(""" + INSERT INTO conversation_table (message_id, conv_id, userid, timestamp, name, role, message) + VALUES (%s, %s, %s, %s, %s, %s, %s) + ON CONFLICT (message_id) DO UPDATE + SET message = EXCLUDED.message, role = EXCLUDED.role, timestamp = EXCLUDED.timestamp; + """, (message_id, conv_id, userid, timestamp, name, role, message)) + + # Insert token information (utterance_token_info_table) + if 'in_line' in message_data: + ents_data = message_data['in_line']['base_analysis'] + if len(ents_data) > 0: + ents = json.dumps(ents_data) + cur.execute(""" + INSERT INTO utterance_token_info_table (message_id, conv_id, userid, name, role, timestamp, ents) + VALUES (%s, %s, %s, %s, %s, %s, %s) + ON CONFLICT (message_id) DO UPDATE + SET ents = EXCLUDED.ents, timestamp = EXCLUDED.timestamp; + """, (message_id, conv_id, userid, name, role, timestamp, ents)) + + # Insert text analysis information (utterance_text_info_table) + if 'commenter' in message_data: + base_analysis = message_data['commenter']['base_analysis'] + mod_label = base_analysis['mod_level'][0]['label'] + tern_sent = json.dumps(base_analysis['tern_sent']) + tern_label = base_analysis['tern_sent'][0]['label'] + emo_27 = json.dumps(base_analysis['emo_27']) + emo_27_label = base_analysis['emo_27'][0]['label'] + + cur.execute(""" + INSERT INTO utterance_text_info_table + (message_id, conv_id, userid, name, role, timestamp, moderator, mod_label, tern_sent, tern_label, emo_27, emo_27_label) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + ON CONFLICT (message_id) DO UPDATE + SET moderator = EXCLUDED.moderator, mod_label = EXCLUDED.mod_label, + tern_sent = EXCLUDED.tern_sent, tern_label = EXCLUDED.tern_label, + emo_27 = EXCLUDED.emo_27, emo_27_label = EXCLUDED.emo_27_label; + """, (message_id, conv_id, userid, name, role, timestamp, + json.dumps(base_analysis['mod_level']), mod_label, tern_sent, tern_label, emo_27, emo_27_label)) + self.conn.commit() logging.info(f"Successfully saved data for conv_id: {conv_id}") except Exception as e: logging.error(f"Failed to save to PostgreSQL for conv_id {conv_id}: {e}", exc_info=True) self.conn.rollback() + + # def _save_to_postgres(self, conv_id, new_data): + # if self.conn is None: + # logging.error("PostgreSQL connection is not initialized") + # return + + # try: + # logging.debug(f"Attempting to save data for conv_id: {conv_id}") + # with self.conn.cursor() as cur: + # print("POSTGRES DATA", new_data) + # cur.execute(""" + # INSERT INTO conversation_cache (conv_id, message_data) + # VALUES (%s, %s::jsonb) + # ON CONFLICT (conv_id) DO UPDATE + # SET message_data = conversation_cache.message_data || EXCLUDED.message_data + # """, (conv_id, json.dumps([new_data], default=serialize_datetime))) + # self.conn.commit() + # logging.info(f"Successfully saved data for conv_id: {conv_id}") + # except Exception as e: + # logging.error(f"Failed to save to PostgreSQL for conv_id {conv_id}: {e}", exc_info=True) + # self.conn.rollback() def clear_cache(self): """Clear the cache directory or PostgreSQL table.""" diff --git a/topos/services/ontology_service/mermaid_chart.py b/topos/services/ontology_service/mermaid_chart.py index 2bd8e65..4786f2f 100644 --- a/topos/services/ontology_service/mermaid_chart.py +++ b/topos/services/ontology_service/mermaid_chart.py @@ -1,7 +1,7 @@ # ontological_feature_detection.py import re -from topos.FC.ontological_feature_detection import OntologicalFeatureDetection +from topos.services.ontology_service.ontological_feature_detection import OntologicalFeatureDetection from topos.services.generations_service.chat_gens import LLMController class MermaidCreator: diff --git a/topos/FC/ontological_feature_detection.py b/topos/services/ontology_service/ontological_feature_detection.py similarity index 99% rename from topos/FC/ontological_feature_detection.py rename to topos/services/ontology_service/ontological_feature_detection.py index 7e96e65..634fdf8 100644 --- a/topos/FC/ontological_feature_detection.py +++ b/topos/services/ontology_service/ontological_feature_detection.py @@ -6,7 +6,7 @@ import nltk import spacy import warnings -from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline +from transformers import AutoTokenizer, AutoModelForTokenClassification from datetime import datetime from topos.services.database.app_state import AppState