feat: Loading analytics from new Postgres Setup (WIP)
jonnyjohnson1 committed Oct 26, 2024
1 parent 3f7cf9c commit d45c87b
Showing 6 changed files with 202 additions and 167 deletions.
6 changes: 3 additions & 3 deletions default.nix
@@ -44,7 +44,7 @@ in pkgs.mkShell {
     psql -d $POSTGRES_DB <<SQL | tee -a $LOGFILE
     -- Create the conversation table
-    CREATE TABLE IF NOT EXISTS conversation_table (
+    CREATE TABLE IF NOT EXISTS conversation (
         message_id VARCHAR PRIMARY KEY,
         conv_id VARCHAR NOT NULL,
         userid VARCHAR NOT NULL,
@@ -55,7 +55,7 @@ in pkgs.mkShell {
     );
     -- Create the utterance_token_info table
-    CREATE TABLE IF NOT EXISTS utterance_token_info_table (
+    CREATE TABLE IF NOT EXISTS utterance_token_info (
         message_id VARCHAR PRIMARY KEY,
         conv_id VARCHAR NOT NULL,
         userid VARCHAR NOT NULL,
@@ -66,7 +66,7 @@ in pkgs.mkShell {
     );
     -- Create the utterance_text_info table
-    CREATE TABLE IF NOT EXISTS utterance_text_info_table (
+    CREATE TABLE IF NOT EXISTS utterance_text_info (
         message_id VARCHAR PRIMARY KEY,
         conv_id VARCHAR NOT NULL,
         userid VARCHAR NOT NULL,
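The rename drops the redundant _table suffix from each table. For reference, a minimal sketch of a loader in the spirit of cache_manager.load_utterance_token_info, which the report.py diff below calls: it assumes psycopg2, infers the column order from the tuple unpacking in report.py (only the first three columns are visible in this diff), and uses placeholder connection settings.

import psycopg2

def load_utterance_token_info(conversation_id: str):
    # Placeholder connection settings -- not taken from this repo.
    conn = psycopg2.connect(dbname="topos", user="postgres")
    try:
        with conn.cursor() as cur:
            # Column order is assumed from the unpacking in report.py:
            # message_id, conv_id, userid, name, role, timestamp, ents
            cur.execute(
                "SELECT message_id, conv_id, userid, name, role, timestamp, ents "
                "FROM utterance_token_info WHERE conv_id = %s",
                (conversation_id,),
            )
            return cur.fetchall()
    finally:
        conn.close()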
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "topos"
-version = "0.2.2"
+version = "0.2.3"
 description = "The official Python client for Topos."
 authors = ["Dialogues <[email protected]>"]
 license = "MIT"
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = topos
-version = 0.2.1
+version = 0.2.3
 author = Jonny Johnson
 author_email = [email protected]
 description = For interacting with Topos tooling
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='topos',
-    version='0.2.1',
+    version='0.2.3',
     packages=find_packages(),
     entry_points={
         'console_scripts': [
137 changes: 56 additions & 81 deletions topos/api/routers/report/report.py
@@ -30,88 +30,64 @@
 @router.post("/chat_conversation_analysis")
 async def chat_conversation_analysis(request: ConversationIDRequest):
     conversation_id = request.conversation_id
-    # load conversation
-    conv_data = cache_manager.load_from_cache(conversation_id)
-
-    if conv_data is None:
-        raise HTTPException(status_code=404, detail="Conversation not found in cache")
-    # Initialize counters
-    named_entity_counter = Counter()
-    entity_text_counter = Counter()
-    emotion_counter = Counter()
-
-    # Initialize user-based counters
-    named_entity_counter_per_user = defaultdict(Counter)
-    entity_text_counter_per_user = defaultdict(Counter)
-    emotion_counter_per_user = defaultdict(Counter)
-
-    print(f"\t[ conversational analysis ]")
-
+    # Connect to the PostgreSQL database
     if cache_manager.use_postgres:
-        # Extract counts
-        for conversation_id, messages_list in conv_data.items():
-            print(f"\t\t[ item :: {conversation_id} ]")
-            for message_dict in messages_list:
-                for cntn in message_dict:
-                    for message_id, content in cntn.items():
-                        # print(f"\t\t\t[ content :: {str(content)[40:]} ]")
-                        # print(f"\t\t\t[ keys :: {str(content.keys())[40:]} ]")
-                        role = content['role']
-                        user = role
-                        if role == "user" and 'user_name' in content:
-                            user = content['user_name']
-
-                        # Process named entities and base analysis
-                        base_analysis = content['in_line']['base_analysis']
-                        for entity_type, entities in base_analysis.items():
-                            named_entity_counter[entity_type] += len(entities)
-                            named_entity_counter_per_user[user][entity_type] += len(entities)
-                            for entity in entities:
-                                entity_text_counter[str(entity.get('text', ''))] += 1
-                                entity_text_counter_per_user[user][str(entity.get('text', ''))] += 1
-
-                        # Process emotions
-                        emotions = content['commenter']['base_analysis']['emo_27']
-                        for emotion in emotions:
-                            emotion_counter[emotion['label']] += 1
-                            emotion_counter_per_user[user][emotion['label']] += 1
+        try:
+            # Query to load token classification data (utterance_token_info_table)
+            token_data = cache_manager.load_utterance_token_info(conversation_id)
+
+            # Query to load text classification data (utterance_text_info_table)
+            text_data = cache_manager.load_utterance_text_info(conversation_id)
+
+            if not token_data and not text_data:
+                raise HTTPException(status_code=404, detail="Conversation not found in cache")
+
+        except Exception as e:
+            logging.error(f"Failed to retrieve data from PostgreSQL: {e}", exc_info=True)
+            raise HTTPException(status_code=500, detail="Failed to retrieve data from cache")
+
+        # Initialize counters
+        named_entity_counter = Counter()
+        entity_text_counter = Counter()
+        emotion_counter = Counter()
+
+        # Initialize user-based counters
+        named_entity_counter_per_user = defaultdict(Counter)
+        entity_text_counter_per_user = defaultdict(Counter)
+        emotion_counter_per_user = defaultdict(Counter)
+
+        print(f"\t[ conversational analysis ]")
+        # Extract counts from token data
+        for token_row in token_data:
+            message_id, conv_id, userid, name, role, timestamp, ents = token_row
+            user = name or role  # use name if available, otherwise role
+            # Process named entities and base analysis
+            for entity in ents:
+                entity_list = ents[entity]
+                for ent in entity_list:
+                    entity_type = ent.get('label')
+                    entity_text = ent.get('text', '')
+                    named_entity_counter[entity_type] += 1
+                    named_entity_counter_per_user[user][entity_type] += 1
+                    entity_text_counter[entity_text] += 1
+                    entity_text_counter_per_user[user][entity_text] += 1
+
+        # Extract counts from text data
+        for text_row in text_data:
+            message_id, conv_id, userid, name, role, timestamp, moderator, mod_label, tern_sent, tern_label, emo_27, emo_27_label = text_row
+            user = name if name != "unknown" else role  # use name if available, otherwise role
+
+            # Process emotions
+            for emotion in emo_27:
+                emotion_label = emotion['label']
+                emotion_counter[emotion_label] += 1
+                emotion_counter_per_user[user][emotion_label] += 1
 
     else:
-        # Extract counts
-        for conversation_id, messages in conv_data.items():
-            print(f"\t\t[ item :: {conversation_id} ]")
-            for message_id, content in messages.items():
-                # print(f"\t\t\t[ content :: {str(content)[40:]} ]")
-                # print(f"\t\t\t[ keys :: {str(content.keys())[40:]} ]")
-                role = content['role']
-                user = role
-                if role == "user" and 'user_name' in content:
-                    user = content['user_name']
-                base_analysis = content['in_line']['base_analysis']
-                for entity_type, entities in base_analysis.items():
-                    named_entity_counter[entity_type] += len(entities)
-                    named_entity_counter_per_user[user][entity_type] += len(entities)
-                    for entity in entities:
-                        entity_text_counter[str(entity['text'])] += 1
-                        entity_text_counter_per_user[user][str(entity['text'])] += 1
-
-                emotions = content['commenter']['base_analysis']['emo_27']
-                for emotion in emotions:
-                    emotion_counter[emotion['label']] += 1
-                    emotion_counter_per_user[user][emotion['label']] += 1
-
-    # Evocations equals num of each entity
-    # print("Named Entity Count:")
-    # print(named_entity_counter) # get the count of each entity from the conv_data
-
-    # # Actual Items summoned
-    # print("\nEntity Text Count:")
-    # print(entity_text_counter) # get the count of each summoned item from the conv_data
-
-    # # Detected emotions in the population
-    # print("\nEmotion Count:")
-    # print(emotion_counter) # also get a population count of all the emotions that were invoked in the conversation
-
-    # print("\t\t[ emotion counter per-user :: {emotion_counter_per_user}")
-    # Convert Counter objects to dictionaries
+        # Non-Postgres handling if needed, otherwise raise an exception
+        raise HTTPException(status_code=501, detail="PostgreSQL is the only supported cache manager.")
 
     named_entity_dict = {
         "totals": dict(named_entity_counter),
         "per_role": {user: dict(counter) for user, counter in named_entity_counter_per_user.items()}
@@ -132,7 +132,6 @@ async def chat_conversation_analysis(request: ConversationIDRequest):
         'emotions27': emotion_dict
     }
 
-
     # Return the conversation or any other response needed
    return {"conversation": conversation}

