Skip to content

Commit

Permalink
Merge branch 'staging'
Browse files Browse the repository at this point in the history
  • Loading branch information
Ledoux committed Nov 12, 2024
2 parents ea80769 + 364333a commit 95f0126
Show file tree
Hide file tree
Showing 13 changed files with 624 additions and 74 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# SPDX-License-Identifier: CC0-1.0

# Project-specific
store
session.txt
data/store
data/session.txt

*.pyc
__pycache__
Expand Down
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,41 @@ Pour lancer le bot executez :
python app
```

#### NOTE 1

Cette commande s'arrêtera sûrement si vous ne la lancez pas avec sudo, car
elle crée par défaut le data/store et le data/session.txt à la racine "/".
Vous pouvez lancer l'application pour qu'elle crée ces fichiers directement dans le dossier du projet avec la commande :

```bash
export STORE_PATH='./data/store/' && export SESSION_PATH='./data/session.txt' && python app
```

#### NOTE 2

Si vous voulez développer tout en faisant en sorte que le bot se recharge automatiquement, vous pouvez par exemple utiliser [nodemon](https://github.com/python-nodemon/nodemon) installé comme module Python global et lancer la commande suivante dans un terminal :

```bash
nodemon --watch app --ext py --exec "export STORE_PATH='./data/store/' && export SESSION_PATH='./data/session.txt' && python app"
```

#### NOTE 3

Si vous voulez que les messages engendrés par votre bot se distinguent des autres messages, possiblement envoyés par d'autres bots (comme celui de staging), définissez en plus la variable `MESSAGE_PREFIX` :

```bash
nodemon --watch app --ext py --exec "export MESSAGE_PREFIX='[DEV]' && export STORE_PATH='./data/store/' && export SESSION_PATH='./data/session.txt' && python app"
```

#### NOTE 4

Si vous voulez merger votre branche de dev pour la tester sur beta.tchap (branche staging) :

```bash
git checkout staging
git merge <your-branch>
git push origin staging
```

### Troubleshooting

Expand Down
7 changes: 6 additions & 1 deletion app/bot_msg.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class AlbertMsg:
shorts = {
"help": f"Pour retrouver ce message informatif, tapez `{COMMAND_PREFIX}aide`. Pour les geek tapez `{COMMAND_PREFIX}aide -v`.",
"reset": f"Pour ré-initialiser notre conversation, tapez `{COMMAND_PREFIX}reset`",
"collections": f"Pour modifier l'ensemble des collections utilisées quand vous me posez une question, tapez `{COMMAND_PREFIX}collections list/use/unuse/info COLLECTION_NAME/{Config().albert_all_public_command}`",
"conversation": f"Pour activer/désactiver le mode conversation, tapez `{COMMAND_PREFIX}conversation`",
"debug": f"Pour afficher des informations sur la configuration actuelle, `{COMMAND_PREFIX}debug`",
"model": f"Pour modifier le modèle, tapez `{COMMAND_PREFIX}model MODEL_NAME`",
Expand All @@ -22,6 +23,10 @@ class AlbertMsg:

failed = "🤖 Albert a échoué à répondre. Veuillez réessayez dans un moment."

flush_start = "Nettoyage des collections RAG propres à cette conversation..."

flush_end = "Nettoyage des collections RAG terminé."

reset = "**La conversation a été remise à zéro**. Vous pouvez néanmoins toujours répondre dans un fil de discussion."

user_not_allowed = "Albert est en phase de test et n'est pas encore disponible pour votre utilisateur. Contactez [email protected] pour demander un accès."
Expand Down Expand Up @@ -52,7 +57,7 @@ def help(model_url, model_short_name, cmds):

def commands(cmds):
msg = "Les commandes spéciales suivantes sont disponibles :\n\n"
msg += "- " + "\n- ".join(cmds) # type: ignore
msg += "- " + "\n- ".join(cmds) # type: ignore
return msg

def unknown_command(cmds_msg):
Expand Down
212 changes: 189 additions & 23 deletions app/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,29 @@
from matrix_bot.client import MatrixClient
from matrix_bot.config import logger
from matrix_bot.eventparser import EventNotConcerned, EventParser
from nio import Event, RoomMemberEvent, RoomMessageText
from nio import Event, RoomEncryptedFile, RoomMemberEvent, RoomMessageText

from bot_msg import AlbertMsg
from config import COMMAND_PREFIX, Config
from core_llm import (
flush_collections_with_name,
get_all_public_collections,
get_or_create_collection_with_name,
get_or_not_collection_with_name,
get_documents,
generate,
generate_sources,
get_available_models,
get_available_modes,
upload_file,
)
from iam import TchapIam
from tchap_utils import get_cleanup_body, get_previous_messages, get_thread_messages, isa_reply_to

from tchap_utils import (
get_cleanup_body,
get_decrypted_file,
get_previous_messages,
get_thread_messages,
isa_reply_to
)

@dataclass
class CommandRegistry:
Expand Down Expand Up @@ -163,15 +173,17 @@ async def wrapper(ep: EventParser, matrix_client: MatrixClient):

config = user_configs[ep.sender]
is_allowed, msg = await tiam.is_user_allowed(config, ep.sender, refresh=True)
if is_allowed:
return await func(ep, matrix_client)
if not is_allowed:
if not msg or ep.is_command(COMMAND_PREFIX):
# Only send back the message for the generic albert_answer method
# ignoring other callbacks.
raise EventNotConcerned

if not msg or ep.is_command(COMMAND_PREFIX):
# Only send back the message for the generic albert_answer method
# ignoring other callbacks.
raise EventNotConcerned
await log_not_allowed(msg, ep, matrix_client)
return

await log_not_allowed(msg, ep, matrix_client)
await func(ep, matrix_client)
await matrix_client.room_typing(ep.room.room_id, typing_state=False)

return wrapper

Expand Down Expand Up @@ -239,6 +251,15 @@ async def albert_reset(ep: EventParser, matrix_client: MatrixClient):
await matrix_client.send_markdown_message(
ep.room.room_id, reset_message, msgtype="m.notice"
)

message = AlbertMsg.flush_start
await matrix_client.send_markdown_message(ep.room.room_id, message, msgtype="m.notice")
await matrix_client.room_typing(ep.room.room_id)
flush_collections_with_name(config, ep.room.room_id)
config.albert_collections_by_id = {}
message = AlbertMsg.flush_end
await matrix_client.send_markdown_message(ep.room.room_id, message, msgtype="m.notice")

else:
await matrix_client.send_markdown_message(
ep.room.room_id,
Expand Down Expand Up @@ -296,8 +317,7 @@ async def albert_model(ep: EventParser, matrix_client: MatrixClient):
await matrix_client.room_typing(ep.room.room_id)
command = ep.get_command()
# Get all available models
all_models = get_available_models(config)
all_models = [k for k, v in all_models.items() if v["type"] == "text-generation"]
all_models = list(get_available_models(config))
models_list = "\n\n- " + "\n- ".join(
map(lambda x: x + (" *" if x == config.albert_model else ""), all_models)
)
Expand Down Expand Up @@ -327,11 +347,9 @@ async def albert_model(ep: EventParser, matrix_client: MatrixClient):
@only_allowed_user
async def albert_mode(ep: EventParser, matrix_client: MatrixClient):
config = user_configs[ep.sender]
await matrix_client.room_typing(ep.room.room_id)
command = ep.get_command()
# Get all available mode for the current model
all_modes = get_available_modes(config)
all_modes += ["norag"]
mode_list = "\n\n- " + "\n- ".join(
map(lambda x: x + (" *" if x == config.albert_mode else ""), all_modes)
)
Expand All @@ -347,8 +365,18 @@ async def albert_mode(ep: EventParser, matrix_client: MatrixClient):
old_mode = config.albert_mode
config.albert_mode = mode
message = f"Le mode a été modifié : {old_mode} -> {mode}"

await matrix_client.send_markdown_message(ep.room.room_id, message, msgtype="m.notice")

if mode == "norag":
message = AlbertMsg.flush_start
await matrix_client.send_markdown_message(ep.room.room_id, message, msgtype="m.notice")
await matrix_client.room_typing(ep.room.room_id)
flush_collections_with_name(config, ep.room.room_id)
config.albert_collections_by_id = {}
message = AlbertMsg.flush_end
await matrix_client.send_markdown_message(ep.room.room_id, message, msgtype="m.notice")


@register_feature(
group="albert",
Expand All @@ -361,15 +389,13 @@ async def albert_sources(ep: EventParser, matrix_client: MatrixClient):
config = user_configs[ep.sender]

try:
if config.last_rag_references:
if config.last_rag_chunks:
await matrix_client.room_typing(ep.room.room_id)
sources = generate_sources(config, config.last_rag_references)
sources_msg = ""
for source in sources:
extra_context = ""
if source.get("context"):
extra_context = f'({source["context"]})'
sources_msg += f'- {source["title"]} {extra_context}: {source["url"]} \n'
for chunk in config.last_rag_chunks[:max(30, len(config.last_rag_chunks))]:
sources_msg += f'________________________________________\n'
sources_msg += f'####{chunk["metadata"]["document_name"]}\n'
sources_msg += f'{chunk["content"]}\n'
else:
sources_msg = "Aucune source trouvée, veuillez me poser une question d'abord."
except Exception:
Expand All @@ -380,6 +406,144 @@ async def albert_sources(ep: EventParser, matrix_client: MatrixClient):
await matrix_client.send_markdown_message(ep.room.room_id, sources_msg)


@register_feature(
    group="albert",
    onEvent=RoomMessageText,
    command="collections",
    help=AlbertMsg.shorts["collections"],
)
@only_allowed_user
async def albert_collection(ep: EventParser, matrix_client: MatrixClient):
    """Handle the `collections` command (list / info / use / unuse).

    The sender's per-user config holds the collections currently used for
    RAG in `albert_collections_by_id`. Sub-commands:

    - ``list``: show the sender's active collections and the public ones.
    - ``info NAME``: show the documents stored in collection NAME.
    - ``use NAME``: add collection NAME (or every public collection when
      NAME is `config.albert_all_public_command`) to the active ones.
    - ``unuse NAME``: empty the set of active collections.

    Exactly one notice message is sent back to the room in every case.

    Args:
        ep: parsed event carrying the sender, the room and the command tokens.
        matrix_client: client used to send the typing state and the answer.
    """
    config = user_configs[ep.sender]
    await matrix_client.room_typing(ep.room.room_id)
    command = ep.get_command()
    usage_example = "\n\nExemple: `!collections use decisions-adlc`"
    method = command[1] if len(command) > 1 else None

    if method is None:
        message = f"La commande !collections nécessite de donner list/use/unuse/info puis éventuellement <nom_de_collection>/{config.albert_all_public_command} :"
        message += usage_example
    elif method not in ('list', 'info', 'use', 'unuse'):
        # Reject unknown sub-commands whatever the number of arguments, so
        # that a typo can never fall through to the destructive "unuse" path.
        message = f"La commande !collections {method} n'est pas reconnue, seul list/use/unuse/info sont autorisés"
    elif method != 'list' and len(command) <= 2:
        message = f"La commande !collections {method} nécessite de donner en plus COLLECTION_NAME/{config.albert_all_public_command} :"
        message += usage_example
    elif method == 'list':
        active_collections = list(config.albert_collections_by_id.values())
        if not active_collections:
            message = "Vous n'avez pas de collections enregistrées pour le moment qui pourraient m'aider à répondre à vos questions."
        else:
            # The private collection is named after the room id: show the
            # configured friendly name instead.
            display_names = [
                f"{c['name']}" if c['name'] != ep.room.room_id else f"{config.albert_my_private_collection_name}"
                for c in active_collections
            ]
            collection_infos = '\n - ' + '\n - '.join(display_names)
            message = (
                "Les collections :\n"
                f"{collection_infos}\n\n"
                "sont prises en compte pour m'aider à répondre à vos questions."
            )
        public_collections = get_all_public_collections(config)
        message += "\n\nNotez que les collections publiques à votre disposition sont:\n"
        message += '\n - ' + '\n - '.join([f"{c['name']}" for c in public_collections])
        message += f"\n\nVous pouvez toutes les ajouter d'un coup en utilisant la commande `!collections use {config.albert_all_public_command}`"
    elif method == 'info':
        # The friendly private name maps back to the room id.
        collection_name = command[2] if command[2] != config.albert_my_private_collection_name else ep.room.room_id
        collection = get_or_not_collection_with_name(config, collection_name)
        if not collection:
            message = f"La collection {collection_name} n'existe pas."
        else:
            document_infos = [f"{d['name']} ({d['id']})" for d in get_documents(config, collection['id'])]
            if not document_infos:
                message = (
                    f"Collection '{command[2]}' ({collection['id']}) : \n\n"
                    f"Aucun document n'est présent dans cette collection ({collection['id']})."
                )
            else:
                document_infos_message = '\n - ' + '\n - '.join(document_infos)
                message = (
                    f"Collection '{command[2]}' ({collection['id']}) : \n\n"
                    "Voici les documents actuellement présents dans la collection : \n\n"
                    f"{document_infos_message}"
                    "\n\n"
                )
    elif method == 'use':
        if command[2] == config.albert_all_public_command:
            collections = list(get_all_public_collections(config))
            nothing_to_add_message = "Aucune collection publique n'est disponible."
        else:
            collection = get_or_not_collection_with_name(config, command[2])
            collections = [collection] if collection else []
            nothing_to_add_message = f"La collection {command[2]} n'existe pas."

        if not collections:
            # Always bind `message`, including when no public collection exists.
            message = nothing_to_add_message
        else:
            for collection in collections:
                config.albert_collections_by_id[collection["id"]] = collection
            collection_names = ','.join([c['name'] for c in collections])
            collection_infos = '\n - ' + '\n - '.join([f"{c['name']}" for c in config.albert_collections_by_id.values()])
            # Pick the first sentence in its own statement: a conditional
            # expression followed by adjacent string literals would attach
            # the literals to the `else` operand only.
            if len(collections) > 1:
                added_line = f"Les collections {collection_names} sont ajoutées à vos collections.\n\n"
            else:
                added_line = f"La collection {collection_names} est ajoutée à vos collections.\n\n"
            message = (
                f"{added_line}"
                "Maintenant, les collections :\n"
                f"{collection_infos}\n\n"
                "sont disponibles pour m'aider à répondre à vos questions."
            )
    else:
        # method == 'unuse'
        # NOTE(review): the collection name argument is required but not used
        # to select anything; every active collection is removed. Confirm
        # this is the intended behavior.
        removed_collections = list(config.albert_collections_by_id.values())
        collection_names = ','.join([c['name'] for c in removed_collections])
        config.albert_collections_by_id = {}
        if not removed_collections:
            message = "Il n'y avait pas de collections à retirer."
        else:
            message = f"Les collections {collection_names} sont retirées de vos collections."

    await matrix_client.send_markdown_message(ep.room.room_id, message, msgtype="m.notice")

@register_feature(
    group="albert",
    onEvent=RoomEncryptedFile,
    help=None
)
@only_allowed_user
async def albert_document(ep: EventParser, matrix_client: MatrixClient):
    """Upload a file sent in the room to the sender's private collection.

    For a supported mimetype (JSON, PDF, DOCX) the handler switches the
    sender's config to "rag" mode, gets or creates the collection named
    after the room id, registers it in `albert_collections_by_id`, uploads
    the decrypted file and answers with the list of documents now present
    in that collection. For any other mimetype it answers with a notice
    listing the supported formats.

    Any exception is logged, reported to the room with `AlbertMsg.failed`
    and, when `config.errors_room_id` is set, forwarded to that room.

    Args:
        ep: parsed event carrying the sender, the room and the file event.
        matrix_client: client used to send the typing state and the answer.
    """
    config = user_configs[ep.sender]
    supported_mimetypes = (
        'application/json',
        'application/pdf',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    )

    try:
        await matrix_client.room_typing(ep.room.room_id)
        if ep.event.mimetype in supported_mimetypes:
            config.update_last_activity()
            config.albert_mode = "rag"
            # The private collection is named after the room id.
            collection = get_or_create_collection_with_name(config, ep.room.room_id)
            config.albert_collections_by_id[collection['id']] = collection
            file = await get_decrypted_file(ep)
            upload_file(config, file, collection['id'])
            private_document_infos = [d['name'] for d in get_documents(config, collection['id'])]
            private_document_infos_message = '\n - ' + '\n - '.join(private_document_infos)
            response = (
                "Votre document : \n\n"
                f"\"{file.name}\"\n\n"
                "a été chargé dans votre collection privée.\n\n"
                "Voici les documents actuellement présents dans votre collection privée : \n\n"
                f"{private_document_infos_message}"
                "\n\n"
                "Je tiendrai compte de tous ces documents pour répondre. \n\n"
                "Vous pouvez taper \"!mode norag\" pour vider votre collection privée de tous ces documents."
            )
        else:
            response = (
                f"J'ai détecté que vous avez téléchargé un fichier {ep.event.mimetype}. "
                "Ce fichier n'est pas pris en charge par Albert. "
                "Veuillez téléverser un fichier PDF, DOCX ou JSON."
            )
        await matrix_client.send_markdown_message(ep.room.room_id, response, msgtype="m.notice")

    except Exception as albert_err:
        logger.error(f"{albert_err}")
        traceback.print_exc()
        await matrix_client.send_markdown_message(ep.room.room_id, AlbertMsg.failed, msgtype="m.notice")
        if config.errors_room_id:
            try:
                await matrix_client.send_markdown_message(config.errors_room_id, AlbertMsg.error_debug(albert_err, config))
            except Exception:
                # Best effort only: reporting to the errors room must never
                # mask the original failure, but KeyboardInterrupt/SystemExit
                # are no longer swallowed as they were with a bare `except:`.
                logger.error("Failed to find error room ?!")

@register_feature(
group="albert",
onEvent=RoomMessageText,
Expand All @@ -405,9 +569,11 @@ async def albert_answer(ep: EventParser, matrix_client: MatrixClient):
await matrix_client.send_markdown_message(
ep.room.room_id, reset_message, msgtype="m.notice"
)
flush_collections_with_name(config, ep.room.room_id)
config.albert_collections_by_id = {}

config.update_last_activity()
await matrix_client.room_typing(ep.room.room_id, typing_state=True, timeout=180_000)
await matrix_client.room_typing(ep.room.room_id)
try:
# Build the messages history
# --
Expand Down
6 changes: 5 additions & 1 deletion app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,22 @@ class Config(BaseConfig):
# ============================
# PER USER SETTINGS !
# ============================
albert_collections_by_id: dict[str, dict] = Field({}, description="Collections to use for Albert API chat completion with RAG")
albert_model: str = Field(
"AgentPublic/albertlight-7b",
description="Albert model name to use (see Albert models hub on HuggingFace)",
)
albert_model_embedding: str = Field("BAAI/bge-m3", description="Embedding model (Rag, COT, etc)")
albert_mode: str = Field("rag", description="Albert API mode")
albert_with_history: bool = Field(True, description="Conversational mode")
albert_history_lookup: int = Field(0, description="How far we lookup in the history")
albert_max_rewind: int = Field(20, description="Max history rewind for stability purposes")
albert_my_private_collection_name: str = Field("ma_collection_privée", description="Name of the private collection for the user")
albert_all_public_command: str = Field("<all_public>", description="Command to use to get all public collections")
conversation_obsolescence: int = Field(
15 * 60, description="time after which a conversation is considered obsolete, in seconds"
)
last_rag_references: list[dict] | None = Field(None, description="Last sources used for the RAG.")
last_rag_chunks: list[dict] | None = Field(None, description="Last chunks used for the RAG.")

@property
def is_conversation_obsolete(self) -> bool:
Expand Down
Loading

0 comments on commit 95f0126

Please sign in to comment.