Feature summarize to doc #12

Open · wants to merge 4 commits into base: next
3 changes: 3 additions & 0 deletions .hydra-conf/config.yaml
@@ -43,4 +43,7 @@ services_broker:
url: ${oc.env:SERVICES_BROKER,redis://localhost:6379}
password: ${oc.env:BROKER_PASS,EMPTY}

document:
min_chunk_size: ${oc.decode:${oc.env:MIN_CHUNK_SIZE,null}}

debug: false
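Review note: for anyone unfamiliar with the resolver chain used above, here is a minimal sketch of how `document.min_chunk_size` resolves, assuming standard OmegaConf `oc.env`/`oc.decode` behavior; the environment value is illustrative.

```python
# Sketch of the oc.decode/oc.env chain from the config above (illustrative value).
import os
from omegaconf import OmegaConf

os.environ["MIN_CHUNK_SIZE"] = "120"

cfg = OmegaConf.create(
    {"document": {"min_chunk_size": "${oc.decode:${oc.env:MIN_CHUNK_SIZE,null}}"}}
)

# oc.env reads the variable as a string ("120") and oc.decode converts it to an int;
# when MIN_CHUNK_SIZE is unset, the default "null" decodes to None.
print(cfg.document.min_chunk_size)  # -> 120
```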
9 changes: 7 additions & 2 deletions .hydra-conf/services/fr.yaml
@@ -1,5 +1,5 @@
fr:
type: summary
type: document
fields: 2
name: summarize-fr
route: summarize-fr
@@ -20,4 +20,9 @@ fr:
reduceSummary: false
consolidateSummary: false
reduce_prompt: null
type: abstractive
type: markdown
document:
template_path: ./templates/template.md
max_sentences_title_generation: ${oc.decode:${oc.env:MAX_SENTENCES_TITLE_GENERATION,6}}
doc_title_prompt: ./prompts/doc_title_prompt.txt
paragraph_title_prompt: ./prompts/paragraph_title_prompt.txt
20 changes: 11 additions & 9 deletions app/backends/backend.py
@@ -39,9 +39,12 @@ def __init__(self, task_data):
self.logger.info(f"Setting up backend with params: {task_data['backendParams']} for task: {task_data['task_id']}")
self.task_id = task_data['task_id']
self.content = task_data['content']
self.name = task_data['name']
self.task_type = task_data['type']
self.promptFields = task_data['fields']
try:
# Load prompt from txt file
self.loadPrompt(task_data["type"], task_data["fields"])
self.loadPrompt()

# Set default values for all attributes
for key, default_value in cfg.backend_defaults.items():
@@ -56,12 +59,12 @@ def __init__(self, task_data):
setattr(self, key, value)

# Set up tokenizer and chunker
self.tokenizer = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer")
self.tokenizer = LlamaTokenizerFast.from_pretrained("hf-internal-testing/llama-tokenizer", legacy=False)
self.prompt_token_count = len(self.tokenizer(self.prompt)['input_ids'])
self.chunker = Chunker(self.tokenizer, self.createNewTurnAfter)

if (task_data["backendParams"]['reduceSummary'] == True) and (task_data["backendParams"]["reduce_prompt"] is not None):
self.load_reduce_prompt(task_data["type"], task_data["backendParams"]["reduce_prompt"])
self.load_reduce_prompt(task_data["name"], task_data["backendParams"]["reduce_prompt"])
else :
self.reduce_prompt = None

@@ -71,20 +74,19 @@ def __init__(self, task_data):
self.logger.error(f"Error setting up backend: {e}")
raise e

def loadPrompt(self, service_name: str, fieldCount: int = 0):
def loadPrompt(self):
"""
Loads the prompt from a text file and sets the prompt fields.

Args:
service_name (str): The name of the service to load the prompt for.
fieldCount (int, optional): The number of prompt fields. Defaults to 0.
"""
self.logger.info(f"Loading prompt for service: {service_name}")
self.promptFields = fieldCount
self.logger.info(f"Loading prompt for service: {self.name}")
self.logger.info(f"Prompt fields: {self.promptFields}")

# Construct path to the prompt text file
txt_filepath = os.path.join(cfg.prompt_path,f'{service_name}.txt')
txt_filepath = os.path.join(cfg.prompt_path,f'{self.name}.txt')
try:
with open(txt_filepath, 'r') as f:
# Prevent file system caching
@@ -95,8 +97,8 @@ def loadPrompt(self, service_name: str, fieldCount: int = 0):
self.logger.error(f"Error loading prompt from {txt_filepath}: {e}")
raise e

def load_reduce_prompt(self, service_name: str, reduce_prompt: str):
self.logger.info(f"Loading reduce prompt for service: {service_name}")
def load_reduce_prompt(self, reduce_prompt: str):
self.logger.info(f"Loading reduce prompt for service: {self.name}")

# Construct path to the prompt text file
txt_filepath = os.path.join(cfg.prompt_path,f'{reduce_prompt}.txt')
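Review note: with `name` and `type` now separate, prompt files are resolved by the service name rather than its type. A hedged sketch of the lookup; the value of `cfg.prompt_path` is an assumption, only the `prompts/<name>.txt` convention comes from the diff.

```python
# Hedged sketch of the prompt lookup after this change: prompts are keyed by the
# service *name* (e.g. "summarize-fr"), not by its type.
import os

prompt_path = "./prompts"        # assumed value of cfg.prompt_path
service_name = "summarize-fr"    # task_data["name"], forwarded by ingress.py

txt_filepath = os.path.join(prompt_path, f"{service_name}.txt")
with open(txt_filepath, "r") as f:
    prompt = f.read()
print(f"Loaded {len(prompt)} characters for {service_name}")
```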
149 changes: 149 additions & 0 deletions app/backends/document.py
@@ -0,0 +1,149 @@
from pydantic import BaseModel
import asyncio
import jinja2
from typing import List, Sequence
from pathlib import Path
import os

import logging

from langchain_experimental.text_splitter import SemanticChunker
from langchain_huggingface import HuggingFaceEmbeddings
import re
from conf import cfg_instance

# Load configuration
cfg = cfg_instance(cfg_name="config")
semantic_chunker = SemanticChunker(HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"), min_chunk_size=cfg.document.min_chunk_size)

class Chapter(BaseModel):
title: str
content: str

class DocGenerator:
def __init__(self, task_data, llm_adapter) -> None:
self.document_config = task_data['backendParams']["document"]
self.llm_adapter = llm_adapter
self.chunker = semantic_chunker
self.template_path = self.document_config["template_path"]
self.template_file = Path(self.template_path).name
self.max_sentences_title_generation = self.document_config["max_sentences_title_generation"]
self.semaphore = asyncio.Semaphore(cfg.semaphore.max_concurrent_inferences)
self.logger = logging.getLogger("DocGenerator")
# System prompts
self.load_prompts(task_data)

def load_prompts(self, task_data):
"""
Loads the title generation prompts from text files.
"""

# Construct path to the prompt text files from task config
doc_title_prompt_path = self.document_config["doc_title_prompt"]
paragraph_title_prompt_path = self.document_config["paragraph_title_prompt"]
try:
with open(doc_title_prompt_path, 'r') as f:
# Prevent file system caching
os.fsync(f.fileno())
self.doc_title_prompt = f.read()
with open(paragraph_title_prompt_path, 'r') as f:
# Prevent file system caching
os.fsync(f.fileno())
self.paragraph_title_prompt = f.read()
except Exception as e:
self.logger.error(f"Error loading title generation prompts: {e}")
raise e

def split_summary_into_paragraphs(self, summary):
"""
Splits the provided summary text into paragraphs.
Args:
summary (str): The summary text to be split into paragraphs.
Returns:
list: A list of paragraphs obtained from the summary text.
"""

return self.chunker.split_text(summary)


async def create_chapters(self, paragraphs: List[str]) -> List:
"""
Asynchronously creates chapters from a list of paragraphs.
This method processes each paragraph, generates a title using the first max_sentences_title_generation sentences,
and creates a Chapter object with the generated title and the paragraph content.
Args:
paragraphs (List[str]): A list of paragraphs to be converted into chapters.
Returns:
Sequence[Chapter]: A sequence of Chapter objects created from the paragraphs.
"""

chapters: List[Chapter] = []

# Process each paragraph
async def process_paragraph(paragraph: str):
async with self.semaphore:
sentences = re.split(self.chunker.sentence_split_regex, paragraph)[:self.max_sentences_title_generation]
title = await self.llm_adapter.async_publish(' '.join(sentences), system_prompt=self.paragraph_title_prompt, temperature=0.5, max_tokens=20)
return Chapter(title=title, content=paragraph)

tasks = [process_paragraph(paragraph) for paragraph in paragraphs]

# Generate chapters
chapters = await asyncio.gather(*tasks)

# Generate document title based on chapter titles
chapter_titles = [chapter.title for chapter in chapters]
title = await self.llm_adapter.async_publish('\n'.join(chapter_titles), system_prompt=self.doc_title_prompt, temperature=0.5, max_tokens=20)
return title, chapters


def render_document(self, title: str, chapters: Sequence[Chapter]) -> str:
"""
Renders a document using a Jinja template.
Args:
title (str): The title of the document.
chapters (Sequence[Chapter]): A sequence of Chapter objects to be included in the document.
Returns:
str: The rendered document as a string.
"""

chapters_to_render = [
{
"title": chapter.title,
"paragraph": chapter.content,
}
for chapter in chapters if chapter.content
]

# Load Jinja template
current_dir = Path(self.template_path).parent
jinja_environment = jinja2.Environment(loader=jinja2.FileSystemLoader(current_dir))
template = jinja_environment.get_template(self.template_file)

# Render template
rendered = template.render(
title=title,
chapters=chapters_to_render,
)

return rendered

def run(self, summary: str) -> str:
"""
Pipeline that splits a summary into paragraphs, segments them into titled chapters, and renders a markdown document.
Args:
summary (str): The input summary to process.
Returns:
str: The rendered markdown document.
"""
self.logger.info("Running document generation")

# Split summary into paragraphs
paragraphs = self.split_summary_into_paragraphs(summary)

# Create chapters
title, chapters = asyncio.run(self.create_chapters(paragraphs))

# Render document
return self.render_document(title, chapters)

# TODO: add min_chunk_size to the config and use it in the semantic split
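Review note: `templates/template.md` itself is not part of this diff. Below is a stand-in sketch of a template that would satisfy `render_document`, which passes `title` plus a list of chapters carrying `title` and `paragraph` keys; the inline template and sample values are illustrative only.

```python
# Stand-in demonstration of what render_document expects from the Jinja template.
# The real templates/template.md is not in this PR; this inline template only
# mirrors the variables render_document passes (title, chapters[].title,
# chapters[].paragraph).
from pathlib import Path
import jinja2

template_text = (
    "# {{ title }}\n\n"
    "{% for chapter in chapters %}"
    "## {{ chapter.title }}\n\n"
    "{{ chapter.paragraph }}\n\n"
    "{% endfor %}"
)

tmp_dir = Path("./_template_demo")
tmp_dir.mkdir(exist_ok=True)
(tmp_dir / "template.md").write_text(template_text)

env = jinja2.Environment(loader=jinja2.FileSystemLoader(tmp_dir))
markdown = env.get_template("template.md").render(
    title="Weekly meeting",
    chapters=[{"title": "Budget", "paragraph": "Spending stayed within plan."}],
)
print(markdown)
```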
12 changes: 9 additions & 3 deletions app/backends/llm_inference.py
@@ -1,13 +1,14 @@
import asyncio
from .backend import LLMBackend
from .batch_manager import BatchManager

from .document import DocGenerator

class LLMInferenceEngine(LLMBackend):
def __init__(self, task_data: dict, celery_task):
super().__init__(task_data)
self.batch_manager = BatchManager(task_data,self.tokenizer, self.prompt, self.prompt_token_count, self.reduce_prompt, celery_task)

if self.task_type == "document":
self.doc_generator = DocGenerator(task_data, self.batch_manager.openai_adapter)

def run(self) -> str:
"""
@@ -44,5 +45,10 @@ def run(self) -> str:
# consolidate turns for progressive summary
if self.consolidateSummary:
self.summary = self.chunker.consolidate_turns(self.summary)

self.summary = self.batch_manager.format_summary(self.summary)

if self.task_type == "document":
self.summary = self.doc_generator.run(self.summary)

return self.batch_manager.format_summary(self.summary)
return self.summary
72 changes: 58 additions & 14 deletions app/backends/openai_adapter.py
@@ -3,6 +3,9 @@
from typing import List
from tenacity import retry, stop_after_attempt, wait_random_exponential
from conf import cfg_instance
import typing
from pydantic import BaseModel, AfterValidator
import json

cfg = cfg_instance(cfg_name="config")

@@ -21,7 +24,14 @@ def __init__(self, task_data: dict):


@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(cfg.api_params.max_retries))
def publish(self, content: str) -> str:
def publish(
self,
content: str,
system_prompt: typing.Optional[str] = None,
temperature: typing.Optional[float] = None,
top_p: typing.Optional[float] = None,
max_tokens: typing.Optional[int] = None
) -> str:
"""
Sync publishes a message to the OpenAI chat model and returns the response.
Args:
@@ -30,21 +40,31 @@ def publish(self, content: str) -> str:
str: The response content from the chat model if successful.
None: If an error occurs during the publishing process.
"""

# Add system prompt if provided
messages = [{"role": "system", "content": system_prompt}] if system_prompt else []

# Add user message
messages.append({"role": "user", "content": content})
chat_response = self.client.chat.completions.create(
model=self.modelName,

messages=[
{"role": "user", "content": content}
],
temperature=self.temperature,
top_p=self.top_p,
max_tokens=self.maxGenerationLength
messages=messages,
temperature=temperature if temperature is not None else self.temperature,
top_p=top_p if top_p is not None else self.top_p,
max_tokens=max_tokens if max_tokens is not None else self.maxGenerationLength
)
return chat_response.choices[0].message.content


@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(cfg.api_params.max_retries))
async def async_publish(self, content: str) -> str:
async def async_publish(
self,
content: str,
system_prompt: typing.Optional[str] = None,
temperature: typing.Optional[float] = None,
top_p: typing.Optional[float] = None,
max_tokens: typing.Optional[int] = None
) -> str:
"""
Async publishes a message to the OpenAI chat model and returns the response.
Args:
@@ -53,11 +73,35 @@ async def async_publish(self, content: str) -> str:
str: The response content from the chat model if successful.
None: If an error occurs during the publishing process.
"""

# Add system prompt if provided
messages = [{"role": "system", "content": system_prompt}] if system_prompt else []

# Add user message
messages.append({"role": "user", "content": content})

chat_response = await self.async_client.chat.completions.create(
model=self.modelName,
messages=[{"role": "user", "content": content}],
temperature=self.temperature,
top_p=self.top_p,
max_tokens=self.maxGenerationLength
messages=messages,
temperature=temperature if temperature is not None else self.temperature,
top_p=top_p if top_p is not None else self.top_p,
max_tokens=max_tokens if max_tokens is not None else self.maxGenerationLength
)
return chat_response.choices[0].message.content


@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(cfg.api_params.max_retries))
async def generate_title(self, text : str) -> str:
messages = [
{"role": "system", "content": "Please generate a short title for the following text.\n\nBe VERY SUCCINCT. No more than 6 words."},
{"role": "user", "content": text},
]


response = await self.async_client.chat.completions.create(
model=self.modelName,
messages=messages,
max_tokens=20,
temperature=0.5,
)
return chat_response.choices[0].message.content
return response.choices[0].message.content.strip()
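Review note: a short sketch of how the new per-call overrides are intended to be used; `adapter` and the input strings are placeholders, and arguments left out still fall back to the values configured from `task_data`.

```python
# Sketch of the new per-call overrides on async_publish; "adapter" stands in for
# an already-configured OpenAIAdapter instance.
import asyncio

async def demo(adapter):
    # Title generation as DocGenerator does it: a dedicated system prompt plus
    # tighter sampling and length limits for this single call.
    title = await adapter.async_publish(
        "First few sentences of a paragraph...",
        system_prompt="Generate a short title. No more than 6 words.",
        temperature=0.5,
        max_tokens=20,
    )
    # A plain call keeps the pre-existing behaviour: adapter-level defaults apply.
    summary = await adapter.async_publish("Summarize this meeting transcript...")
    return title, summary

# asyncio.run(demo(adapter))  # run with a configured adapter instance
```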
3 changes: 2 additions & 1 deletion app/http_server/ingress.py
@@ -76,7 +76,8 @@ async def generate(file: UploadFile = None, flavor: str = Form(...), temperature

task_data = {
"backend": service['backend'],
"type": service['name'],
"name": service['name'],
"type": service['type'],
"backendParams": backend_params,
"fields": service['fields'],
"content": content
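Review note: with `name` and `type` now passed as separate keys, a backend task payload looks roughly like the sketch below. All values are placeholders, only the keys visible in this PR are assumed, and the nested `document` block assumes the service-level config is forwarded into `backendParams` the way `DocGenerator` reads it.

```python
# Illustrative task_data payload under the new name/type split. Values are
# placeholders; only keys shown in this PR are assumed to exist.
task_data = {
    "task_id": "1234-abcd",        # hypothetical id
    "backend": "llm",              # service['backend'], value hypothetical
    "name": "summarize-fr",        # used to locate prompts/<name>.txt
    "type": "document",            # selects the DocGenerator post-processing
    "fields": 2,
    "content": "placeholder transcript text",
    "backendParams": {
        "reduceSummary": False,
        "consolidateSummary": False,
        "reduce_prompt": None,
        "document": {
            "template_path": "./templates/template.md",
            "max_sentences_title_generation": 6,
            "doc_title_prompt": "./prompts/doc_title_prompt.txt",
            "paragraph_title_prompt": "./prompts/paragraph_title_prompt.txt",
        },
    },
}
```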