Skip to content

Commit

Permalink
Compression amount now chunk-based, also versioned for PyPI
Browse files (browse the repository at this point in the history)
  • Loading branch information
emcf committed May 17, 2024
1 parent e7059a3 commit a2aa550
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='thepipe_api',
- version='0.3.4',
+ version='0.3.5',
author='Emmett McFarlane',
author_email='[email protected]',
description='Automate information extraction for multimodal LLMs.',
Expand Down
4 changes: 2 additions & 2 deletions thepipe_api/compressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
CTAGS_EXECUTABLE_PATH = "C:\ctags.exe" if os.name == 'nt' else "ctags-universal"
CTAGS_LANGUAGES = {'py': "Python", "cpp": "C++", "c": "C"}
CTAGS_OUTPUT_FILE = 'ctags_output.json'
- MAX_COMPRESSION_ATTEMPTS = 10
+ MAX_COMPRESSION_ATTEMPTS = 25

def compress_with_ctags(chunk: Chunk, extension: str) -> Chunk:
if chunk.text is None:
Expand Down Expand Up @@ -96,7 +96,7 @@ def calculate_tokens(chunk: Chunk) -> int:

def compress_chunks(chunks: List[Chunk], verbose: bool = False, limit: Optional[int] = None) -> List[Chunk]:
new_chunks = chunks
- for _ in range(MAX_COMPRESSION_ATTEMPTS):
+ for _ in range(min(MAX_COMPRESSION_ATTEMPTS, len(chunks))):
if count_tokens(new_chunks) <= limit:
break
if verbose: print_status(f"Compressing prompt ({count_tokens(chunks)} tokens / {limit} limit)", status='info')
Expand Down

0 comments on commit a2aa550

Please sign in to comment.