Skip to content

Commit

Permalink
Merge pull request #10 from emcf/dotenv-support
Browse files (browse the repository at this point in the history)
Prompt formatting fixes, dotenv
  • Loading branch information
emcf authored Apr 19, 2024
2 parents 3836f0f + 5d95089 commit 559ec55
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 3 deletions.
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ requests
pillow
cssutils
beautifulsoup4
-magika
+magika
+python-dotenv
3 changes: 2 additions & 1 deletion requirements_local.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ torch
llmlingua
PyMuPDF
pdf2image
-python-magic
+python-magic
+python-dotenv
5 changes: 5 additions & 0 deletions thepipe_api/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from .core import Chunk, print_status, SourceTypes, create_chunks_from_messages, API_URL
import tempfile
import mimetypes
+import dotenv
+dotenv.load_dotenv()

FILES_TO_IGNORE = {'package-lock.json', '.gitignore', '.bin', '.pyc', '.pyo', '.exe', '.bat', '.dll', '.obj', '.o', '.a', '.lib', '.so', '.dylib', '.ncb', '.sdf', '.suo', '.pdb', '.idb', '.pyd', '.ipynb_checkpoints', '.npy', '.pth'} # Files to ignore, please feel free to customize!
CODE_EXTENSIONS = {'.h', '.json', '.js', '.jsx', '.ts', '.tsx', '.cs', '.java', '.html', '.css', '.ini', '.xml', '.yaml', '.xaml', '.sh'} # Plaintext files that should not be compressed with LLMLingua
Expand Down Expand Up @@ -62,6 +64,9 @@ def extract_from_file(file_path: str, source_type: str, verbose: bool = False, a
if 'error' in response:
raise ValueError(f"{response['error']}")
chunks = create_chunks_from_messages(response['messages'])
+for c in chunks:
+c.path = file_path
+c.source_type = source_type
return chunks
try:
if source_type == SourceTypes.PDF:
Expand Down
2 changes: 1 addition & 1 deletion thepipe_api/thepipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def save_outputs(chunks: List[Chunk], verbose: bool = False, text_only: bool = F
if chunk is None:
continue
if chunk.text is not None:
-text += f"""```\n{chunk.text}\n```\n\n"""
+text += chunk.text
if (chunk.image is not None) and (not text_only):
if chunk.path is None:
clean_path = f"image"
Expand Down

0 comments on commit 559ec55

Please sign in to comment.