Skip to content

Commit

Permalink
Pass the `local` flag through to directory extraction
Browse files: browse the repository at this point in the history
  • Loading branch information
emcf committed Apr 16, 2024
1 parent 973b897 commit 1afbb47
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='thepipe_api',
version='0.2.2',
version='0.2.3',
author='Emmett McFarlane',
author_email='[email protected]',
description='Automate information extraction for multimodal LLMs.',
Expand Down
6 changes: 3 additions & 3 deletions thepipe_api/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def extract_from_source(source: str, match: Optional[str] = None, ignore: Option
if source_type == SourceTypes.DIR or source == '.' or source == './':
if source == '.' or source == './':
source = os.getcwd()
return extract_from_directory(dir_path=source, match=match, ignore=ignore, verbose=verbose, ai_extraction=ai_extraction, text_only=text_only, limit=limit)
return extract_from_directory(dir_path=source, match=match, ignore=ignore, verbose=verbose, ai_extraction=ai_extraction, text_only=text_only, limit=limit, local=local)
elif source_type == SourceTypes.GITHUB:
return extract_github(github_url=source, file_path='', match=match, ignore=ignore, text_only=text_only, verbose=verbose, ai_extraction=ai_extraction, branch='master')
elif source_type == SourceTypes.URL:
Expand Down Expand Up @@ -148,13 +148,13 @@ def should_ignore(file_path: str, ignore: Optional[str] = None) -> bool:
return True
return False

def extract_from_directory(dir_path: str, match: Optional[str] = None, ignore: Optional[str] = None, verbose: bool = False, ai_extraction: bool = False, text_only: bool = False, limit: int = None) -> List[Chunk]:
def extract_from_directory(dir_path: str, match: Optional[str] = None, ignore: Optional[str] = None, verbose: bool = False, ai_extraction: bool = False, text_only: bool = False, limit: int = None, local: bool = True) -> List[Chunk]:
all_files = glob.glob(dir_path + "/**/*", recursive=True)
matched_files = [file for file in all_files if re.search(match, file, re.IGNORECASE)] if match else all_files
file_paths = [file for file in matched_files if not should_ignore(file, ignore)]
contents = []
with ThreadPoolExecutor() as executor:
results = executor.map(lambda file_path: extract_from_source(source=file_path, match=match, ignore=ignore, verbose=verbose, ai_extraction=ai_extraction, text_only=text_only, limit=limit), file_paths)
results = executor.map(lambda file_path: extract_from_source(source=file_path, match=match, ignore=ignore, verbose=verbose, ai_extraction=ai_extraction, text_only=text_only, limit=limit, local=local), file_paths)
for result in results:
contents += result
return contents
Expand Down

0 comments on commit 1afbb47

Please sign in to comment.