From 87343ec7fdfdf19aae3467efc00cf8ff5eea2b0d Mon Sep 17 00:00:00 2001 From: Emmett McFaralne Date: Sat, 20 Jul 2024 21:10:35 -0400 Subject: [PATCH] increase default max pages to use max allocation of gpus --- thepipe/scraper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thepipe/scraper.py b/thepipe/scraper.py index 0394430..3093d6b 100644 --- a/thepipe/scraper.py +++ b/thepipe/scraper.py @@ -149,7 +149,7 @@ def scrape_zip(file_path: str, include_regex: Optional[str] = None, verbose: boo def scrape_pdf(file_path: str, ai_extraction: bool = False, text_only: bool = False, verbose: bool = False) -> List[Chunk]: chunks = [] BATCH_SIZE = 16 - MAX_PAGES = 100 + MAX_PAGES = 128 if ai_extraction: # if using AI extraction, for each page, generate markdown and cropped figures