diff --git a/app/server/embedding.py b/app/server/embedding.py index 79e4f0c..1ecde0c 100644 --- a/app/server/embedding.py +++ b/app/server/embedding.py @@ -109,7 +109,7 @@ def split_document( ################################## # Splitters - End ################################## - match extension: + match extension.lower(): case "pdf": doc_split = text_splitter.split_documents(document) case "html": @@ -151,7 +151,7 @@ def load_and_split_documents( stat = os.stat(file) extension = os.path.splitext(file)[1][1:] logger.info("Loading %s (%i bytes)", name, stat.st_size) - match extension: + match extension.lower(): case "pdf": loader = document_loaders.PyPDFLoader(file) case "html": @@ -161,7 +161,7 @@ def load_and_split_documents( case "csv": loader = document_loaders.CSVLoader(file) case _: - logger.error("Un-supported file extension: %s", extension) + raise ValueError(f"{extension} is not a supported file extension") loaded_doc = loader.load() logger.info("Loaded Pages: %i", len(loaded_doc)) diff --git a/app/server/endpoints.py b/app/server/endpoints.py index c8ba30f..df6b8bc 100644 --- a/app/server/endpoints.py +++ b/app/server/endpoints.py @@ -457,6 +457,8 @@ async def split_embed( ) return_files = list({doc.metadata["filename"] for doc in split_docos if "filename" in doc.metadata}) return schema.Response[list](data=return_files, msg=f"{len(split_docos)} chunks embedded.") + except ValueError as ex: + raise HTTPException(status_code=500, detail=str(ex)) from ex except Exception as ex: logger.error("An exception occurred: %s", ex) raise HTTPException(status_code=500, detail="Unexpected error") from ex