From f00d7cbb81c2be6de9df7b46619f18001d77f297 Mon Sep 17 00:00:00 2001
From: Emmett McFaralne
Date: Fri, 26 Apr 2024 22:15:33 -0400
Subject: [PATCH] fixed json datetime encoding

---
 setup.py                 |  2 +-
 thepipe_api/extractor.py | 10 +++++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 87ef909..9bec88e 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='thepipe_api',
-    version='0.3.1',
+    version='0.3.2',
     author='Emmett McFarlane',
     author_email='emmett@thepi.pe',
     description='Automate information extraction for multimodal LLMs.',
diff --git a/thepipe_api/extractor.py b/thepipe_api/extractor.py
index 2dae60e..d88128d 100644
--- a/thepipe_api/extractor.py
+++ b/thepipe_api/extractor.py
@@ -15,8 +15,16 @@
 import tempfile
 import mimetypes
 import dotenv
+from datetime import datetime
+from json import JSONEncoder
 dotenv.load_dotenv()
 
+class JSONDateEncoder(JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, datetime):
+            return obj.isoformat()
+        return JSONEncoder.default(self, obj)
+
 FILES_TO_IGNORE = {'package-lock.json', '.gitignore', '.bin', '.pyc', '.pyo', '.exe', '.bat', '.dll', '.obj', '.o', '.a', '.lib', '.so', '.dylib', '.ncb', '.sdf', '.suo', '.pdb', '.idb', '.pyd', '.ipynb_checkpoints', '.npy', '.pth'} # Files to ignore, please feel free to customize!
 CODE_EXTENSIONS = {'.h', '.json', '.js', '.jsx', '.ts', '.tsx', '.cs', '.java', '.html', '.css', '.ini', '.xml', '.yaml', '.xaml', '.sh'} # Plaintext files that should not be compressed with LLMLingua
 CTAGS_CODE_EXTENSIONS = {'.c', '.cpp', '.py'} # code files that work with ctags
@@ -244,7 +252,7 @@ def extract_spreadsheet(file_path: str) -> Chunk:
     elif file_path.endswith(".xls") or file_path.endswith(".xlsx"):
         df = pd.read_excel(file_path)
     dict = df.to_dict(orient='records')
-    json_dict = json.dumps(dict, indent=4)
+    json_dict = json.dumps(dict, indent=4, cls=JSONDateEncoder)
     return Chunk(path=file_path, text=json_dict, image=None, source_type=SourceTypes.SPREADSHEET)
 
 def extract_url(url: str, text_only: bool = False, local: bool = True, limit: int = None) -> List[Chunk]: