Skip to content

Commit

Permalink
fixed json datetime encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
emcf committed Apr 27, 2024
1 parent da82427 commit f00d7cb
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='thepipe_api',
version='0.3.1',
version='0.3.2',
author='Emmett McFarlane',
author_email='[email protected]',
description='Automate information extraction for multimodal LLMs.',
Expand Down
10 changes: 9 additions & 1 deletion thepipe_api/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,16 @@
import tempfile
import mimetypes
import dotenv
from datetime import datetime
from json import JSONEncoder
dotenv.load_dotenv()

class JSONDateEncoder(JSONEncoder):
    """JSON encoder that serialises date/time values as ISO 8601 strings.

    The stock ``JSONEncoder`` raises ``TypeError`` for ``datetime`` objects.
    This subclass converts ``datetime.datetime``, ``datetime.date`` and
    ``datetime.time`` instances (and their subclasses) to the string
    returned by their ``isoformat()`` method. Every other type is left to
    the base class.

    Pass it to ``json.dumps`` through the ``cls`` argument.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``.

        Args:
            obj: A value the standard encoder could not serialise.

        Returns:
            ``obj.isoformat()`` when ``obj`` is a datetime, date or time.

        Raises:
            TypeError: Raised by the base class for any other type.
        """
        # Imported locally so the class does not rely on names beyond the
        # module-level ``datetime`` and ``JSONEncoder`` imports.
        from datetime import date, time

        # ``datetime`` is a subclass of ``date``; it is listed explicitly
        # only for readability. Plain dates and times are covered as well
        # (NOTE(review): spreadsheet readers may yield ``datetime.time`` for
        # time-only cells - confirm against the data actually seen).
        if isinstance(obj, (datetime, date, time)):
            return obj.isoformat()
        return super().default(obj)

# Entries to skip during extraction. NOTE(review): the set mixes exact file names (e.g. 'package-lock.json', '.gitignore') with extension-like entries (e.g. '.pyc'); the matching logic is outside this view - confirm how entries are compared.
FILES_TO_IGNORE = {'package-lock.json', '.gitignore', '.bin', '.pyc', '.pyo', '.exe', '.bat', '.dll', '.obj', '.o', '.a', '.lib', '.so', '.dylib', '.ncb', '.sdf', '.suo', '.pdb', '.idb', '.pyd', '.ipynb_checkpoints', '.npy', '.pth'} # Files to ignore, please feel free to customize!
# Extensions treated as plaintext code/config; per the inline note these are excluded from LLMLingua compression.
CODE_EXTENSIONS = {'.h', '.json', '.js', '.jsx', '.ts', '.tsx', '.cs', '.java', '.html', '.css', '.ini', '.xml', '.yaml', '.xaml', '.sh'} # Plaintext files that should not be compressed with LLMLingua
# Source-file extensions handled through ctags (per the inline note); the ctags call itself is not visible here.
CTAGS_CODE_EXTENSIONS = {'.c', '.cpp', '.py'} # code files that work with ctags
Expand Down Expand Up @@ -244,7 +252,7 @@ def extract_spreadsheet(file_path: str) -> Chunk:
elif file_path.endswith(".xls") or file_path.endswith(".xlsx"):
df = pd.read_excel(file_path)
dict = df.to_dict(orient='records')
json_dict = json.dumps(dict, indent=4)
json_dict = json.dumps(dict, indent=4, cls=JSONDateEncoder)
return Chunk(path=file_path, text=json_dict, image=None, source_type=SourceTypes.SPREADSHEET)

def extract_url(url: str, text_only: bool = False, local: bool = True, limit: int = None) -> List[Chunk]:
Expand Down

0 comments on commit f00d7cb

Please sign in to comment.