To reproduce the issue:
from adam_core.orbits import Orbits
from adam_core.utils.helpers import make_real_orbits

orbits = make_real_orbits()
# Replace the covariance values column with nulls
orbits = orbits.set_column("coordinates.covariance.values", None)
# Round-trip through Parquet
orbits.to_parquet("orbits.parquet")
orbits = Orbits.from_parquet("orbits.parquet")
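For contrast, the same round trip is expected to succeed when the covariance values are left populated, which points at the nulled column as the trigger. A minimal check under that assumption (the file name here is illustrative):

from adam_core.orbits import Orbits
from adam_core.utils.helpers import make_real_orbits

# Same round trip, but without nulling the covariance column; this should
# succeed if the failure is specific to null covariance values.
orbits = make_real_orbits()
orbits.to_parquet("orbits_with_cov.parquet")
roundtripped = Orbits.from_parquet("orbits_with_cov.parquet")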
This raises an ArrowInvalid error:
---------------------------------------------------------------------------
ArrowInvalid                              Traceback (most recent call last)
Cell In[6], line 1
----> 1 orbits.from_parquet("test.parquet")

File ~/software/anaconda3/envs/thor_py310/lib/python3.10/site-packages/quivr/tables.py:797, in Table.from_parquet(cls, path, memory_map, pq_buffer_size, filters, column_name_map, validate, **kwargs)
    (...)
--> 797 table = cls._load_parquet_table(
    798     path=path,
    799     memory_map=memory_map,
    800     pq_buffer_size=pq_buffer_size,
    801     filters=filters,
    802     column_name_map=column_name_map,
    803 )
    804 return cls.from_pyarrow(table=table, validate=validate, permit_nulls=False, **kwargs)

File ~/software/anaconda3/envs/thor_py310/lib/python3.10/site-packages/quivr/tables.py:830, in Table._load_parquet_table(cls, path, memory_map, pq_buffer_size, filters, column_name_map)
    827 column_names = [field.name for field in cls.schema]
    828 schema = cls.schema
--> 830 table = pyarrow.parquet.read_table(
    831     source=path,
    832     columns=column_names,
    833     memory_map=memory_map,
    834     buffer_size=pq_buffer_size,
    835     filters=filters,
    836     schema=schema,
    837 )
    838 md = pyarrow.parquet.read_metadata(path, memory_map=memory_map)
    839 table = table.replace_schema_metadata(md.metadata)

File ~/software/anaconda3/envs/thor_py310/lib/python3.10/site-packages/pyarrow/parquet/core.py:3002, in read_table(source, columns, use_threads, metadata, schema, use_pandas_metadata, read_dictionary, memory_map, buffer_size, partitioning, filesystem, filters, use_legacy_dataset, ignore_prefixes, pre_buffer, coerce_int96_timestamp_unit, decryption_properties, thrift_string_size_limit, thrift_container_size_limit)
   2991 # TODO test that source is not a directory or a list
   2992 dataset = ParquetFile(
   2993     source, metadata=metadata, read_dictionary=read_dictionary,
   2994     memory_map=memory_map, buffer_size=buffer_size,
    (...)
   2999     thrift_container_size_limit=thrift_container_size_limit,
   3000 )
-> 3002 return dataset.read(columns=columns, use_threads=use_threads,
   3003                     use_pandas_metadata=use_pandas_metadata)

File ~/software/anaconda3/envs/thor_py310/lib/python3.10/site-packages/pyarrow/parquet/core.py:2630, in _ParquetDatasetV2.read(self, columns, use_threads, use_pandas_metadata)
   2626 columns = (
   2627     list(columns) + list(set(index_columns) - set(columns))
   2628 )
-> 2630 table = self._dataset.to_table(
   2631     columns=columns, filter=self._filter_expression,
   2632     use_threads=use_threads
   2633 )

File ~/software/anaconda3/envs/thor_py310/lib/python3.10/site-packages/pyarrow/_dataset.pyx:556, in pyarrow._dataset.Dataset.to_table()
File ~/software/anaconda3/envs/thor_py310/lib/python3.10/site-packages/pyarrow/_dataset.pyx:3638, in pyarrow._dataset.Scanner.to_table()
File ~/software/anaconda3/envs/thor_py310/lib/python3.10/site-packages/pyarrow/error.pxi:144, in pyarrow.lib.pyarrow_internal_check_status()
File ~/software/anaconda3/envs/thor_py310/lib/python3.10/site-packages/pyarrow/error.pxi:100, in pyarrow.lib.check_status()

ArrowInvalid: Expected all lists to be of size=36 but index 1 had size=0
This is ultimately caused by apache/arrow#35692.
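The failure can likely be reproduced with pyarrow alone, without adam_core or quivr. The sketch below assumes the upstream issue concerns null entries in fixed_size_list columns round-tripped through Parquet (the covariance values appear to be stored as a 36-element fixed-size list, i.e. a flattened 6x6 matrix); the schema and file name are illustrative, not taken from the report:

import pyarrow as pa
import pyarrow.parquet as pq

# A 36-element fixed-size list column with one null entry.
list_type = pa.fixed_size_list(pa.float64(), 36)
schema = pa.schema([("values", list_type)])
table = pa.table({"values": pa.array([[0.0] * 36, None], type=list_type)}, schema=schema)

pq.write_table(table, "repro.parquet")
# On affected pyarrow versions, reading back while enforcing the
# fixed_size_list schema is expected to raise:
#   ArrowInvalid: Expected all lists to be of size=36 but index 1 had size=0
pq.read_table("repro.parquet", schema=schema)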