diff --git a/src/pymovements/gaze/io.py b/src/pymovements/gaze/io.py index 68fb6d4ec..8c4a753b8 100644 --- a/src/pymovements/gaze/io.py +++ b/src/pymovements/gaze/io.py @@ -279,6 +279,7 @@ def from_asc( trial_columns: str | list[str] | None = None, add_columns: dict[str, str] | None = None, column_schema_overrides: dict[str, Any] | None = None, + encoding: str = 'ascii', ) -> GazeDataFrame: """Initialize a :py:class:`pymovements.gaze.GazeDataFrame`. @@ -307,6 +308,8 @@ def from_asc( column_schema_overrides: dict[str, Any] | None Dictionary containing types for columns. (default: None) + encoding: str + Text encoding of the file. (default: 'ascii') Returns ------- @@ -351,7 +354,11 @@ def from_asc( # Read data. gaze_data, metadata = parse_eyelink( - file, patterns=patterns, schema=schema, metadata_patterns=metadata_patterns, + file, + patterns=patterns, + schema=schema, + metadata_patterns=metadata_patterns, + encoding=encoding, ) if add_columns is not None: diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index e13160777..de71ce8a2 100755 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -180,6 +180,7 @@ def parse_eyelink( patterns: list[dict[str, Any] | str] | None = None, schema: dict[str, Any] | None = None, metadata_patterns: list[dict[str, Any] | str] | None = None, + encoding: str = 'ascii', ) -> tuple[pl.DataFrame, dict[str, Any]]: """Process EyeLink asc file. @@ -193,6 +194,8 @@ def parse_eyelink( Dictionary to optionally specify types of columns parsed by patterns. (default: None) metadata_patterns: list[dict[str, Any] | str] | None list of patterns to match for additional metadata. (default: None) + encoding: str + Text encoding of the file. 
(default: 'ascii') Returns ------- @@ -228,7 +231,7 @@ def parse_eyelink( **additional, } - with open(filepath, encoding='ascii') as asc_file: + with open(filepath, encoding=encoding) as asc_file: lines = asc_file.readlines() # will return an empty string if the key does not exist diff --git a/tests/unit/utils/parsing_test.py b/tests/unit/utils/parsing_test.py index cfd7adff5..6795014e5 100644 --- a/tests/unit/utils/parsing_test.py +++ b/tests/unit/utils/parsing_test.py @@ -889,3 +889,33 @@ def test_parse_eyelink_mount_config(tmp_path, metadata, expected_mount_config): _, parsed_metadata = pm.utils.parsing.parse_eyelink(filepath) assert parsed_metadata['mount_configuration'] == expected_mount_config + + +@pytest.mark.parametrize( + ('bytestring', 'encoding', 'expected_text'), + [ + pytest.param( + b'MSG 2154555 H\xe4user\n', + 'latin1', + 'Häuser', + id='latin1', + ), + pytest.param( + b'MSG 2154555 H\xc3\xa4user\n', + 'utf-8', + 'Häuser', + id='utf-8', + ), + ], +) +def test_parse_eyelink_encoding(tmp_path, bytestring, encoding, expected_text): + filepath = tmp_path / 'sub.asc' + filepath.write_bytes(bytestring) + + _, parsed_metadata = pm.utils.parsing.parse_eyelink( + filepath, + metadata_patterns=[r'(?P<text>.+)'], + encoding=encoding, + ) + + assert parsed_metadata['text'] == expected_text