diff --git a/src/pymovements/gaze/_utils/parsing.py b/src/pymovements/gaze/_utils/parsing.py index 96d5cfad2..fa09c9b85 100755 --- a/src/pymovements/gaze/_utils/parsing.py +++ b/src/pymovements/gaze/_utils/parsing.py @@ -69,11 +69,27 @@ r'(?P\d.\d\d)\s+max', ) +# TODO: support all EFIX/ESACC/EBLINK formats (optional angular position) +FIXATION_START_REGEX = re.compile(r'SFIX\s+(R|L)\s+(?P(\d+[.]?\d*))\s*') +FIXATION_STOP_REGEX = re.compile( + r'EFIX\s+(R|L)\s+(?P(\d+[.]?\d*))\s+' + r'(?P(\d+[.]?\d*))\s+(?P(\d+[.]?\d*))\s+' + r'(?P(\d+[.]?\d*))\s+(?P(\d+[.]?\d*))\s+(?P(\d+[.]?\d*))\s*', +) +SACCADE_START_REGEX = re.compile(r'SSACC\s+(R|L)\s+(?P(\d+[.]?\d*))\s*') +SACCADE_STOP_REGEX = re.compile( + r'ESACC\s+(R|L)\s+(?P(\d+[.]?\d*))\s+' + r'(?P(\d+[.]?\d*))\s+(?P(\d+[.]?\d*))\s+' + r'(?P(\d+[.]?\d*))\s+(?P(\d+[.]?\d*))\s+' + r'(?P(\d+[.]?\d*))\s+(?P(\d+[.]?\d*))\s+' + r'(?P(\d+[.]?\d*))\s+(?P(\d+[.]?\d*))\s*', +) BLINK_START_REGEX = re.compile(r'SBLINK\s+(R|L)\s+(?P(\d+[.]?\d*))\s*') BLINK_STOP_REGEX = re.compile( r'EBLINK\s+(R|L)\s+(?P(\d+[.]?\d*))\s+' r'(?P(\d+[.]?\d*))\s+(?P(\d+[.]?\d*))\s*', ) + INVALID_SAMPLE_REGEX = re.compile( r'(?P(\d+[.]?\d*))\s+\.\s+\.\s+(?P0\.0)?\s+0\.0\s+\.\.\.\s*', ) @@ -175,13 +191,37 @@ def get_pattern_keys(compiled_patterns: list[dict[str, Any]], pattern_key: str) return keys +def parse_eyelink_event_start(line: str) -> str | None: + """Check if the line contains the start of an event and return the event name.""" + if FIXATION_START_REGEX.match(line): + return 'fixation' + if SACCADE_START_REGEX.match(line): + return 'saccade' + if BLINK_START_REGEX.match(line): + return 'blink' + return None + + +def parse_eyelink_event_end(line: str) -> tuple[str, float, float] | None: + """Check if the line contains the end of an event and return the event name and times.""" + if match := FIXATION_STOP_REGEX.match(line): + return 'fixation', float( + match.group('timestamp_start'), + ), float(match.group('timestamp_end')) + if match := 
SACCADE_STOP_REGEX.match(line): + return 'saccade', float(match.group('timestamp_start')), float(match.group('timestamp_end')) + if match := BLINK_STOP_REGEX.match(line): + return 'blink', float(match.group('timestamp_start')), float(match.group('timestamp_end')) + return None + + def parse_eyelink( filepath: Path | str, patterns: list[dict[str, Any] | str] | None = None, schema: dict[str, Any] | None = None, metadata_patterns: list[dict[str, Any] | str] | None = None, encoding: str = 'ascii', -) -> tuple[pl.DataFrame, dict[str, Any]]: +) -> tuple[pl.DataFrame, pl.DataFrame, dict[str, Any]]: """Parse EyeLink asc file. Parameters @@ -199,8 +239,8 @@ def parse_eyelink( Returns ------- - tuple[pl.DataFrame, dict[str, Any]] - A tuple containing the parsed sample data and the metadata in a dictionary. + tuple[pl.DataFrame, pl.DataFrame, dict[str, Any]] + A tuple containing the parsed gaze sample data, the parsed event data, and the metadata. Raises ------ @@ -216,19 +256,25 @@ def parse_eyelink( compiled_metadata_patterns = compile_patterns(metadata_patterns) additional_columns = get_pattern_keys(compiled_patterns, 'column') - additional: dict[str, list[Any]] = { - additional_column: [] for additional_column in additional_columns - } current_additional = { additional_column: None for additional_column in additional_columns } + current_event_additional: dict[str, dict[str, Any]] = { + 'fixation': {}, 'saccade': {}, 'blink': {}, + } samples: dict[str, list[Any]] = { 'time': [], 'x_pix': [], 'y_pix': [], 'pupil': [], - **additional, + **{additional_column: [] for additional_column in additional_columns}, + } + events: dict[str, list[Any]] = { + 'name': [], + 'onset': [], + 'offset': [], + **{additional_column: [] for additional_column in additional_columns}, } with open(filepath, encoding=encoding) as asc_file: @@ -248,6 +294,7 @@ def parse_eyelink( validations = [] calibrations = [] + # TODO: remove blink metadata blinks = [] invalid_samples = [] @@ -284,20 +331,37 @@ def 
parse_eyelink( ) cal_timestamp = '' - elif BLINK_START_REGEX.match(line): - blink = True - - elif match := BLINK_STOP_REGEX.match(line): - blink = False - parsed_blink = match.groupdict() - blink_info = { - 'start_timestamp': float(parsed_blink['timestamp_start']), - 'stop_timestamp': float(parsed_blink['timestamp_end']), - 'duration_ms': float(parsed_blink['duration_ms']), - 'num_samples': num_blink_samples, - } - num_blink_samples = 0 - blinks.append(blink_info) + elif event_name := parse_eyelink_event_start(line): + current_event_additional[event_name] = {**current_additional} + # TODO: remove + if BLINK_START_REGEX.match(line): + blink = True + + elif event := parse_eyelink_event_end(line): + event_name, event_onset, event_offset = event + events['name'].append(f'{event_name}_eyelink') + events['onset'].append(event_onset) + events['offset'].append(event_offset) + + for additional_column in additional_columns: + events[additional_column].append( + current_event_additional[event_name][additional_column], + ) + current_event_additional[event_name] = {} + + # TODO: remove + if event_name == 'blink': + blink = False + blink_info = { + 'start_timestamp': event_onset, + 'stop_timestamp': event_offset, + # TODO: https://www.sr-research.com/support/thread-9411.html + # 'duration_ms': float(parsed_blink['duration_ms']), + 'duration_ms': float(event_offset - event_onset), + 'num_samples': num_blink_samples, + } + num_blink_samples = 0 + blinks.append(blink_info) elif match := START_RECORDING_REGEX.match(line): start_recording_timestamp = match.groupdict()['timestamp'] @@ -375,18 +439,27 @@ def parse_eyelink( pre_processed_metadata['data_loss_ratio_blinks'] = data_loss_ratio_blinks pre_processed_metadata['total_recording_duration_ms'] = total_recording_duration - schema_overrides = { + gaze_schema_overrides = { 'time': pl.Float64, 'x_pix': pl.Float64, 'y_pix': pl.Float64, 'pupil': pl.Float64, } if schema is not None: - schema_overrides.update(schema) + 
gaze_schema_overrides.update(schema) + + event_schema_overrides = { + 'name': pl.String, + 'onset': pl.Float64, + 'offset': pl.Float64, + } + if schema is not None: + event_schema_overrides.update(schema) - df = pl.from_dict(data=samples).cast(schema_overrides) + gaze_df = pl.from_dict(data=samples).cast(gaze_schema_overrides) + event_df = pl.from_dict(data=events).cast(event_schema_overrides) - return df, pre_processed_metadata + return gaze_df, event_df, pre_processed_metadata def _pre_process_metadata(metadata: defaultdict[str, Any]) -> dict[str, Any]: diff --git a/src/pymovements/gaze/io.py b/src/pymovements/gaze/io.py index 5fef8e391..231f79fc3 100644 --- a/src/pymovements/gaze/io.py +++ b/src/pymovements/gaze/io.py @@ -25,6 +25,7 @@ import polars as pl +from pymovements.events.frame import EventDataFrame from pymovements.gaze._utils.parsing import parse_eyelink from pymovements.gaze.experiment import Experiment from pymovements.gaze.gaze_dataframe import GazeDataFrame # pylint: disable=cyclic-import @@ -352,7 +353,7 @@ def from_asc( raise ValueError(f"unknown pattern key '{patterns}'. Supported keys are: eyelink") # Read data. - gaze_data, metadata = parse_eyelink( + gaze_data, event_data, metadata = parse_eyelink( file, patterns=patterns, schema=schema, @@ -438,10 +439,12 @@ def from_asc( + '\n'.join(f'- {issue}' for issue in issues), ) - # Create gaze data frame. + # Create gaze and event data frames. + event_df = EventDataFrame(event_data) gaze_df = GazeDataFrame( gaze_data, experiment=experiment, + events=event_df, trial_columns=trial_columns, time_column='time', time_unit='ms', diff --git a/src/pymovements/utils/parsing.py b/src/pymovements/utils/parsing.py index adb7114ba..5dca0d29a 100644 --- a/src/pymovements/utils/parsing.py +++ b/src/pymovements/utils/parsing.py @@ -71,7 +71,7 @@ def parse_eyelink( Warning If no metadata is found in the file. 
""" - gaze, metadata = _parse_eyelink( + gaze, _, metadata = _parse_eyelink( filepath=filepath, patterns=patterns, schema=schema, diff --git a/tests/unit/gaze/_utils/_parsing_test.py b/tests/unit/gaze/_utils/_parsing_test.py index 675f89998..bfc16896c 100644 --- a/tests/unit/gaze/_utils/_parsing_test.py +++ b/tests/unit/gaze/_utils/_parsing_test.py @@ -53,13 +53,14 @@ SAMPLES GAZE LEFT RATE 1000.00 TRACKING CR FILTER 2 INPUT the next line has all additional trial columns set to None START 10000000 RIGHT SAMPLES EVENTS +SFIX R 10000000 10000000 850.7 717.5 714.0 0.0 ... END 10000001 SAMPLES EVENTS RES 38.54 31.12 MSG 10000001 START_A START 10000002 RIGHT SAMPLES EVENTS the next line now should have the task column set to A 10000002 850.7 717.5 714.0 0.0 ... -END 10000002 SAMPLES EVENTS RES 38.54 31.12 +END 10000003 SAMPLES EVENTS RES 38.54 31.12 MSG 10000003 STOP_A the task should be set to None again START 10000004 RIGHT SAMPLES EVENTS @@ -75,11 +76,13 @@ the next line now should have the trial column set to 1 START 10000008 RIGHT SAMPLES EVENTS 10000008 850.7 717.5 714.0 0.0 ... +EFIX R 10000000 10000008 10 850.7 717.5 714.0 END 10000009 SAMPLES EVENTS RES 38.54 31.12 MSG 10000009 STOP_TRIAL_1 MSG 10000010 START_TRIAL_2 the next line now should have the trial column set to 2 START 10000011 RIGHT SAMPLES EVENTS +SSACC R 10000011 10000011 850.7 717.5 714.0 0.0 ... END 10000012 SAMPLES EVENTS RES 38.54 31.12 MSG 10000012 STOP_TRIAL_2 @@ -95,12 +98,13 @@ task and trial should be set to None again MSG 10000017 METADATA_3 START 10000017 RIGHT SAMPLES EVENTS -10000017 850.7 717.5 . 0.0 ... -SBLINK R 10000018 -10000019 . . 0.0 0.0 ... +10000017 850.7 717.5 714.0 0.0 ... +10000019 850.7 717.5 . 0.0 ... +SBLINK R 10000020 10000020 . . 0.0 0.0 ... -EBLINK R 10000018 10000020 2 10000021 . . 0.0 0.0 ... 
+EBLINK R 10000020 10000022 4 +ESACC R 10000011 10000022 13 850.7 717.5 850.7 717.5 19.00 590 END 10000022 SAMPLES EVENTS RES 38.54 31.12 """ @@ -136,20 +140,34 @@ {'pattern': r'METADATA_4', 'key': 'metadata_4', 'value': True}, ] -EXPECTED_DF = pl.from_dict( +EXPECTED_GAZE_DF = pl.from_dict( { 'time': [ 10000000.0, 10000002.0, 10000004.0, 10000006.0, 10000008.0, 10000011.0, 10000014.0, 10000017.0, 10000019.0, 10000020.0, 10000021.0, ], - 'x_pix': [850.7, 850.7, 850.7, 850.7, 850.7, 850.7, 850.7, 850.7, np.nan, np.nan, np.nan], - 'y_pix': [717.5, 717.5, 717.5, 717.5, 717.5, 717.5, 717.5, 717.5, np.nan, np.nan, np.nan], - 'pupil': [714.0, 714.0, 714.0, 714.0, 714.0, 714.0, 714.0, np.nan, 0.0, 0.0, 0.0], + 'x_pix': [ + 850.7, 850.7, 850.7, 850.7, 850.7, 850.7, 850.7, 850.7, 850.7, np.nan, np.nan, + ], + 'y_pix': [ + 717.5, 717.5, 717.5, 717.5, 717.5, 717.5, 717.5, 717.5, 717.5, np.nan, np.nan, + ], + 'pupil': [714.0, 714.0, 714.0, 714.0, 714.0, 714.0, 714.0, 714.0, np.nan, 0.0, 0.0], 'task': [None, 'A', None, 'B', 'B', 'B', 'B', None, None, None, None], 'trial_id': [None, None, None, None, '1', '2', '3', None, None, None, None], }, ) +EXPECTED_EVENT_DF = pl.from_dict( + { + 'name': ['fixation_eyelink', 'blink_eyelink', 'saccade_eyelink'], + 'onset': [10000000.0, 10000020.0, 10000011.0], + 'offset': [10000008.0, 10000022.0, 10000022.0], + 'task': [None, None, 'B'], + 'trial_id': [None, None, '2'], + }, +) + EXPECTED_METADATA = { 'weekday': 'Wed', 'month': 'Mar', @@ -169,15 +187,15 @@ 'calibrations': [], 'validations': [], 'resolution': (1280, 1024), - 'data_loss_ratio_blinks': 0.18181818181818182, - 'data_loss_ratio': 0.2727272727272727, - 'total_recording_duration_ms': 11, + 'data_loss_ratio_blinks': 0.16666666666666666, + 'data_loss_ratio': 0.25, + 'total_recording_duration_ms': 12.0, 'datetime': datetime.datetime(2023, 3, 8, 9, 25, 20), 'blinks': [{ - 'duration_ms': 2, + 'duration_ms': 4.0, 'num_samples': 2, - 'start_timestamp': 10000018, - 'stop_timestamp': 
10000020, + 'start_timestamp': 10000020.0, + 'stop_timestamp': 10000022.0, }], 'mount_configuration': { 'mount_type': 'Desktop', @@ -196,13 +214,14 @@ def test_parse_eyelink(tmp_path): filepath = tmp_path / 'sub.asc' filepath.write_text(ASC_TEXT) - df, metadata = pm.gaze._utils.parsing.parse_eyelink( + gaze_df, event_df, metadata = pm.gaze._utils.parsing.parse_eyelink( filepath, patterns=PATTERNS, metadata_patterns=METADATA_PATTERNS, ) - assert_frame_equal(df, EXPECTED_DF, check_column_order=False) + assert_frame_equal(gaze_df, EXPECTED_GAZE_DF, check_column_order=False) + assert_frame_equal(event_df, EXPECTED_EVENT_DF, check_column_order=False) assert metadata == EXPECTED_METADATA @@ -236,7 +255,7 @@ def test_parse_eyelink(tmp_path): ], ) def test_from_asc_metadata_patterns(kwargs, expected_metadata): - _, metadata = pm.gaze._utils.parsing.parse_eyelink(**kwargs) + _, _, metadata = pm.gaze._utils.parsing.parse_eyelink(**kwargs) for key, value in expected_metadata.items(): assert metadata[key] == value @@ -344,7 +363,7 @@ def test_parse_eyelink_version(tmp_path, metadata, expected_version, expected_mo filepath = tmp_path / 'sub.asc' filepath.write_text(metadata) - _, metadata = pm.gaze._utils.parsing.parse_eyelink( + _, _, metadata = pm.gaze._utils.parsing.parse_eyelink( filepath, ) @@ -367,7 +386,7 @@ def test_no_metadata_warning(tmp_path, metadata, expected_msg): filepath.write_text(metadata) with pytest.raises(Warning) as info: - _, metadata = pm.gaze._utils.parsing.parse_eyelink( + _, _, metadata = pm.gaze._utils.parsing.parse_eyelink( filepath, ) @@ -429,7 +448,7 @@ def test_val_cal_eyelink(tmp_path, metadata, expected_validation, expected_calib filepath = tmp_path / 'sub.asc' filepath.write_text(metadata) - _, parsed_metadata = pm.gaze._utils.parsing.parse_eyelink(filepath) + _, _, parsed_metadata = pm.gaze._utils.parsing.parse_eyelink(filepath) assert parsed_metadata['calibrations'] == expected_calibration assert parsed_metadata['validations'] == 
expected_validation @@ -438,7 +457,7 @@ def test_val_cal_eyelink(tmp_path, metadata, expected_validation, expected_calib def test_parse_val_cal_eyelink_monocular_file(): example_asc_monocular_path = Path('tests/files/eyelink_monocular_example.asc') - _, metadata = pm.gaze._utils.parsing.parse_eyelink(example_asc_monocular_path) + _, _, metadata = pm.gaze._utils.parsing.parse_eyelink(example_asc_monocular_path) expected_validation = [{ 'error': 'GOOD ERROR', @@ -467,10 +486,10 @@ def test_parse_val_cal_eyelink_monocular_file(): '10000020 . . 0.0 0.0 ...\n' 'EBLINK R 10000018 10000020 2\n', [{ - 'duration_ms': 2, + 'duration_ms': 2.0, 'num_samples': 2, - 'start_timestamp': 10000018, - 'stop_timestamp': 10000020, + 'start_timestamp': 10000018.0, + 'stop_timestamp': 10000020.0, }], id='blink', ), @@ -489,16 +508,16 @@ def test_parse_val_cal_eyelink_monocular_file(): 'EBLINK R 10000021 10000024 4\n', [ { - 'duration_ms': 2, + 'duration_ms': 2.0, 'num_samples': 2, - 'start_timestamp': 10000018, - 'stop_timestamp': 10000020, + 'start_timestamp': 10000018.0, + 'stop_timestamp': 10000020.0, }, { - 'duration_ms': 4, + 'duration_ms': 4.0, 'num_samples': 4, - 'start_timestamp': 10000021, - 'stop_timestamp': 10000024, + 'start_timestamp': 10000021.0, + 'stop_timestamp': 10000024.0, }, ], id='multiple_blinks', @@ -518,16 +537,16 @@ def test_parse_val_cal_eyelink_monocular_file(): 'EBLINK R 10000021 10000024 4\n', [ { - 'duration_ms': 2, + 'duration_ms': 2.0, 'num_samples': 2, - 'start_timestamp': 10000018, - 'stop_timestamp': 10000020, + 'start_timestamp': 10000018.0, + 'stop_timestamp': 10000020.0, }, { - 'duration_ms': 4, + 'duration_ms': 4.0, 'num_samples': 4, - 'start_timestamp': 10000021, - 'stop_timestamp': 10000024, + 'start_timestamp': 10000021.0, + 'stop_timestamp': 10000024.0, }, ], id='multiple_blinks_no_dummy', @@ -539,10 +558,10 @@ def test_parse_val_cal_eyelink_monocular_file(): '10000020 . . 
0.0 0.0 ...\n' 'EBLINK R 10000018 10000020 2\n', [{ - 'duration_ms': 2, + 'duration_ms': 2.0, 'num_samples': 2, - 'start_timestamp': 10000018, - 'stop_timestamp': 10000020, + 'start_timestamp': 10000018.0, + 'stop_timestamp': 10000020.0, }], id='blinks_no_sampling_rate', ), @@ -552,7 +571,7 @@ def test_parse_eyelink_blinks(tmp_path, metadata, expected_blinks): filepath = tmp_path / 'sub.asc' filepath.write_text(metadata) - _, parsed_metadata = pm.gaze._utils.parsing.parse_eyelink(filepath) + _, _, parsed_metadata = pm.gaze._utils.parsing.parse_eyelink(filepath) assert parsed_metadata['blinks'] == expected_blinks @@ -690,7 +709,7 @@ def test_parse_eyelink_data_loss_ratio( filepath = tmp_path / 'sub.asc' filepath.write_text(metadata) - _, parsed_metadata = pm.gaze._utils.parsing.parse_eyelink(filepath) + _, _, parsed_metadata = pm.gaze._utils.parsing.parse_eyelink(filepath) assert parsed_metadata['data_loss_ratio_blinks'] == expected_blink_ratio assert parsed_metadata['data_loss_ratio'] == expected_overall_ratio @@ -703,7 +722,7 @@ def test_parse_eyelink_datetime(tmp_path): filepath = tmp_path / 'sub.asc' filepath.write_text(metadata) - _, parsed_metadata = pm.gaze._utils.parsing.parse_eyelink(filepath) + _, _, parsed_metadata = pm.gaze._utils.parsing.parse_eyelink(filepath) assert parsed_metadata['datetime'] == expected_datetime @@ -885,7 +904,7 @@ def test_parse_eyelink_mount_config(tmp_path, metadata, expected_mount_config): filepath = tmp_path / 'sub.asc' filepath.write_text(metadata) - _, parsed_metadata = pm.gaze._utils.parsing.parse_eyelink(filepath) + _, _, parsed_metadata = pm.gaze._utils.parsing.parse_eyelink(filepath) assert parsed_metadata['mount_configuration'] == expected_mount_config @@ -911,7 +930,7 @@ def test_parse_eyelink_encoding(tmp_path, bytestring, encoding, expected_text): filepath = tmp_path / 'sub.asc' filepath.write_bytes(bytestring) - _, parsed_metadata = pm.gaze._utils.parsing.parse_eyelink( + _, _, parsed_metadata = 
pm.gaze._utils.parsing.parse_eyelink( filepath, metadata_patterns=[r'(?P.+)'], encoding=encoding, diff --git a/tests/unit/utils/parsing_test.py b/tests/unit/utils/parsing_test.py index 04f2aa830..4fa34dd21 100644 --- a/tests/unit/utils/parsing_test.py +++ b/tests/unit/utils/parsing_test.py @@ -30,7 +30,7 @@ def test_parse_eyelink_equal_gaze(): filepath = 'tests/files/eyelink_monocular_example.asc' - gaze, _ = pm.gaze._utils.parsing.parse_eyelink(filepath) + gaze, _, _ = pm.gaze._utils.parsing.parse_eyelink(filepath) gaze_depr, _ = pm.utils.parsing.parse_eyelink(filepath) assert_frame_equal(gaze, gaze_depr)