You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
File "/usr/local/bin/source/huggingsound/examples/speech_recognition/finetune.py", line 71, in <module>
model.finetune(
File "/usr/local/lib/python3.10/dist-packages/huggingsound/speech_recognition/model.py", line 353, in finetune
train_dataset = self._get_dataset(processor, text_normalizer, train_data, train_data_cache_dir, training_args.length_column_name, num_workers)
File "/usr/local/lib/python3.10/dist-packages/huggingsound/speech_recognition/model.py", line 272, in _get_dataset
dataset = self._prepare_dataset_for_finetuning(dataset, processor, text_normalizer, length_column_name, num_workers)
File "/usr/local/lib/python3.10/dist-packages/huggingsound/speech_recognition/model.py", line 251, in _prepare_dataset_for_finetuning
dataset = dataset.map(
File "/usr/local/lib/python3.10/dist-packages/datasets/arrow_dataset.py", line 580, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/datasets/arrow_dataset.py", line 545, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/datasets/arrow_dataset.py", line 3087, in map
for rank, done, content in Dataset._map_single(**dataset_kwargs):
File "/usr/local/lib/python3.10/dist-packages/datasets/arrow_dataset.py", line 3441, in _map_single
example = apply_function_on_filtered_inputs(example, i, offset=offset)
File "/usr/local/lib/python3.10/dist-packages/datasets/arrow_dataset.py", line 3344, in apply_function_on_filtered_inputs
processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
File "/usr/local/lib/python3.10/dist-packages/huggingsound/speech_recognition/model.py", line 242, in __process_dataset_sample
transcription = text_normalizer(sample["transcription"]) + " "
File "/usr/local/lib/python3.10/dist-packages/datasets/formatting/formatting.py", line 270, in __getitem__
value = self.data[key]
KeyError: 'transcription'
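To fix the KeyError raised at line 242 of huggingsound/speech_recognition/model.py, where the sample has no "transcription" key, replace the line: transcription = text_normalizer(sample["transcription"]) + " "
Change it to: transcription = text_normalizer(sample["sentence"]) + " "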
The text was updated successfully, but these errors were encountered:
To fix the KeyError raised at line 242 of huggingsound/speech_recognition/model.py, replace the line:
transcription = text_normalizer(sample["transcription"]) + " "
Change it to:
transcription = text_normalizer(sample["sentence"]) + " "
The text was updated successfully, but these errors were encountered: