From a9a4f9ce909b1830601b6878b42049f7fcc4eb28 Mon Sep 17 00:00:00 2001
From: Hicham Abderrahim <76810468+HichamAbderrahim@users.noreply.github.com>
Date: Mon, 14 Apr 2025 21:41:34 +0100
Subject: [PATCH] Update commonvoice.py

Add encoding="utf-8" to work with all the languages
---
 src/torchaudio/datasets/commonvoice.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/torchaudio/datasets/commonvoice.py b/src/torchaudio/datasets/commonvoice.py
index db0e035c61..e0e8dfd47a 100644
--- a/src/torchaudio/datasets/commonvoice.py
+++ b/src/torchaudio/datasets/commonvoice.py
@@ -49,7 +49,7 @@ def __init__(self, root: Union[str, Path], tsv: str = "train.tsv") -> None:
         self._path = os.fspath(root)
         self._tsv = os.path.join(self._path, tsv)
 
-        with open(self._tsv, "r") as tsv_:
+        with open(self._tsv, "r", encoding="utf-8") as tsv_:
             walker = csv.reader(tsv_, delimiter="\t")
             self._header = next(walker)
             self._walker = list(walker)