Skip to content

Commit

Permalink
fix: don't remove whitespace in European languages
Browse files Browse the repository at this point in the history
  • Loading branch information
hgiesel committed Feb 22, 2024
1 parent 2218ac4 commit 7bb3dae
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions src/card_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,11 @@ def process_audio_asset(audio: AudioAsset):

return f"[sound:{name}]"

# All three patterns share the same shape:
#   * `\[(?!sound:).*?\]` matches a shortest-possible `[...]` span unless it
#     begins with `[sound:`, so `[sound:...]` tags are left in place.
#   * the trailing `(?![^{]*})` rejects a match that is followed by a `}` with
#     no `{` in between, i.e. text that appears to sit inside `{...}`.
# REMOVE_RE and REMOVE_RE_CJK additionally match runs of spaces (` +`);
# REMOVE_RE_EURO does not, so spaces survive.
# REMOVE_RE is the same expression as REMOVE_RE_CJK.
REMOVE_RE = re.compile(r"( +|\[(?!sound:).*?\])(?![^{]*})")
REMOVE_RE_EURO = re.compile(r"(\[(?!sound:).*?\])(?![^{]*})")
REMOVE_RE_CJK = re.compile(r"( +|\[(?!sound:).*?\])(?![^{]*})")


def remove_syntax(text: str, has_cjk: bool) -> str:
    """Return *text* with bracket annotations removed.

    Args:
        text: The string to clean.
        has_cjk: When true, runs of spaces are removed as well
            (REMOVE_RE_CJK); when false, only the `[...]` spans are removed
            and spaces are kept (REMOVE_RE_EURO).

    Returns:
        A copy of *text* with every match of the selected pattern deleted.
        `[sound:...]` tags and matches followed by an unopened `}` are
        left untouched.
    """
    pattern = REMOVE_RE_CJK if has_cjk else REMOVE_RE_EURO
    return pattern.sub("", text)

def card_fields_from_dict(data: dict[str, any]):
br = "\n<br>\n"
Expand All @@ -91,10 +95,13 @@ def card_fields_from_dict(data: dict[str, any]):
imagess = br.join(images)

targetWord = data.get("targetWord", "")
targetWordNoSyntax = REMOVE_RE.sub("", targetWord)
cjk_found = len(re.findall(r'[\u2e80-\u9fff\uac00-\ud7ff]', targetWord)) > 0
print('foo', cjk_found, targetWord)

targetWordNoSyntax = remove_syntax(targetWord, cjk_found)

sentence = data.get("sentence", "")
sentenceNoSyntax = REMOVE_RE.sub("", sentence)
sentenceNoSyntax = remove_syntax(sentence, cjk_found)

return CardFields(
targetWord=targetWord,
Expand Down

0 comments on commit 7bb3dae

Please sign in to comment.