Оставлять недекодируемые символы без изменений

This commit is contained in:
2025-04-24 15:36:00 +02:00
parent a7a3c197a1
commit ff5b5bb44e

View File

@@ -2,7 +2,14 @@
def fix_encoding(text): def fix_encoding(text):
fixed_text = text.encode("latin1").decode("windows-1251") result = []
for ch in text:
try:
decoded = ch.encode("latin1").decode("windows-1251")
result.append(decoded)
except (UnicodeEncodeError, UnicodeDecodeError):
result.append(ch)
fixed_text = "".join(result)
fixed_text = fixed_text.replace("ј", "ё") fixed_text = fixed_text.replace("ј", "ё")
fixed_text = fixed_text.replace("ѕ", "<<") fixed_text = fixed_text.replace("ѕ", "<<")
fixed_text = fixed_text.replace("ї", ">>") fixed_text = fixed_text.replace("ї", ">>")