Synced with main

Detect emoji translation requests -- 5d73e00664

Catch cant detect errors in lang detection -- 97775d1ca4
This commit is contained in:
vdbhb59 2025-03-21 10:54:26 +05:30
commit f7afb0d104
2 changed files with 72 additions and 28 deletions

View file

@ -37,6 +37,19 @@ from libretranslate.locales import (
from .api_keys import Database, RemoteDatabase from .api_keys import Database, RemoteDatabase
from .suggestions import Database as SuggestionsDatabase from .suggestions import Database as SuggestionsDatabase
# Rough map of emoji characters, keyed by Unicode code point.
# A dict (code point -> True) is kept so both `cp in emojis` and `emojis[cp]`
# keep working for existing callers.
#
# Each (first, last) pair below is an INCLUSIVE Unicode range, so range() is
# called with `last + 1`. Using `last` directly as the stop value would drop
# the final code point of every block, e.g. U+FE0F (variation selector-16,
# which follows many emoji) and U+1F1FF (regional indicator "Z", used in flags).
emojis = {
    code_point: True
    for first, last in (
        (ord(' '), ord(' ')),  # Spaces
        (0x1F600, 0x1F64F),    # Emoticons
        (0x1F300, 0x1F5FF),    # Misc Symbols and Pictographs
        (0x1F680, 0x1F6FF),    # Transport and Map
        (0x2600, 0x26FF),      # Misc symbols
        (0x2700, 0x27BF),      # Dingbats
        (0xFE00, 0xFE0F),      # Variation Selectors
        (0x1F900, 0x1F9FF),    # Supplemental Symbols and Pictographs
        (0x1F1E6, 0x1F1FF),    # Flags
        (0x20D0, 0x20FF),      # Combining Diacritical Marks for Symbols
    )
    for code_point in range(first, last + 1)
}
def get_version(): def get_version():
try: try:
@ -153,6 +166,19 @@ def filter_unique(seq, extra):
seen_add = seen.add seen_add = seen.add
return [x for x in seq if not (x in seen or seen_add(x))] return [x for x in seq if not (x in seen or seen_add(x))]
def detect_translatable(src_texts):
    """Tell whether the input holds anything other than emoji characters.

    Args:
        src_texts: a single string, or a list whose items are strings (or
            nested lists, which are checked recursively).

    Returns:
        True as soon as one character is found whose code point is not in
        the module-level ``emojis`` map; False when every character is in
        that map (which includes the empty string and the empty list).
    """
    if isinstance(src_texts, list):
        # A batch is translatable when at least one of its entries is.
        for entry in src_texts:
            if detect_translatable(entry):
                return True
        return False

    # One character outside the emoji map is enough to call it translatable.
    return any(ord(symbol) not in emojis for symbol in src_texts)
def create_app(args): def create_app(args):
from libretranslate.init import boot from libretranslate.init import boot
@ -395,7 +421,7 @@ def create_app(args):
web_version=os.environ.get("LT_WEB") is not None, web_version=os.environ.get("LT_WEB") is not None,
version=get_version(), version=get_version(),
swagger_url=swagger_url, swagger_url=swagger_url,
available_locales=sorted[{'code': l['code'], 'name': _lazy(l['name'])} for l in get_available_locales(not args.debug)], key=lambda s: s['name']), available_locales=sorted([{'code': l['code'], 'name': _lazy(l['name'])} for l in get_available_locales(not args.debug)], key=lambda s: s['name']),
current_locale=get_locale(), current_locale=get_locale(),
alternate_locales=get_alternate_locale_links() alternate_locales=get_alternate_locale_links()
)) ))
@ -647,13 +673,17 @@ def create_app(args):
if batch: if batch:
request.req_cost = max(1, len(q)) request.req_cost = max(1, len(q))
if source_lang == "auto": translatable = detect_translatable(src_texts)
candidate_langs = detect_languages(src_texts) if translatable:
detected_src_lang = candidate_langs[0] if source_lang == "auto":
candidate_langs = detect_languages(src_texts)
detected_src_lang = candidate_langs[0]
else:
detected_src_lang = {"confidence": 100.0, "language": source_lang}
else: else:
detected_src_lang = {"confidence": 100.0, "language": source_lang} detected_src_lang = {"confidence": 0.0, "language": "en"}
src_lang = next(iter([l for l in languages if l.code == detected_src_lang["language"]]), None) src_lang = next(iter([l for l in languages if l.code == detected_src_lang["language"]]), None)
if src_lang is None: if src_lang is None:
@ -679,14 +709,18 @@ def create_app(args):
if translator is None: if translator is None:
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code)) abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
if text_format == "html": if translatable:
translated_text = unescape(str(translate_html(translator, text))) if text_format == "html":
alternatives = [] # Not supported for html yet translated_text = unescape(str(translate_html(translator, text)))
alternatives = [] # Not supported for html yet
else:
hypotheses = translator.hypotheses(text, num_alternatives + 1)
translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value))
alternatives = filter_unique([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
else: else:
hypotheses = translator.hypotheses(text, num_alternatives + 1) translated_text = text # Cannot translate, send the original text back
translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value)) alternatives = []
alternatives = filter_unique([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
batch_results.append(translated_text) batch_results.append(translated_text)
batch_alternatives.append(alternatives) batch_alternatives.append(alternatives)
@ -703,14 +737,18 @@ def create_app(args):
if translator is None: if translator is None:
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code)) abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
if text_format == "html": if translatable:
translated_text = unescape(str(translate_html(translator, q))) if text_format == "html":
alternatives = [] # Not supported for html yet translated_text = unescape(str(translate_html(translator, q)))
alternatives = [] # Not supported for html yet
else:
hypotheses = translator.hypotheses(q, num_alternatives + 1)
translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value))
alternatives = filter_unique([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
else: else:
hypotheses = translator.hypotheses(q, num_alternatives + 1) translated_text = q # Cannot translate, send the original text back
translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value)) alternatives = []
alternatives = filter_unique([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
result = {"translatedText": translated_text} result = {"translatedText": translated_text}
if source_lang == "auto": if source_lang == "auto":

View file

@ -3,7 +3,8 @@ from langdetect import DetectorFactory
DetectorFactory.seed = 0 DetectorFactory.seed = 0
from langdetect import detect_langs from langdetect import detect_langs, LangDetectException
from langdetect.lang_detect_exception import ErrorCode
from lexilang.detector import detect as lldetect from lexilang.detector import detect as lldetect
@ -35,11 +36,16 @@ class Detector:
if conf > 0: if conf > 0:
return [Language(code, round(conf * 100))] return [Language(code, round(conf * 100))]
top_3_choices = [lang for lang in detect_langs(text) if check_lang(self.langcodes, lang)][:3] try:
if not len(top_3_choices): top_3_choices = [lang for lang in detect_langs(text) if check_lang(self.langcodes, lang)][:3]
return [Language("en", 0)] if not len(top_3_choices):
if top_3_choices[0].prob == 0: return [Language("en", 0)]
return [Language("en", 0)] if top_3_choices[0].prob == 0:
return [Language("en", 0)]
except LangDetectException as e:
if e.code == ErrorCode.CantDetectError:
return [Language("en", 0)]
else:
raise e
return [Language(normalized_lang_code(lang), round(lang.prob * 100)) for lang in top_3_choices] return [Language(normalized_lang_code(lang), round(lang.prob * 100)) for lang in top_3_choices]