Synced with main
Detect emoji translation requests --5d73e00664
Catch cant detect errors in lang detection --97775d1ca4
This commit is contained in:
parent
d6480bf880
commit
f7afb0d104
2 changed files with 72 additions and 28 deletions
|
|
@ -37,6 +37,19 @@ from libretranslate.locales import (
|
|||
from .api_keys import Database, RemoteDatabase
|
||||
from .suggestions import Database as SuggestionsDatabase
|
||||
|
||||
# Rough map of emoji code points (plus the space character), used to decide
# whether a piece of text contains anything other than emoji.
#
# Each entry below is an INCLUSIVE (first, last) code point pair. range()
# excludes its stop value, hence the `last + 1`: without it the final code
# point of every block is dropped, including U+FE0F (the variation selector
# that requests emoji presentation) and U+1F64F (folded hands).
#
# Kept as a {code_point: True} dict so existing `ord(ch) in emojis` membership
# checks and `emojis[code_point]` lookups keep working.
emojis = {
    code_point: True
    for first, last in (
        (0x20, 0x20),        # Space
        (0x1F600, 0x1F64F),  # Emoticons
        (0x1F300, 0x1F5FF),  # Misc Symbols and Pictographs
        (0x1F680, 0x1F6FF),  # Transport and Map
        (0x2600, 0x26FF),    # Misc symbols
        (0x2700, 0x27BF),    # Dingbats
        (0xFE00, 0xFE0F),    # Variation Selectors
        (0x1F900, 0x1F9FF),  # Supplemental Symbols and Pictographs
        (0x1F1E6, 0x1F1FF),  # Flags
        (0x20D0, 0x20FF),    # Combining Diacritical Marks for Symbols
    )
    for code_point in range(first, last + 1)
}
|
||||
|
||||
def get_version():
|
||||
try:
|
||||
|
|
@ -153,6 +166,19 @@ def filter_unique(seq, extra):
|
|||
seen_add = seen.add
|
||||
return [x for x in seq if not (x in seen or seen_add(x))]
|
||||
|
||||
|
||||
def detect_translatable(src_texts):
    """Tell whether the input holds any character outside the ``emojis`` map.

    ``src_texts`` is either a single string or a list of inputs; a list is
    handled by checking each element recursively, and it counts as
    translatable as soon as one element does.

    Returns True when at least one character's code point is missing from
    ``emojis``, and False otherwise (including for an empty string or an
    empty list).
    """
    if isinstance(src_texts, list):
        for item in src_texts:
            if detect_translatable(item):
                return True
        return False

    # One character outside the emoji map is enough to call the text
    # translatable; if none is found, the text is all emojis.
    return any(ord(char) not in emojis for char in src_texts)
|
||||
|
||||
|
||||
def create_app(args):
|
||||
from libretranslate.init import boot
|
||||
|
||||
|
|
@ -395,7 +421,7 @@ def create_app(args):
|
|||
web_version=os.environ.get("LT_WEB") is not None,
|
||||
version=get_version(),
|
||||
swagger_url=swagger_url,
|
||||
available_locales=sorted[{'code': l['code'], 'name': _lazy(l['name'])} for l in get_available_locales(not args.debug)], key=lambda s: s['name']),
|
||||
available_locales=sorted([{'code': l['code'], 'name': _lazy(l['name'])} for l in get_available_locales(not args.debug)], key=lambda s: s['name']),
|
||||
current_locale=get_locale(),
|
||||
alternate_locales=get_alternate_locale_links()
|
||||
))
|
||||
|
|
@ -647,13 +673,17 @@ def create_app(args):
|
|||
|
||||
if batch:
|
||||
request.req_cost = max(1, len(q))
|
||||
|
||||
if source_lang == "auto":
|
||||
candidate_langs = detect_languages(src_texts)
|
||||
detected_src_lang = candidate_langs[0]
|
||||
|
||||
translatable = detect_translatable(src_texts)
|
||||
if translatable:
|
||||
if source_lang == "auto":
|
||||
candidate_langs = detect_languages(src_texts)
|
||||
detected_src_lang = candidate_langs[0]
|
||||
else:
|
||||
detected_src_lang = {"confidence": 100.0, "language": source_lang}
|
||||
else:
|
||||
detected_src_lang = {"confidence": 100.0, "language": source_lang}
|
||||
|
||||
detected_src_lang = {"confidence": 0.0, "language": "en"}
|
||||
|
||||
src_lang = next(iter([l for l in languages if l.code == detected_src_lang["language"]]), None)
|
||||
|
||||
if src_lang is None:
|
||||
|
|
@ -679,14 +709,18 @@ def create_app(args):
|
|||
if translator is None:
|
||||
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
|
||||
|
||||
if text_format == "html":
|
||||
translated_text = unescape(str(translate_html(translator, text)))
|
||||
alternatives = [] # Not supported for html yet
|
||||
if translatable:
|
||||
if text_format == "html":
|
||||
translated_text = unescape(str(translate_html(translator, text)))
|
||||
alternatives = [] # Not supported for html yet
|
||||
else:
|
||||
hypotheses = translator.hypotheses(text, num_alternatives + 1)
|
||||
translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value))
|
||||
alternatives = filter_unique([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
|
||||
else:
|
||||
hypotheses = translator.hypotheses(text, num_alternatives + 1)
|
||||
translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value))
|
||||
alternatives = filter_unique([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
|
||||
|
||||
translated_text = text # Cannot translate, send the original text back
|
||||
alternatives = []
|
||||
|
||||
batch_results.append(translated_text)
|
||||
batch_alternatives.append(alternatives)
|
||||
|
||||
|
|
@ -703,14 +737,18 @@ def create_app(args):
|
|||
if translator is None:
|
||||
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
|
||||
|
||||
if text_format == "html":
|
||||
translated_text = unescape(str(translate_html(translator, q)))
|
||||
alternatives = [] # Not supported for html yet
|
||||
if translatable:
|
||||
if text_format == "html":
|
||||
translated_text = unescape(str(translate_html(translator, q)))
|
||||
alternatives = [] # Not supported for html yet
|
||||
else:
|
||||
hypotheses = translator.hypotheses(q, num_alternatives + 1)
|
||||
translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value))
|
||||
alternatives = filter_unique([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
|
||||
else:
|
||||
hypotheses = translator.hypotheses(q, num_alternatives + 1)
|
||||
translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value))
|
||||
alternatives = filter_unique([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
|
||||
|
||||
translated_text = q # Cannot translate, send the original text back
|
||||
alternatives = []
|
||||
|
||||
result = {"translatedText": translated_text}
|
||||
|
||||
if source_lang == "auto":
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@ from langdetect import DetectorFactory
|
|||
|
||||
DetectorFactory.seed = 0
|
||||
|
||||
from langdetect import detect_langs
|
||||
from langdetect import detect_langs, LangDetectException
|
||||
from langdetect.lang_detect_exception import ErrorCode
|
||||
from lexilang.detector import detect as lldetect
|
||||
|
||||
|
||||
|
|
@ -35,11 +36,16 @@ class Detector:
|
|||
if conf > 0:
|
||||
return [Language(code, round(conf * 100))]
|
||||
|
||||
top_3_choices = [lang for lang in detect_langs(text) if check_lang(self.langcodes, lang)][:3]
|
||||
if not len(top_3_choices):
|
||||
return [Language("en", 0)]
|
||||
if top_3_choices[0].prob == 0:
|
||||
return [Language("en", 0)]
|
||||
try:
|
||||
top_3_choices = [lang for lang in detect_langs(text) if check_lang(self.langcodes, lang)][:3]
|
||||
if not len(top_3_choices):
|
||||
return [Language("en", 0)]
|
||||
if top_3_choices[0].prob == 0:
|
||||
return [Language("en", 0)]
|
||||
except LangDetectException as e:
|
||||
if e.code == ErrorCode.CantDetectError:
|
||||
return [Language("en", 0)]
|
||||
else:
|
||||
raise e
|
||||
|
||||
return [Language(normalized_lang_code(lang), round(lang.prob * 100)) for lang in top_3_choices]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue