Source code for orichain.lang_detect

from typing import List, Optional, Dict
from orichain import error_explainer

import asyncio

# Version of lingua-language-detector that is pinned in the `pip install`
# command and in the manual-install hint used by the detector classes below.
VERSION = "2.1.0"


class LanguageDetection(object):
    """
    Synchronous interface for detecting the language of user messages using lingua-language-detector library.
    """

    def __init__(
        self,
        languages: Optional[List[str]] = None,
        min_words: Optional[int] = None,
        low_accuracy: Optional[bool] = False,
    ) -> None:
        """Loading detector with requirements, by default loads all the languages with 0.0 min confidence

        If lingua-language-detector cannot be imported, the user is asked on
        stdin whether it should be installed; on "y"/"yes" the pinned version
        is installed with pip and imported again.

        Any exception raised while building the detector is handed to
        `error_explainer` instead of being handled here, so `self.detector`
        and `self.min_words` may be left unset in that case.

        Args:
            - languages (Optional[List], optional): List of languages to load. Defaults to None.
            - min_words (Optional[int], optional): Minimum words in the user message to detect language. Defaults to None.
            - low_accuracy (Optional[bool], optional): To enable low accuracy mode. Defaults to False.

        Raises:
            ImportError: If lingua-language-detector is missing and the user declines the
                installation, or if it still cannot be imported after installing.
            subprocess.CalledProcessError: If the pip installation fails.
        """
        try:
            from lingua import Language, LanguageDetectorBuilder
        except ImportError:
            install = (
                input(
                    "lingua-language-detector is not installed. Do you want to install it now? (y/n): "
                )
                .strip()
                .lower()
            )
            if install not in ("y", "yes"):
                raise ImportError(
                    f"lingua-language-detector is required for LanguageDetection class. Please install it manually using `pip install orichain[lingua-language-detector]` or `pip install lingua-language-detector=={VERSION}`."
                )

            import importlib
            import subprocess
            import sys

            # Run pip through the current interpreter so the package lands in
            # the environment this process is actually running in.
            subprocess.run(
                [
                    sys.executable,
                    "-m",
                    "pip",
                    "install",
                    f"lingua-language-detector=={VERSION}",
                ],
                check=True,
            )

            # The first import failed, so the names are still unbound: refresh
            # the import system's caches and import the fresh install.
            importlib.invalidate_caches()
            from lingua import Language, LanguageDetectorBuilder

        try:
            # Loading detector with requirements
            if languages:
                # Loading detector with specific languages
                language_objects = [
                    getattr(Language, lang.upper()) for lang in languages
                ]
                detector = LanguageDetectorBuilder.from_languages(*language_objects)
            else:
                # Loading detector with all languages
                detector = LanguageDetectorBuilder.from_all_languages()

            # Enabling low accuracy mode
            if low_accuracy:
                detector = detector.with_low_accuracy_mode()

            # Building the detector
            self.detector = detector.with_preloaded_language_models().build()
            self.min_words = min_words
        except Exception as e:
            error_explainer(e)

    def __call__(
        self,
        user_message: str,
        min_words: Optional[int] = None,
        add_confidence: Optional[bool] = False,
        iso_code_639_3: Optional[bool] = False,
    ) -> Dict:
        """Runs language detection

        Args:
            - user_message (str): User message to detect language
            - min_words (Optional[int], optional): Minimum words in the user message to detect language.
              Falls back to the value given to the constructor. Defaults to None.
            - add_confidence (Optional[bool], optional): To add confidence in the result. Defaults to False.
            - iso_code_639_3 (Optional[bool], optional): To get iso code 639-3 instead of 639-1. Defaults to False.

        Returns:
            Dict: Result of language detection. `{"user_lang": None}` when the message has fewer
            whitespace-separated words than `min_words`; `{"user_lang": <code>}` (plus `"confidence"`
            when requested) on success; `{"error": 500, "reason": <message>}` when an exception occurs.
        """
        try:
            result = {"user_lang": None}

            # A per-call threshold takes precedence over the instance default.
            min_words = min_words or self.min_words
            if min_words and len(user_message.split()) < min_words:
                return result

            output = self.detector.compute_language_confidence_values(text=user_message)

            # The first entry is the one reported to the caller.
            top_match = output[0]
            iso_code = (
                top_match.language.iso_code_639_3
                if iso_code_639_3
                else top_match.language.iso_code_639_1
            )
            result["user_lang"] = iso_code.name

            if add_confidence:
                result["confidence"] = top_match.value

            return result
        except Exception as e:
            error_explainer(e)
            return {"error": 500, "reason": str(e)}
class AsyncLanguageDetection(object):
    """
    Asynchronous interface for detecting the language of user messages using lingua-language-detector library.
    """

    def __init__(
        self,
        languages: Optional[List[str]] = None,
        min_words: Optional[int] = None,
        low_accuracy: Optional[bool] = False,
    ) -> None:
        """Loading detector with requirements, by default loads all the languages with 0.0 min confidence

        If lingua-language-detector cannot be imported, the user is asked on
        stdin whether it should be installed; on "y"/"yes" the pinned version
        is installed with pip and imported again.

        Any exception raised while building the detector is handed to
        `error_explainer` instead of being handled here, so `self.detector`
        and `self.min_words` may be left unset in that case.

        Args:
            - languages (Optional[List], optional): List of languages to load. Defaults to None.
            - min_words (Optional[int], optional): Minimum words in the user message to detect language. Defaults to None.
            - low_accuracy (Optional[bool], optional): To enable low accuracy mode. Defaults to False.

        Raises:
            ImportError: If lingua-language-detector is missing and the user declines the
                installation, or if it still cannot be imported after installing.
            subprocess.CalledProcessError: If the pip installation fails.
        """
        try:
            from lingua import Language, LanguageDetectorBuilder
        except ImportError:
            install = (
                input(
                    "lingua-language-detector is not installed. Do you want to install it now? (y/n): "
                )
                .strip()
                .lower()
            )
            if install not in ("y", "yes"):
                raise ImportError(
                    f"lingua-language-detector is required for AsyncLanguageDetection class. Please install it manually using `pip install orichain[lingua-language-detector]` or `pip install lingua-language-detector=={VERSION}`."
                )

            import importlib
            import subprocess
            import sys

            # Run pip through the current interpreter so the package lands in
            # the environment this process is actually running in.
            subprocess.run(
                [
                    sys.executable,
                    "-m",
                    "pip",
                    "install",
                    f"lingua-language-detector=={VERSION}",
                ],
                check=True,
            )

            # The first import failed, so the names are still unbound: refresh
            # the import system's caches and import the fresh install.
            importlib.invalidate_caches()
            from lingua import Language, LanguageDetectorBuilder

        try:
            # Loading detector with requirements
            if languages:
                # Loading detector with specific languages
                language_objects = [
                    getattr(Language, lang.upper()) for lang in languages
                ]
                detector = LanguageDetectorBuilder.from_languages(*language_objects)
            else:
                # Loading detector with all languages
                detector = LanguageDetectorBuilder.from_all_languages()

            # Enabling low accuracy mode
            if low_accuracy:
                detector = detector.with_low_accuracy_mode()

            # Building the detector
            self.detector = detector.with_preloaded_language_models().build()
            self.min_words = min_words
        except Exception as e:
            error_explainer(e)

    async def __call__(
        self,
        user_message: str,
        min_words: Optional[int] = None,
        add_confidence: Optional[bool] = False,
        iso_code_639_3: Optional[bool] = False,
    ) -> Dict:
        """Runs language detection

        The detector call is dispatched with `asyncio.to_thread` and awaited.

        Args:
            - user_message (str): User message to detect language
            - min_words (Optional[int], optional): Minimum words in the user message to detect language.
              Falls back to the value given to the constructor. Defaults to None.
            - add_confidence (Optional[bool], optional): To add confidence in the result. Defaults to False.
            - iso_code_639_3 (Optional[bool], optional): To get iso code 639-3 instead of 639-1. Defaults to False.

        Returns:
            Dict: Result of language detection. `{"user_lang": None}` when the message has fewer
            whitespace-separated words than `min_words`; `{"user_lang": <code>}` (plus `"confidence"`
            when requested) on success; `{"error": 500, "reason": <message>}` when an exception occurs.
        """
        try:
            result = {"user_lang": None}

            # A per-call threshold takes precedence over the instance default.
            min_words = min_words or self.min_words
            if min_words and len(user_message.split()) < min_words:
                return result

            output = await asyncio.to_thread(
                self.detector.compute_language_confidence_values, text=user_message
            )

            # The first entry is the one reported to the caller.
            top_match = output[0]
            iso_code = (
                top_match.language.iso_code_639_3
                if iso_code_639_3
                else top_match.language.iso_code_639_1
            )
            result["user_lang"] = iso_code.name

            if add_confidence:
                result["confidence"] = top_match.value

            return result
        except Exception as e:
            error_explainer(e)
            return {"error": 500, "reason": str(e)}