Source code for text_quality.language.classifier

import abc
import string


class LanguageClassifier(abc.ABC):
    """Abstract class for implementing different language classifiers.

    Concrete subclasses must implement :meth:`classify`. The static helper
    :meth:`preprocess` is available for blanking out punctuation in a text.
    """

    # Characters that ``preprocess`` replaces with a space: everything in
    # ``string.punctuation`` plus the low double quotation mark "„".
    REMOVE_CHARACTERS = string.punctuation + "„"

    @abc.abstractmethod
    def classify(self, text: str) -> tuple[str, float]:
        """Classify a text string.

        Args:
            text: The text to classify.

        Returns:
            A tuple with the language and the confidence.

        Raises:
            NotImplementedError: Always in this base implementation;
                subclasses must override the method.
        """
        # ``NotImplemented`` is the sentinel reserved for binary-operator
        # dispatch; an unimplemented abstract method should raise instead of
        # returning a value that violates the annotated return type.
        raise NotImplementedError

    @staticmethod
    def preprocess(text: str) -> str:
        """Replace unwanted characters with spaces and trim the result.

        Every character listed in ``LanguageClassifier.REMOVE_CHARACTERS`` is
        replaced by a single space, then leading and trailing whitespace is
        stripped. Interior runs of spaces are not collapsed.

        Args:
            text: The text to clean.

        Returns:
            The cleaned text.
        """
        # Build the table at call time so that a reassigned
        # ``REMOVE_CHARACTERS`` is still honoured, then substitute all
        # characters in a single pass instead of one ``replace`` per character.
        table = str.maketrans(dict.fromkeys(LanguageClassifier.REMOVE_CHARACTERS, " "))
        return text.translate(table).strip()