import abc
import string
class LanguageClassifier(abc.ABC):
    """Abstract base class for language classifiers.

    Subclasses must implement :meth:`classify`. The :meth:`preprocess`
    helper is available to blank out punctuation before classification.
    """

    # Characters that ``preprocess`` replaces with a space: ASCII punctuation
    # plus the low double quotation mark (U+201E), which is not part of
    # ``string.punctuation``.
    REMOVE_CHARACTERS = string.punctuation + "„"

    @abc.abstractmethod
    def classify(self, text: str) -> tuple[str, float]:
        """Classify a text string.

        Args:
            text: The text to classify.

        Returns:
            A tuple with the language and the confidence.
        """
        # Only reachable through an explicit ``super().classify(...)`` call,
        # because the class cannot be instantiated while this method is
        # abstract. The sentinel return is kept for backward compatibility.
        return NotImplemented

    @staticmethod
    def preprocess(text: str) -> str:
        """Replace removable characters with spaces and trim the result.

        Every character listed in ``LanguageClassifier.REMOVE_CHARACTERS`` is
        replaced by a single space; interior runs of spaces are not
        collapsed. Leading and trailing whitespace is then stripped.

        Args:
            text: The raw text.

        Returns:
            The text with removable characters blanked out and the
            surrounding whitespace removed.
        """
        removable = LanguageClassifier.REMOVE_CHARACTERS
        # A single translate pass replaces one ``str.replace`` call per
        # character. The table is built per call so that it always reflects
        # the current value of ``REMOVE_CHARACTERS``.
        table = str.maketrans(removable, " " * len(removable))
        return text.translate(table).strip()