text_quality
latest
Contents:
API Reference
text_quality
Index
Edit on GitHub
Index
_
|
A
|
C
|
D
|
E
|
F
|
G
|
H
|
I
|
L
|
M
|
N
|
P
|
Q
|
R
|
S
|
T
_
__author__ (in module text_quality)
__email__ (in module text_quality)
__version__ (in module text_quality)
_aggregate_lines() (text_quality.language.fasttext.FastTextLanguageClassifier static method)
_classify_pagexml() (text_quality.classifier.pipeline.Pipeline method)
_classify_pagexml_with_scores() (text_quality.classifier.pipeline.Pipeline method)
_DEFAULT_MODEL_PATH (text_quality.language.fasttext.FastTextLanguageClassifier attribute)
_download_model() (text_quality.language.fasttext.FastTextLanguageClassifier static method)
_get_ngram_score() (text_quality.feature.scorer.q_gram.QGram method)
_get_ngram_scores() (text_quality.feature.scorer.q_gram.QGram method)
_get_qgrams() (text_quality.feature.scorer.q_gram.QGram static method)
_HYPHENS (text_quality.feature.tokenizer.NautilusOcrTokenizer attribute)
_is_short() (text_quality.classifier.pipeline.Pipeline static method)
_LABEL_PREFIX (text_quality.language.fasttext.FastTextLanguageClassifier attribute)
_lookup() (text_quality.feature.scorer.dictionary.Dictionary method)
(text_quality.feature.scorer.dictionary.HunspellDictionary method)
(text_quality.feature.scorer.dictionary.TokenDictionary method)
_VOWELS (text_quality.feature.scorer.garbage.GarbageDetector attribute)
A
as_dataframe() (text_quality.feature.featurize.Featurizer static method)
C
CLASSIFIER (text_quality.classifier.pipeline.Reason attribute)
CLASSIFIER_DIR (in module text_quality.settings)
ClassifierScores (in module text_quality.classifier.pipeline)
classify() (text_quality.classifier.pipeline.Pipeline method)
(text_quality.language.classifier.LanguageClassifier method)
(text_quality.language.fasttext.FastTextLanguageClassifier method)
classify_with_scores() (text_quality.classifier.pipeline.Pipeline method)
D
DATA_DIR (in module text_quality.settings)
DEFAULT_LANGUAGE (in module text_quality.settings)
default_scores_dict() (in module text_quality.classifier.pipeline)
dict_score (text_quality.feature.featurize.Scorers attribute)
dict_score_gt (text_quality.feature.featurize.Scorers attribute)
Dictionary (class in text_quality.feature.scorer.dictionary)
DICTS_DIR (in module text_quality.settings)
E
EMPTY (text_quality.classifier.pipeline.Reason attribute)
EMPTY_PAGE_OUTPUT (in module text_quality.settings)
ENCODING (in module text_quality.settings)
EPR_RULE1 (text_quality.feature.scorer.garbage.GarbageDetector attribute)
EPR_RULE2 (text_quality.feature.scorer.garbage.GarbageDetector attribute)
EPR_RULE3 (text_quality.feature.scorer.garbage.GarbageDetector attribute)
EPR_RULE4 (text_quality.feature.scorer.garbage.GarbageDetector attribute)
EPR_RULE5 (text_quality.feature.scorer.garbage.GarbageDetector attribute)
EPR_RULE9 (text_quality.feature.scorer.garbage.GarbageDetector attribute)
F
FastTextLanguageClassifier (class in text_quality.language.fasttext)
features (text_quality.classifier.pipeline.Pipeline property)
(text_quality.feature.featurize.Featurizer property)
featurize() (text_quality.feature.featurize.Featurizer method)
featurize_as_dataframe() (text_quality.feature.featurize.Featurizer method)
Featurizer (class in text_quality.feature.featurize)
from_file() (text_quality.classifier.pipeline.Pipeline class method)
(text_quality.feature.scorer.dictionary.TokenDictionary class method)
(text_quality.feature.scorer.q_gram.QGram class method)
(text_quality.page.page.Page class method)
from_path() (text_quality.feature.scorer.dictionary.HunspellDictionary class method)
G
garbage_score (text_quality.feature.featurize.Scorers attribute)
GarbageDetector (class in text_quality.feature.scorer.garbage)
get_rank() (text_quality.feature.scorer.q_gram.QGram method)
get_text() (text_quality.page.page.Page method)
H
HUNSPELL_DIR (in module text_quality.settings)
HUNSPELL_LANGUAGE (in module text_quality.settings)
HunspellDictionary (class in text_quality.feature.scorer.dictionary)
I
id (text_quality.page.page.Page property)
L
LANGUAGE (text_quality.classifier.pipeline.Reason attribute)
LanguageClassifier (class in text_quality.language.classifier)
LINE_SEPARATOR (in module text_quality.settings)
lines() (text_quality.page.page.Page method)
LOG_LEVEL (in module text_quality.settings)
M
MINIMUM_PAGE_LENGTH (in module text_quality.settings)
MODEL_URLS (text_quality.language.fasttext.FastTextLanguageClassifier attribute)
module
text_quality
text_quality.classifier
text_quality.classifier.pipeline
text_quality.feature
text_quality.feature.featurize
text_quality.feature.scorer
text_quality.feature.scorer.dictionary
text_quality.feature.scorer.garbage
text_quality.feature.scorer.q_gram
text_quality.feature.scorer.scorer
text_quality.feature.tokenizer
text_quality.language
text_quality.language.classifier
text_quality.language.fasttext
text_quality.page
text_quality.page.page
text_quality.settings
N
n_gram_score (text_quality.feature.featurize.Scorers attribute)
NautilusOcrTokenizer (class in text_quality.feature.tokenizer)
P
Page (class in text_quality.page.page)
Pipeline (class in text_quality.classifier.pipeline)
PIPELINE_FILE (in module text_quality.settings)
preprocess() (text_quality.language.classifier.LanguageClassifier static method)
Q
Q_GRAM_LENGTH (in module text_quality.settings)
Q_GRAMS_GAMMA (in module text_quality.settings)
QGram (class in text_quality.feature.scorer.q_gram)
QGRAMS_DIR (in module text_quality.settings)
QGRAMS_FILE (in module text_quality.settings)
R
Reason (class in text_quality.classifier.pipeline)
REMOVE_CHARACTERS (text_quality.language.classifier.LanguageClassifier attribute)
S
score() (text_quality.feature.scorer.dictionary.Dictionary method)
(text_quality.feature.scorer.garbage.GarbageDetector method)
(text_quality.feature.scorer.q_gram.QGram method)
(text_quality.feature.scorer.scorer.Scorer method)
Scorer (class in text_quality.feature.scorer.scorer)
Scorers (class in text_quality.feature.featurize)
SHORT_COLUMN_WIDTH (in module text_quality.settings)
SHORT_COLUMNS (text_quality.classifier.pipeline.Reason attribute)
SOURCE_DIR (in module text_quality.settings)
T
text_quality
module
text_quality.classifier
module
text_quality.classifier.pipeline
module
text_quality.feature
module
text_quality.feature.featurize
module
text_quality.feature.scorer
module
text_quality.feature.scorer.dictionary
module
text_quality.feature.scorer.garbage
module
text_quality.feature.scorer.q_gram
module
text_quality.feature.scorer.scorer
module
text_quality.feature.tokenizer
module
text_quality.language
module
text_quality.language.classifier
module
text_quality.language.fasttext
module
text_quality.page
module
text_quality.page.page
module
text_quality.settings
module
to_file() (text_quality.feature.scorer.dictionary.TokenDictionary method)
(text_quality.feature.scorer.q_gram.QGram method)
TOKEN_DICT_FILE (in module text_quality.settings)
TokenDictionary (class in text_quality.feature.scorer.dictionary)
tokenize() (text_quality.feature.tokenizer.NautilusOcrTokenizer method)
(text_quality.feature.tokenizer.Tokenizer method)
Tokenizer (class in text_quality.feature.tokenizer)