Source code for nlpboost.augmentation.augmenter_config

from dataclasses import dataclass, field
from typing import Any, Dict, Optional

import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas

# Registry mapping the short identifier accepted by the augmenter
# configuration's ``name`` field to the `nlpaug` augmenter class it selects.
class_translator = dict(
    # Character-level augmenter.
    ocr=nac.OcrAug,
    # Word-level augmenters.
    contextual_w_e=naw.ContextualWordEmbsAug,
    synonym=naw.SynonymAug,
    backtranslation=naw.BackTranslationAug,
    # Sentence-level augmenters.
    contextual_s_e=nas.ContextualWordEmbsForSentenceAug,
    abstractive_summ=nas.AbstSummAug,
)


@dataclass
class NLPAugConfig:
    """
    Configuration for augmenters.

    Parameters
    ----------
    name : str
        Name of the data augmentation technique. Possible values currently are
        `ocr` (for OCR augmentation), `contextual_w_e` for Contextual Word
        Embedding augmentation, `synonym`, `backtranslation`, `contextual_s_e`
        for Contextual Word Embeddings for Sentence Augmentation,
        `abstractive_summ`. If using a custom augmenter class this can be a
        random name.
    augmenter_cls : Any
        An optional augmenter class, from `nlpaug` library. Can be used instead
        of using an identifier name for loading the class (see param `name` of
        this class). Defaults to ``None``.
    proportion : float
        Proportion of data augmentation. Defaults to ``0.1``.
    aug_kwargs : Dict, optional
        Arguments for the data augmentation class. Defaults to ``None``. See
        https://github.com/makcedward/nlpaug/blob/master/example/textual_augmenter.ipynb
    """

    # Required: this is the only field declared without a default value.
    name: str = field(
        metadata={
            "help": "Name of the data augmentation technique. If using a custom augmenter class this can be a random name."
        }
    )
    augmenter_cls: Any = field(
        default=None,
        metadata={
            "help": "An optional augmenter class, from `nlpaug` library. Can be used instead of using an identifier name for loading the class (see param `name` of this class)."
        },
    )
    proportion: float = field(
        default=0.1, metadata={"help": "proportion of data augmentation"}
    )
    # The default is None (not an empty dict), so the annotation is Optional.
    aug_kwargs: Optional[Dict[str, Any]] = field(
        default=None,
        metadata={
            "help": "Arguments for the data augmentation class. See https://github.com/makcedward/nlpaug/blob/master/example/textual_augmenter.ipynb"
        },
    )