Module a2t.base

The module `base` implements all the basic methods to perform the inference, including the `EntailmentClassifier`.
Expand source code
"""The module `base` implements all the basic methods to perform the inference, including the `EntailmentClassifier`.
"""
import os
import sys
import gc
from typing import List
import numpy as np
import torch
try:
from tqdm import tqdm
_use_tqdm = True
except ImportError:
_use_tqdm = False
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
from .tasks import Features, Task
try:
import transformers
transformers.logging.set_verbosity_error()
except ImportError:
pass
def np_softmax(x, dim=-1):
e = np.exp(x)
return e / np.sum(e, axis=dim, keepdims=True)
def np_sigmoid(x, dim=-1):
return 1 / (1 + np.exp(-x))
class Classifier(object):
"""Abstact classifier class."""
def __init__(
self, labels: List[str], pretrained_model: str = "roberta-large-mnli", use_cuda=True, half=False, verbose=True
):
super().__init__()
self.device = torch.device("cuda" if torch.cuda.is_available() and use_cuda else "cpu")
self.labels = labels
self.use_cuda = use_cuda
self.half = half
self.verbose = verbose
# Supress stdout printing for model downloads
if not verbose:
sys.stdout = open(os.devnull, "w")
self._initialize(pretrained_model)
sys.stdout = sys.__stdout__
else:
self._initialize(pretrained_model)
self.model = self.model.to(self.device)
self.model = self.model.eval()
if self.use_cuda and self.half and torch.cuda.is_available():
self.model = self.model.half()
def _initialize(self, pretrained_model):
raise NotImplementedError
def __call__(self, context, batch_size=1):
raise NotImplementedError
def clear_gpu_memory(self):
self.model.cpu()
del self.model
gc.collect()
torch.cuda.empty_cache()
class EntailmentClassifier(Classifier):
"""General purpose Entailment based classifier.
This class contains the code for entailment-based zero-shot classification inference. It is pretended to be
task and data independent.
"""
def __init__(
self,
pretrained_model: str = "roberta-large-mnli",
use_cuda: bool = True,
half: bool = False,
verbose: bool = True,
use_tqdm: bool = True,
**kwargs
):
"""
Args:
pretrained_model (str, optional): The name or path of the pretrained model. Defaults to "roberta-large-mnli".
use_cuda (bool, optional): Use the GPU if possible. Defaults to True.
half (bool, optional): Use half precision if possible. Defaults to False.
verbose (bool, optional): Output log information. Defaults to True.
"""
super().__init__(None, pretrained_model, use_cuda, half, verbose)
self.use_tqdm = use_tqdm and _use_tqdm
def _initialize(self, pretrained_model: str):
self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model)
self.model = AutoModelForSequenceClassification.from_pretrained(pretrained_model)
self.config = AutoConfig.from_pretrained(pretrained_model)
self.ent_pos = self.config.label2id.get("ENTAILMENT", self.config.label2id.get("entailment", None))
if self.ent_pos is None:
raise ValueError("The model config must contain ENTAILMENT label in the label2id dict.")
else:
self.ent_pos = int(self.ent_pos)
@staticmethod
def apply_threshold(
output: np.ndarray,
threshold: float = 0.0,
ignore_negative_prediction: bool = True,
application_type: str = "prediction",
) -> np.ndarray:
"""
Args:
output (ndarray): (batch_size, n_labels) The predicted probabilities.
threshold (float): The threshold value to apply.
ignore_negative_prediction (bool): Ignore the negative prediction probabilites. Default to True.
application_type (str): How to apply the threshold: Options:
* **"prediction"**: Set to 1.0 the probability of the negative class if the no prediction is higher than the threshold.
* **"mask"**: Set to 0.0 the probabilities of the positive classes that are lower or equal to the threshold.
"""
output_ = output.copy()
if ignore_negative_prediction:
output_[:, 0] = 0.0
if application_type == "prediction":
activations = (output_ >= threshold).sum(-1).astype(int)
output_[activations == 0, 0] = 1.00
elif application_type == "mask":
activations = output_ < threshold
output_[activations] = 0.0
else:
raise ValueError("""application_type argument must be "prediction" or "mask".""")
return output_
def __call__(
self,
task: Task,
features: List[Features],
negative_threshold: float = 0.5,
topk: int = 1,
return_labels: bool = False,
return_confidences: bool = False,
ignore_negative_prediction: bool = False,
return_raw_output: bool = False,
**kwargs
) -> List:
"""Call method for the EntailmentClassifier.
TODO: Add output documentation.
Args:
task (Task): The task instance used for inference.
features (List[Features]): The list of features to classify.
negative_threshold (float, optional): The threshold used if necessary. Defaults to 0.5.
topk (int, optional): Return the first `k` predictions with higher probabilities. Defaults to 1.
return_labels (bool, optional): Whether to return the label ids or names. Defaults to False (ids).
return_confidences (bool, optional): Whether to return prediction confidences or not. Defaults to False.
ignore_negative_prediction (bool, optional): Whether to ignore the predictions of the negative class. Defaults to False.
return_raw_output (bool, optional): Return the raw output along with the processed one. Defaults to False.
Returns:
List: A list with the predictions.
"""
task.assert_features_class(features)
outputs = []
iterator = features if not self.use_tqdm else tqdm(features, total=len(features))
with torch.no_grad():
for feature in iterator:
sentence_pairs = task.generate_premise_hypotheses_pairs([feature], self.tokenizer.sep_token)
data = self.tokenizer(sentence_pairs, return_tensors="pt", padding=True).input_ids
data = data.to(self.device)
output = self.model(data)[0].detach().cpu().numpy()
outputs.append(output)
outputs = np.vstack(outputs)
if task.multi_label:
outputs = np.exp(outputs) / np.exp(outputs).sum(-1, keepdims=True)
outputs = outputs[..., self.ent_pos].reshape(len(features), -1)
preds = task.reverse_to_labels(outputs)
if not task.multi_label:
preds = np_softmax(preds)
preds = task.apply_valid_conditions(features, preds)
apply_threshold = task.multi_label and negative_threshold > 0
if apply_threshold:
preds = self.apply_threshold(
preds, threshold=negative_threshold, ignore_negative_prediction=ignore_negative_prediction
)
predictions = np.argsort(preds, -1)[:, ::-1]
if topk > 0:
predictions = predictions[:, :topk]
if return_labels:
predictions = task.idx2label(predictions)
if return_confidences:
confidences = np.sort(preds, -1)[:, ::-1]
if topk > 0:
confidences = confidences[:, :topk]
predictions = np.stack((predictions, confidences), -1).tolist()
predictions = [
[(int(label), float(conf)) if not return_labels else (label, float(conf)) for label, conf in row]
for row in predictions
]
else:
predictions = predictions.tolist()
if topk == 1:
predictions = [row[0] for row in predictions]
if return_raw_output:
return (predictions, preds)
else:
return predictions
__pdoc__ = {"EntailmentClassifier.__call__": True}
Classes
class EntailmentClassifier (pretrained_model: str = 'roberta-large-mnli', use_cuda: bool = True, half: bool = False, verbose: bool = True, use_tqdm: bool = True, **kwargs)
General purpose entailment-based classifier.
This class contains the code for entailment-based zero-shot classification inference. It is intended to be task and data independent.
Args
pretrained_model : str, optional
    The name or path of the pretrained model. Defaults to "roberta-large-mnli".
use_cuda : bool, optional
    Use the GPU if possible. Defaults to True.
half : bool, optional
    Use half precision if possible. Defaults to False.
verbose : bool, optional
    Output log information. Defaults to True.
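For example, a minimal instantiation sketch using the defaults documented above (the checkpoint is downloaded on first use; any NLI model whose config exposes an "entailment"/"ENTAILMENT" label should work):

from a2t.base import EntailmentClassifier

# Minimal sketch: the arguments mirror the documented defaults.
nlp = EntailmentClassifier(
    "roberta-large-mnli",
    use_cuda=True,   # falls back to CPU automatically if no GPU is available
    half=False,      # half precision is only applied when CUDA is actually used
    verbose=True,
)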
Ancestors
- a2t.base.Classifier
Static methods
def apply_threshold(output: numpy.ndarray, threshold: float = 0.0, ignore_negative_prediction: bool = True, application_type: str = 'prediction') -> numpy.ndarray
Args
output : ndarray
    (batch_size, n_labels) The predicted probabilities.
threshold : float
    The threshold value to apply.
ignore_negative_prediction : bool
    Ignore the negative prediction probabilities. Defaults to True.
application_type : str
    How to apply the threshold. Options:
    - "prediction": Set the probability of the negative class to 1.0 if no positive prediction reaches the threshold.
    - "mask": Set to 0.0 the probabilities of the positive classes that are lower than the threshold.
Methods
def __call__(self, task: a2t.tasks.base.Task, features: List[a2t.tasks.base.Features], negative_threshold: float = 0.5, topk: int = 1, return_labels: bool = False, return_confidences: bool = False, ignore_negative_prediction: bool = False, return_raw_output: bool = False, **kwargs) -> List
Call method for the EntailmentClassifier.
TODO: Add output documentation.
Args
task : Task
    The task instance used for inference.
features : List[Features]
    The list of features to classify.
negative_threshold : float, optional
    The threshold used if necessary. Defaults to 0.5.
topk : int, optional
    Return the `k` predictions with the highest probabilities. Defaults to 1.
return_labels : bool, optional
    Whether to return the label names instead of the ids. Defaults to False (ids).
return_confidences : bool, optional
    Whether to return prediction confidences or not. Defaults to False.
ignore_negative_prediction : bool, optional
    Whether to ignore the predictions of the negative class. Defaults to False.
return_raw_output : bool, optional
    Return the raw output along with the processed one. Defaults to False.
Returns
List
    A list with the predictions.
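A usage sketch, assuming `task` and `features` are already built with the `a2t.tasks` module (their construction depends on the concrete task and is not documented on this page):

from a2t.base import EntailmentClassifier

# `task` is assumed to be a constructed a2t.tasks.Task instance and `features`
# a list of its Features; both are placeholders here, not part of this module.
nlp = EntailmentClassifier("roberta-large-mnli", use_cuda=True)

predictions = nlp(
    task,
    features,
    negative_threshold=0.5,   # only applied when task.multi_label is True
    topk=1,                   # with topk=1 each feature yields a single prediction
    return_labels=True,       # label names instead of indices
    return_confidences=True,  # pair each label with its probability
)
# With these flags, `predictions` holds one (label, confidence) tuple per feature.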