Commit ec5d6c6a authored by Morgan Funtowicz
Browse files

Addressing issue with NER task omitting first and last word.

parent d0724d07
...@@ -318,8 +318,6 @@ class NerPipeline(Pipeline): ...@@ -318,8 +318,6 @@ class NerPipeline(Pipeline):
""" """
Named Entity Recognition pipeline using ModelForTokenClassification head. Named Entity Recognition pipeline using ModelForTokenClassification head.
""" """
def __init__(self, model, tokenizer: PreTrainedTokenizer):
super().__init__(model, tokenizer)
def __call__(self, *texts, **kwargs): def __call__(self, *texts, **kwargs):
inputs, answers = self._args_parser(*texts, **kwargs), [] inputs, answers = self._args_parser(*texts, **kwargs), []
...@@ -344,14 +342,16 @@ class NerPipeline(Pipeline): ...@@ -344,14 +342,16 @@ class NerPipeline(Pipeline):
# Normalize scores # Normalize scores
answer, token_start = [], 1 answer, token_start = [], 1
for idx, word in groupby(token_to_word[1:-1]): for idx, word in groupby(token_to_word):
# Sum log prob over token, then normalize across labels # Sum log prob over token, then normalize across labels
score = np.exp(entities[token_start]) / np.exp(entities[token_start]).sum(-1, keepdims=True) score = np.exp(entities[token_start]) / np.exp(entities[token_start]).sum(-1, keepdims=True)
label_idx = score.argmax() label_idx = score.argmax()
answer += [{ answer += [{
'word': words[idx - 1], 'score': score[label_idx].item(), 'entity': self.model.config.id2label[label_idx] 'word': words[idx],
'score': score[label_idx].item(),
'entity': self.model.config.id2label[label_idx]
}] }]
# Update token start # Update token start
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment