Commit d58d67b3 authored by Baber's avatar Baber
Browse files

refactor masakhapos

parent 7f04db12
......@@ -73,3 +73,5 @@ HomePage: https://github.com/masakhane-io/masakhane-pos
abstract = "In this paper, we present AfricaPOS, the largest part-of-speech (POS) dataset for 20 typologically diverse African languages. We discuss the challenges in annotating POS for these languages using the universal dependencies (UD) guidelines. We conducted extensive POS baseline experiments using both conditional random field and several multilingual pre-trained language models. We applied various cross-lingual transfer models trained with data available in the UD. Evaluating on the AfricaPOS dataset, we show that choosing the best transfer language(s) in both single-source and multi-source setups greatly improves the POS tagging performance of the target languages, in particular when combined with parameter-fine-tuning methods. Crucially, transferring knowledge from a language that matches the language family and morphosyntactic properties seems to be more effective for POS tagging in unseen languages."
}
```
## Changelog
- 2025-07-21: Refactored. Scores should not be affected.
......@@ -14,19 +14,18 @@ validation_split: validation
test_split: test
fewshot_split: train
doc_to_target: !function utils.doc_to_target
process_results: !function utils.process_results
should_decontaminate: true
doc_to_decontamination_query: "Sentence: {{token}}\nOutput:"
filter_list:
- filter:
- function: regex_pos
- function: "custom"
filter_fn: !function utils.extract_pos
- function: "take_first"
name: flexible-extract
metric_list:
- metric: acc
aggregation: !function utils.acc_score
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
regexes_to_ignore:
- ","
metadata:
version: 1.0
from itertools import chain
import re
from collections.abc import Iterable
from typing import Any
from sklearn.metrics import accuracy_score
from lm_eval.utils import weighted_f1_score
def doc_to_target(doc):
pos_tag_map = {
......@@ -29,27 +29,40 @@ def doc_to_target(doc):
return [pos_tag_map[tag] for tag in doc["upos"]]
def acc_score(items):
unzipped_list = list(zip(*items))
def extract_pos(resps: Iterable[list[str]], *args) -> Iterable[list[str]]:
    """Filter model responses down to lists of predicted POS tags.

    Each raw response is expected to contain tuples formatted like
    ``('word', 'TAG')``; the tags are extracted in order of appearance.
    A response with no parsable tuples yields ``["invalid"]`` so that
    downstream scoring always receives a non-empty value.

    Args:
        resps: Per-document lists of raw model generations.
        *args: Extra positional args passed by the filter harness; unused.

    Returns:
        An iterator of per-document lists, each entry a list of tags.
    """

    def extract_tagged_tokens(text: str) -> list[tuple[str, str]]:
        # BUGFIX: previously matched against a hard-coded example string
        # instead of `text`, so every response produced the same tags.
        return re.findall(r"\('([^']*)', '([^']*)'\)", text)

    def extract_pos_tags(result: str) -> list[str]:
        # Keep only the tag element of each (token, tag) pair.
        pos_tags: list[str] = []
        if isinstance(result, str):
            pos_tags.extend(pos for _, pos in extract_tagged_tokens(result))
        # Sentinel so empty or garbled generations are never silently dropped.
        return pos_tags if pos_tags else ["invalid"]

    def filter_set(inst: list[str]) -> list[list[str]]:
        return [extract_pos_tags(resp) for resp in inst]

    return map(filter_set, resps)
# Calculate the accuracy for each gold-pred pair
accuracy_scores = []
for gold, pred in zip(golds, flattened_preds):
# Ensure both lists are of the same length, otherwise truncate to match
min_length = min(len(gold), len(pred))
gold = gold[:min_length]
pred = pred[:min_length]
# Calculate accuracy for the current pair and add to the list
accuracy = accuracy_score(gold, pred)
accuracy_scores.append(accuracy)
def process_results(doc: dict[str, Any], results: list[list[str]]):
    """Compute token-level POS accuracy for a single document.

    Args:
        doc: Dataset document; gold tags come from ``doc_to_target``.
        results: Filtered responses; ``results[0]`` is the predicted tag list.

    Returns:
        ``{"acc": accuracy}`` — fraction of matching positions over the
        overlapping prefix of gold and predicted tags (0.0 if no overlap).
    """
    golds, preds = doc_to_target(doc), results[0]
    # Truncate to the shorter sequence so ragged generations still score.
    min_length = min(len(golds), len(preds))
    gold = golds[:min_length]
    pred = preds[:min_length]
    # Inline positional accuracy instead of sklearn.metrics.accuracy_score:
    # same value on equal-length label lists, but no ValueError on empty
    # input (zero-overlap documents now score 0.0 instead of crashing).
    accuracy = (
        sum(g == p for g, p in zip(gold, pred)) / min_length if min_length else 0.0
    )
    return {"acc": accuracy}
......@@ -16,17 +16,16 @@ fewshot_split: train
doc_to_target: !function utils.doc_to_target
should_decontaminate: true
doc_to_decontamination_query: "Sentence: {{token}}\nOutput:"
process_results: !function utils.process_results
filter_list:
- filter:
- function: regex_pos
- function: "custom"
filter_fn: !function utils.extract_pos
- function: "take_first"
name: flexible-extract
metric_list:
- metric: acc
aggregation: !function utils.acc_score
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
regexes_to_ignore:
- ","
metadata:
version: 1.0
from itertools import chain
import re
from collections.abc import Iterable
from typing import Any
from sklearn.metrics import accuracy_score
from lm_eval.utils import weighted_f1_score
def doc_to_target(doc):
pos_tag_map = {
......@@ -29,27 +29,40 @@ def doc_to_target(doc):
return [pos_tag_map[tag] for tag in doc["upos"]]
def acc_score(items):
unzipped_list = list(zip(*items))
def extract_pos(resps: Iterable[list[str]], *args) -> Iterable[list[str]]:
    """Filter model responses down to lists of predicted POS tags.

    Each raw response is expected to contain tuples formatted like
    ``('word', 'TAG')``; the tags are extracted in order of appearance.
    A response with no parsable tuples yields ``["invalid"]`` so that
    downstream scoring always receives a non-empty value.

    Args:
        resps: Per-document lists of raw model generations.
        *args: Extra positional args passed by the filter harness; unused.

    Returns:
        An iterator of per-document lists, each entry a list of tags.
    """

    def extract_tagged_tokens(text: str) -> list[tuple[str, str]]:
        # BUGFIX: previously matched against a hard-coded example string
        # instead of `text`, so every response produced the same tags.
        return re.findall(r"\('([^']*)', '([^']*)'\)", text)

    def extract_pos_tags(result: str) -> list[str]:
        # Keep only the tag element of each (token, tag) pair.
        pos_tags: list[str] = []
        if isinstance(result, str):
            pos_tags.extend(pos for _, pos in extract_tagged_tokens(result))
        # Sentinel so empty or garbled generations are never silently dropped.
        return pos_tags if pos_tags else ["invalid"]

    def filter_set(inst: list[str]) -> list[list[str]]:
        return [extract_pos_tags(resp) for resp in inst]

    return map(filter_set, resps)
# Calculate the accuracy for each gold-pred pair
accuracy_scores = []
for gold, pred in zip(golds, flattened_preds):
# Ensure both lists are of the same length, otherwise truncate to match
min_length = min(len(gold), len(pred))
gold = gold[:min_length]
pred = pred[:min_length]
# Calculate accuracy for the current pair and add to the list
accuracy = accuracy_score(gold, pred)
accuracy_scores.append(accuracy)
def process_results(doc: dict[str, Any], results: list[list[str]]):
    """Compute token-level POS accuracy for a single document.

    Args:
        doc: Dataset document; gold tags come from ``doc_to_target``.
        results: Filtered responses; ``results[0]`` is the predicted tag list.

    Returns:
        ``{"acc": accuracy}`` — fraction of matching positions over the
        overlapping prefix of gold and predicted tags (0.0 if no overlap).
    """
    golds, preds = doc_to_target(doc), results[0]
    # Truncate to the shorter sequence so ragged generations still score.
    min_length = min(len(golds), len(preds))
    gold = golds[:min_length]
    pred = preds[:min_length]
    # Inline positional accuracy instead of sklearn.metrics.accuracy_score:
    # same value on equal-length label lists, but no ValueError on empty
    # input (zero-overlap documents now score 0.0 instead of crashing).
    accuracy = (
        sum(g == p for g, p in zip(gold, pred)) / min_length if min_length else 0.0
    )
    return {"acc": accuracy}
......@@ -16,17 +16,16 @@ fewshot_split: train
doc_to_target: !function utils.doc_to_target
should_decontaminate: true
doc_to_decontamination_query: "Sentence: {{token}}\nOutput:"
process_results: !function utils.process_results
filter_list:
- filter:
- function: regex_pos
- function: "custom"
filter_fn: !function utils.extract_pos
- function: "take_first"
name: flexible-extract
metric_list:
- metric: acc
aggregation: !function utils.acc_score
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
regexes_to_ignore:
- ","
metadata:
version: 1.0
from itertools import chain
import re
from collections.abc import Iterable
from typing import Any
from sklearn.metrics import accuracy_score
from lm_eval.utils import weighted_f1_score
def doc_to_target(doc):
pos_tag_map = {
......@@ -29,27 +29,40 @@ def doc_to_target(doc):
return [pos_tag_map[tag] for tag in doc["upos"]]
def acc_score(items):
unzipped_list = list(zip(*items))
def extract_pos(resps: Iterable[list[str]], *args) -> Iterable[list[str]]:
    """Filter model responses down to lists of predicted POS tags.

    Each raw response is expected to contain tuples formatted like
    ``('word', 'TAG')``; the tags are extracted in order of appearance.
    A response with no parsable tuples yields ``["invalid"]`` so that
    downstream scoring always receives a non-empty value.

    Args:
        resps: Per-document lists of raw model generations.
        *args: Extra positional args passed by the filter harness; unused.

    Returns:
        An iterator of per-document lists, each entry a list of tags.
    """

    def extract_tagged_tokens(text: str) -> list[tuple[str, str]]:
        # BUGFIX: previously matched against a hard-coded example string
        # instead of `text`, so every response produced the same tags.
        return re.findall(r"\('([^']*)', '([^']*)'\)", text)

    def extract_pos_tags(result: str) -> list[str]:
        # Keep only the tag element of each (token, tag) pair.
        pos_tags: list[str] = []
        if isinstance(result, str):
            pos_tags.extend(pos for _, pos in extract_tagged_tokens(result))
        # Sentinel so empty or garbled generations are never silently dropped.
        return pos_tags if pos_tags else ["invalid"]

    def filter_set(inst: list[str]) -> list[list[str]]:
        return [extract_pos_tags(resp) for resp in inst]

    return map(filter_set, resps)
# Calculate the accuracy for each gold-pred pair
accuracy_scores = []
for gold, pred in zip(golds, flattened_preds):
# Ensure both lists are of the same length, otherwise truncate to match
min_length = min(len(gold), len(pred))
gold = gold[:min_length]
pred = pred[:min_length]
# Calculate accuracy for the current pair and add to the list
accuracy = accuracy_score(gold, pred)
accuracy_scores.append(accuracy)
def process_results(doc: dict[str, Any], results: list[list[str]]):
    """Compute token-level POS accuracy for a single document.

    Args:
        doc: Dataset document; gold tags come from ``doc_to_target``.
        results: Filtered responses; ``results[0]`` is the predicted tag list.

    Returns:
        ``{"acc": accuracy}`` — fraction of matching positions over the
        overlapping prefix of gold and predicted tags (0.0 if no overlap).
    """
    golds, preds = doc_to_target(doc), results[0]
    # Truncate to the shorter sequence so ragged generations still score.
    min_length = min(len(golds), len(preds))
    gold = golds[:min_length]
    pred = preds[:min_length]
    # Inline positional accuracy instead of sklearn.metrics.accuracy_score:
    # same value on equal-length label lists, but no ValueError on empty
    # input (zero-overlap documents now score 0.0 instead of crashing).
    accuracy = (
        sum(g == p for g, p in zip(gold, pred)) / min_length if min_length else 0.0
    )
    return {"acc": accuracy}
......@@ -16,17 +16,16 @@ fewshot_split: train
doc_to_target: !function utils.doc_to_target
should_decontaminate: true
doc_to_decontamination_query: "Sentence: {{token}}\nOutput:"
process_results: !function utils.process_results
filter_list:
- filter:
- function: regex_pos
- function: "custom"
filter_fn: !function utils.extract_pos
- function: "take_first"
name: flexible-extract
metric_list:
- metric: acc
aggregation: !function utils.acc_score
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
regexes_to_ignore:
- ","
metadata:
version: 1.0
from itertools import chain
import re
from collections.abc import Iterable
from typing import Any
from sklearn.metrics import accuracy_score
from lm_eval.utils import weighted_f1_score
def doc_to_target(doc):
pos_tag_map = {
......@@ -29,27 +29,40 @@ def doc_to_target(doc):
return [pos_tag_map[tag] for tag in doc["upos"]]
def acc_score(items):
unzipped_list = list(zip(*items))
def extract_pos(resps: Iterable[list[str]], *args) -> Iterable[list[str]]:
    """Filter model responses down to lists of predicted POS tags.

    Each raw response is expected to contain tuples formatted like
    ``('word', 'TAG')``; the tags are extracted in order of appearance.
    A response with no parsable tuples yields ``["invalid"]`` so that
    downstream scoring always receives a non-empty value.

    Args:
        resps: Per-document lists of raw model generations.
        *args: Extra positional args passed by the filter harness; unused.

    Returns:
        An iterator of per-document lists, each entry a list of tags.
    """

    def extract_tagged_tokens(text: str) -> list[tuple[str, str]]:
        # BUGFIX: previously matched against a hard-coded example string
        # instead of `text`, so every response produced the same tags.
        return re.findall(r"\('([^']*)', '([^']*)'\)", text)

    def extract_pos_tags(result: str) -> list[str]:
        # Keep only the tag element of each (token, tag) pair.
        pos_tags: list[str] = []
        if isinstance(result, str):
            pos_tags.extend(pos for _, pos in extract_tagged_tokens(result))
        # Sentinel so empty or garbled generations are never silently dropped.
        return pos_tags if pos_tags else ["invalid"]

    def filter_set(inst: list[str]) -> list[list[str]]:
        return [extract_pos_tags(resp) for resp in inst]

    return map(filter_set, resps)
# Calculate the accuracy for each gold-pred pair
accuracy_scores = []
for gold, pred in zip(golds, flattened_preds):
# Ensure both lists are of the same length, otherwise truncate to match
min_length = min(len(gold), len(pred))
gold = gold[:min_length]
pred = pred[:min_length]
# Calculate accuracy for the current pair and add to the list
accuracy = accuracy_score(gold, pred)
accuracy_scores.append(accuracy)
def process_results(doc: dict[str, Any], results: list[list[str]]):
    """Compute token-level POS accuracy for a single document.

    Args:
        doc: Dataset document; gold tags come from ``doc_to_target``.
        results: Filtered responses; ``results[0]`` is the predicted tag list.

    Returns:
        ``{"acc": accuracy}`` — fraction of matching positions over the
        overlapping prefix of gold and predicted tags (0.0 if no overlap).
    """
    golds, preds = doc_to_target(doc), results[0]
    # Truncate to the shorter sequence so ragged generations still score.
    min_length = min(len(golds), len(preds))
    gold = golds[:min_length]
    pred = preds[:min_length]
    # Inline positional accuracy instead of sklearn.metrics.accuracy_score:
    # same value on equal-length label lists, but no ValueError on empty
    # input (zero-overlap documents now score 0.0 instead of crashing).
    accuracy = (
        sum(g == p for g, p in zip(gold, pred)) / min_length if min_length else 0.0
    )
    return {"acc": accuracy}
......@@ -16,17 +16,16 @@ fewshot_split: train
doc_to_target: !function utils.doc_to_target
should_decontaminate: true
doc_to_decontamination_query: "Sentence: {{token}}\nOutput:"
process_results: !function utils.process_results
filter_list:
- filter:
- function: regex_pos
- function: "custom"
filter_fn: !function utils.extract_pos
- function: "take_first"
name: flexible-extract
metric_list:
- metric: acc
aggregation: !function utils.acc_score
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
regexes_to_ignore:
- ","
metadata:
version: 1.0
from itertools import chain
import re
from collections.abc import Iterable
from typing import Any
from sklearn.metrics import accuracy_score
from lm_eval.utils import weighted_f1_score
def doc_to_target(doc):
pos_tag_map = {
......@@ -29,27 +29,40 @@ def doc_to_target(doc):
return [pos_tag_map[tag] for tag in doc["upos"]]
def acc_score(items):
unzipped_list = list(zip(*items))
def extract_pos(resps: Iterable[list[str]], *args) -> Iterable[list[str]]:
    """Filter model responses down to lists of predicted POS tags.

    Each raw response is expected to contain tuples formatted like
    ``('word', 'TAG')``; the tags are extracted in order of appearance.
    A response with no parsable tuples yields ``["invalid"]`` so that
    downstream scoring always receives a non-empty value.

    Args:
        resps: Per-document lists of raw model generations.
        *args: Extra positional args passed by the filter harness; unused.

    Returns:
        An iterator of per-document lists, each entry a list of tags.
    """

    def extract_tagged_tokens(text: str) -> list[tuple[str, str]]:
        # BUGFIX: previously matched against a hard-coded example string
        # instead of `text`, so every response produced the same tags.
        return re.findall(r"\('([^']*)', '([^']*)'\)", text)

    def extract_pos_tags(result: str) -> list[str]:
        # Keep only the tag element of each (token, tag) pair.
        pos_tags: list[str] = []
        if isinstance(result, str):
            pos_tags.extend(pos for _, pos in extract_tagged_tokens(result))
        # Sentinel so empty or garbled generations are never silently dropped.
        return pos_tags if pos_tags else ["invalid"]

    def filter_set(inst: list[str]) -> list[list[str]]:
        return [extract_pos_tags(resp) for resp in inst]

    return map(filter_set, resps)
# Calculate the accuracy for each gold-pred pair
accuracy_scores = []
for gold, pred in zip(golds, flattened_preds):
# Ensure both lists are of the same length, otherwise truncate to match
min_length = min(len(gold), len(pred))
gold = gold[:min_length]
pred = pred[:min_length]
# Calculate accuracy for the current pair and add to the list
accuracy = accuracy_score(gold, pred)
accuracy_scores.append(accuracy)
def process_results(doc: dict[str, Any], results: list[list[str]]):
    """Compute token-level POS accuracy for a single document.

    Args:
        doc: Dataset document; gold tags come from ``doc_to_target``.
        results: Filtered responses; ``results[0]`` is the predicted tag list.

    Returns:
        ``{"acc": accuracy}`` — fraction of matching positions over the
        overlapping prefix of gold and predicted tags (0.0 if no overlap).
    """
    golds, preds = doc_to_target(doc), results[0]
    # Truncate to the shorter sequence so ragged generations still score.
    min_length = min(len(golds), len(preds))
    gold = golds[:min_length]
    pred = preds[:min_length]
    # Inline positional accuracy instead of sklearn.metrics.accuracy_score:
    # same value on equal-length label lists, but no ValueError on empty
    # input (zero-overlap documents now score 0.0 instead of crashing).
    accuracy = (
        sum(g == p for g, p in zip(gold, pred)) / min_length if min_length else 0.0
    )
    return {"acc": accuracy}
from lm_eval.utils import weighted_f1_score
def doc_to_text(doc):
output = """Please provide the POS tags for each word in the input sentence. The input will be a list of words in
the sentence. The output format should be a list of tuples, where each tuple consists of a word from the input text
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment