"""Generate one ``afrixnli_<lang>.yaml`` task config per supported language."""

import argparse
import os

import yaml


class FunctionTag:
    """Wrapper for a value that is dumped to YAML as a ``!function`` scalar."""

    def __init__(self, value: str) -> None:
        # Presumably a dotted "module.function" reference; stored verbatim.
        self.value = value


def function_representer(dumper, data):
    """Represent a :class:`FunctionTag` as a plain ``!function``-tagged scalar.

    :param dumper: The PyYAML dumper performing the serialization.
    :param data: The :class:`FunctionTag` instance being dumped.
    :return: The scalar node produced by ``dumper.represent_scalar``.
    """
    return dumper.represent_scalar("!function", data.value, style="")


# Registered at import time so that yaml.dump can serialize FunctionTag values.
yaml.add_representer(FunctionTag, function_representer)


def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
    """
    Generate a yaml file for each language.

    Every language is attempted even if some files already exist; the names
    of the files that could not be created are collected and reported
    together at the end.

    :param output_dir: The directory to output the files to.
    :param overwrite: Whether to overwrite files if they already exist.
    :raises FileExistsError: If ``overwrite`` is false and at least one of
        the target files already exists.
    """
    err = []
    languages = [
        "amh",
        "ibo",
        "fra",
        "sna",
        "lin",
        "wol",
        "ewe",
        "lug",
        "xho",
        "kin",
        "twi",
        "zul",
        "orm",
        "yor",
        "hau",
        "sot",
        "swa",
    ]
    # Mode "x" makes open() raise FileExistsError instead of truncating.
    mode = "w" if overwrite else "x"
    for lang in languages:
        file_name = f"afrixnli_{lang}.yaml"
        # os.path.join keeps an empty output_dir relative to the current
        # directory instead of turning it into a path under "/".
        path = os.path.join(output_dir, file_name)
        try:
            with open(path, mode, encoding="utf8") as f:
                f.write("# Generated by utils.py\n")
                yaml.dump(
                    {
                        "include": "afrixnli_common_yaml",
                        "task": f"afrixnli_{lang}",
                        "dataset_name": lang,
                        "doc_to_target": "{{label}}",
                        "doc_to_text": "Premise: {{premise}} \nHypothesis: {{hypothesis}} \nIs it entailment, "
                        "contradiction, or neutral?",
                    },
                    f,
                    allow_unicode=True,
                )
        except FileExistsError:
            err.append(file_name)

    if err:
        raise FileExistsError(
            "Files were not created because they already exist (use --overwrite flag):"
            f" {', '.join(err)}"
        )


def main() -> None:
    """Parse CLI args and generate language-specific yaml files."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--overwrite",
        default=True,
        action="store_true",
        help="Overwrite files if they already exist (default behaviour)",
    )
    # With default=True, "--overwrite" alone can never yield False, so the
    # non-overwriting mode was unreachable from the command line. This
    # companion flag exposes it without changing the existing default.
    parser.add_argument(
        "--no-overwrite",
        dest="overwrite",
        action="store_false",
        help="Fail instead of overwriting files that already exist",
    )
    parser.add_argument(
        "--output-dir", default=".", help="Directory to write yaml files to"
    )
    args = parser.parse_args()

    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite)


if __name__ == "__main__":
    main()