"docs/source/vscode:/vscode.git/clone" did not exist on "f16ff0f07e3867db5feda00a661572a190f404e6"
Unverified Commit 3cab9027 authored by Sylvain Gugger's avatar Sylvain Gugger Committed by GitHub
Browse files

Add examples telemetry (#17552)

* Add examples telemetry

* Alternative approach

* Add to all other examples

* Add to templates as well

* Put framework separately

* Same for TensorFlow
parent 9e72eb44
...@@ -51,7 +51,7 @@ from transformers import ( ...@@ -51,7 +51,7 @@ from transformers import (
default_data_collator, default_data_collator,
get_scheduler, get_scheduler,
) )
from transformers.utils import get_full_repo_name from transformers.utils import get_full_repo_name, send_example_telemetry
from transformers.utils.versions import require_version from transformers.utils.versions import require_version
...@@ -305,6 +305,10 @@ def main(): ...@@ -305,6 +305,10 @@ def main():
# Parse the arguments # Parse the arguments
args = parse_args() args = parse_args()
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_translation_no_trainer", args)
# Initialize the accelerator. We will let the accelerator handle device placement for us in this example. # Initialize the accelerator. We will let the accelerator handle device placement for us in this example.
# If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers # If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers
# in the environment # in the environment
......
...@@ -53,6 +53,7 @@ from transformers import ( ...@@ -53,6 +53,7 @@ from transformers import (
create_optimizer, create_optimizer,
set_seed, set_seed,
) )
from transformers.utils import send_example_telemetry
from transformers.utils.versions import require_version from transformers.utils.versions import require_version
...@@ -232,6 +233,10 @@ def main(): ...@@ -232,6 +233,10 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/TensorFlow versions.
send_example_telemetry("run_clm", model_args, data_args, framework="tensorflow")
# Sanity checks # Sanity checks
if data_args.dataset_name is None and data_args.train_file is None and data_args.validation_file is None: if data_args.dataset_name is None and data_args.train_file is None and data_args.validation_file is None:
raise ValueError("Need either a dataset name or a training/validation file.") raise ValueError("Need either a dataset name or a training/validation file.")
......
...@@ -55,6 +55,7 @@ from transformers import ( ...@@ -55,6 +55,7 @@ from transformers import (
create_optimizer, create_optimizer,
set_seed, set_seed,
) )
from transformers.utils import send_example_telemetry
from transformers.utils.versions import require_version from transformers.utils.versions import require_version
...@@ -242,6 +243,10 @@ def main(): ...@@ -242,6 +243,10 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/TensorFlow versions.
send_example_telemetry("run_mlm", model_args, data_args, framework="tensorflow")
# Sanity checks # Sanity checks
if data_args.dataset_name is None and data_args.train_file is None and data_args.validation_file is None: if data_args.dataset_name is None and data_args.train_file is None and data_args.validation_file is None:
raise ValueError("Need either a dataset name or a training/validation file.") raise ValueError("Need either a dataset name or a training/validation file.")
......
...@@ -44,7 +44,7 @@ from transformers import ( ...@@ -44,7 +44,7 @@ from transformers import (
set_seed, set_seed,
) )
from transformers.tokenization_utils_base import PreTrainedTokenizerBase from transformers.tokenization_utils_base import PreTrainedTokenizerBase
from transformers.utils import PaddingStrategy, check_min_version from transformers.utils import PaddingStrategy, check_min_version, send_example_telemetry
# Will error if the minimal version of Transformers is not installed. Remove at your own risks. # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
...@@ -246,6 +246,10 @@ def main(): ...@@ -246,6 +246,10 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/TensorFlow versions.
send_example_telemetry("run_swag", model_args, data_args, framework="tensorflow")
output_dir = Path(training_args.output_dir) output_dir = Path(training_args.output_dir)
output_dir.mkdir(parents=True, exist_ok=True) output_dir.mkdir(parents=True, exist_ok=True)
# endregion # endregion
......
...@@ -41,7 +41,7 @@ from transformers import ( ...@@ -41,7 +41,7 @@ from transformers import (
TFTrainingArguments, TFTrainingArguments,
set_seed, set_seed,
) )
from transformers.utils import CONFIG_NAME, TF2_WEIGHTS_NAME, check_min_version from transformers.utils import CONFIG_NAME, TF2_WEIGHTS_NAME, check_min_version, send_example_telemetry
from utils_qa import postprocess_qa_predictions from utils_qa import postprocess_qa_predictions
...@@ -242,6 +242,10 @@ def main(): ...@@ -242,6 +242,10 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/TensorFlow versions.
send_example_telemetry("run_qa", model_args, data_args, framework="tensorflow")
output_dir = Path(training_args.output_dir) output_dir = Path(training_args.output_dir)
output_dir.mkdir(parents=True, exist_ok=True) output_dir.mkdir(parents=True, exist_ok=True)
# endregion # endregion
......
...@@ -44,7 +44,7 @@ from transformers import ( ...@@ -44,7 +44,7 @@ from transformers import (
set_seed, set_seed,
) )
from transformers.trainer_utils import get_last_checkpoint from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version, is_offline_mode from transformers.utils import check_min_version, is_offline_mode, send_example_telemetry
from transformers.utils.versions import require_version from transformers.utils.versions import require_version
...@@ -348,6 +348,10 @@ def main(): ...@@ -348,6 +348,10 @@ def main():
model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/TensorFlow versions.
send_example_telemetry("run_summarization", model_args, data_args, framework="tensorflow")
# endregion # endregion
# region Logging # region Logging
......
...@@ -39,7 +39,7 @@ from transformers import ( ...@@ -39,7 +39,7 @@ from transformers import (
set_seed, set_seed,
) )
from transformers.trainer_utils import get_last_checkpoint, is_main_process from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.utils import check_min_version from transformers.utils import check_min_version, send_example_telemetry
# region Helper functions # region Helper functions
...@@ -206,6 +206,10 @@ def main(): ...@@ -206,6 +206,10 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/TensorFlow versions.
send_example_telemetry("run_glue", model_args, data_args, framework="tensorflow")
if not (training_args.do_train or training_args.do_eval or training_args.do_predict): if not (training_args.do_train or training_args.do_eval or training_args.do_predict):
exit("Must specify at least one of --do_train, --do_eval or --do_predict!") exit("Must specify at least one of --do_train, --do_eval or --do_predict!")
# endregion # endregion
......
...@@ -37,7 +37,7 @@ from transformers import ( ...@@ -37,7 +37,7 @@ from transformers import (
TFTrainingArguments, TFTrainingArguments,
set_seed, set_seed,
) )
from transformers.utils import CONFIG_NAME, TF2_WEIGHTS_NAME from transformers.utils import CONFIG_NAME, TF2_WEIGHTS_NAME, send_example_telemetry
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1" # Reduce the amount of console output from TF os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1" # Reduce the amount of console output from TF
...@@ -196,6 +196,11 @@ def main(): ...@@ -196,6 +196,11 @@ def main():
model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/TensorFlow versions.
send_example_telemetry("run_text_classification", model_args, data_args, framework="tensorflow")
output_dir = Path(training_args.output_dir) output_dir = Path(training_args.output_dir)
output_dir.mkdir(parents=True, exist_ok=True) output_dir.mkdir(parents=True, exist_ok=True)
# endregion # endregion
......
...@@ -41,6 +41,7 @@ from transformers import ( ...@@ -41,6 +41,7 @@ from transformers import (
create_optimizer, create_optimizer,
set_seed, set_seed,
) )
from transformers.utils import send_example_telemetry
from transformers.utils.versions import require_version from transformers.utils.versions import require_version
...@@ -252,6 +253,10 @@ def main(): ...@@ -252,6 +253,10 @@ def main():
# region Argument Parsing # region Argument Parsing
parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TFTrainingArguments)) parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TFTrainingArguments))
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/TensorFlow versions.
send_example_telemetry("run_ner", model_args, data_args, framework="tensorflow")
# endregion # endregion
# region Setup logging # region Setup logging
......
...@@ -47,7 +47,7 @@ from transformers import ( ...@@ -47,7 +47,7 @@ from transformers import (
set_seed, set_seed,
) )
from transformers.trainer_utils import get_last_checkpoint from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version from transformers.utils import check_min_version, send_example_telemetry
from transformers.utils.versions import require_version from transformers.utils.versions import require_version
...@@ -318,6 +318,10 @@ def main(): ...@@ -318,6 +318,10 @@ def main():
model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/TensorFlow versions.
send_example_telemetry("run_translation", model_args, data_args, framework="tensorflow")
# endregion # endregion
# region Logging # region Logging
......
...@@ -74,6 +74,7 @@ from .hub import ( ...@@ -74,6 +74,7 @@ from .hub import (
is_local_clone, is_local_clone,
is_offline_mode, is_offline_mode,
is_remote_url, is_remote_url,
send_example_telemetry,
url_to_filename, url_to_filename,
) )
from .import_utils import ( from .import_utils import (
......
...@@ -109,6 +109,7 @@ if os.environ.get("HUGGINGFACE_CO_RESOLVE_ENDPOINT", None) is not None: ...@@ -109,6 +109,7 @@ if os.environ.get("HUGGINGFACE_CO_RESOLVE_ENDPOINT", None) is not None:
HUGGINGFACE_CO_RESOLVE_ENDPOINT = os.environ.get("HUGGINGFACE_CO_RESOLVE_ENDPOINT", None) HUGGINGFACE_CO_RESOLVE_ENDPOINT = os.environ.get("HUGGINGFACE_CO_RESOLVE_ENDPOINT", None)
HUGGINGFACE_CO_RESOLVE_ENDPOINT = os.environ.get("HF_ENDPOINT", HUGGINGFACE_CO_RESOLVE_ENDPOINT) HUGGINGFACE_CO_RESOLVE_ENDPOINT = os.environ.get("HF_ENDPOINT", HUGGINGFACE_CO_RESOLVE_ENDPOINT)
HUGGINGFACE_CO_PREFIX = HUGGINGFACE_CO_RESOLVE_ENDPOINT + "/{model_id}/resolve/{revision}/{filename}" HUGGINGFACE_CO_PREFIX = HUGGINGFACE_CO_RESOLVE_ENDPOINT + "/{model_id}/resolve/{revision}/{filename}"
HUGGINGFACE_CO_EXAMPLES_TELEMETRY = HUGGINGFACE_CO_RESOLVE_ENDPOINT + "/telemetry/examples"
def is_remote_url(url_or_filename): def is_remote_url(url_or_filename):
...@@ -1028,3 +1029,41 @@ def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: ...@@ -1028,3 +1029,41 @@ def get_full_repo_name(model_id: str, organization: Optional[str] = None, token:
return f"{username}/{model_id}" return f"{username}/{model_id}"
else: else:
return f"{organization}/{model_id}" return f"{organization}/{model_id}"
def send_example_telemetry(example_name, *example_args, framework="pytorch"):
    """
    Sends telemetry that helps tracking the examples use.

    This is strictly best-effort: nothing is sent in offline mode, and any failure while contacting the telemetry
    endpoint is silently ignored so that an example script never fails (or hangs) because of it.

    Args:
        example_name (`str`): The name of the example.
        *example_args (dataclasses or `argparse.Namespace`): The parsed arguments to the script. This function will
            only try to extract the model and dataset name from those. Nothing else is tracked.
        framework (`str`, *optional*, defaults to `"pytorch"`): The framework for the example.
    """
    if is_offline_mode():
        return

    data = {"example": example_name, "framework": framework}
    for args in example_args:
        # Only look at public attributes that were actually set.
        args_as_dict = {k: v for k, v in vars(args).items() if not k.startswith("_") and v is not None}

        if "model_name_or_path" in args_as_dict:
            model_name = args_as_dict["model_name_or_path"]
            # Filter out local paths
            if not os.path.isdir(model_name):
                data["model_name"] = model_name

        if "dataset_name" in args_as_dict:
            data["dataset_name"] = args_as_dict["dataset_name"]
        elif "task_name" in args_as_dict:
            # Extract script name from the example_name
            script_name = example_name.replace("tf_", "").replace("flax_", "").replace("run_", "")
            script_name = script_name.replace("_no_trainer", "")
            data["dataset_name"] = f"{script_name}-{args_as_dict['task_name']}"

    # The collected fields are passed to `http_user_agent` and sent as the user-agent header of a HEAD request.
    headers = {"user-agent": http_user_agent(data)}
    try:
        # `requests` never times out by default; bound the call so an unresponsive endpoint cannot stall the
        # example script at startup.
        r = requests.head(HUGGINGFACE_CO_EXAMPLES_TELEMETRY, headers=headers, timeout=10)
        r.raise_for_status()
    except Exception:
        # We don't want to error in case of connection errors of any kind.
        pass
...@@ -46,6 +46,7 @@ from transformers import ( ...@@ -46,6 +46,7 @@ from transformers import (
set_seed, set_seed,
) )
from transformers.trainer_utils import get_last_checkpoint from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import send_example_telemetry
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -207,6 +208,10 @@ def main(): ...@@ -207,6 +208,10 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_{{cookiecutter.example_shortcut}}", model_args, data_args)
# Detecting last checkpoint. # Detecting last checkpoint.
last_checkpoint = None last_checkpoint = None
if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
...@@ -519,6 +524,7 @@ from transformers import ( ...@@ -519,6 +524,7 @@ from transformers import (
get_scheduler, get_scheduler,
set_seed, set_seed,
) )
from transformers.utils import send_example_telemetry
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -662,6 +668,10 @@ def parse_args(): ...@@ -662,6 +668,10 @@ def parse_args():
def main(): def main():
args = parse_args() args = parse_args()
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_{{cookiecutter.example_shortcut}}", args)
# Initialize the accelerator. We will let the accelerator handle device placement for us in this example. # Initialize the accelerator. We will let the accelerator handle device placement for us in this example.
accelerator = Accelerator() accelerator = Accelerator()
# Make one log on every process with the configuration for debugging. # Make one log on every process with the configuration for debugging.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment