Unverified Commit b923871b authored by Doug Blank's avatar Doug Blank Committed by GitHub
Browse files

Adds comet_ml to the list of auto-experiment loggers (#6176)



* Support for Comet.ml

* Need to import comet first

* Log this model, not the one in the backprop step

* Log args as hyperparameters; use framework to allow fine control

* Log hyperparameters with context

* Apply black formatting

* isort fix integrations

* isort fix __init__

* Update src/transformers/trainer.py
Co-authored-by: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/trainer.py
Co-authored-by: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/trainer_tf.py
Co-authored-by: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Address review comments

* Style + Quality, remove Tensorboard import test
Co-authored-by: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: default avatarLysandre <lysandre.debut@reseau.eseo.fr>
parent d5bc32ce
......@@ -81,7 +81,13 @@ Feedback and more use cases and benchmarks involving TPUs are welcome, please sh
## Logging & Experiment tracking
You can easily log and monitor your runs. [TensorBoard](https://www.tensorflow.org/tensorboard) and [Weights & Biases](https://docs.wandb.com/library/integrations/huggingface) are currently supported.
You can easily log and monitor your runs. The following are currently supported:
* [TensorBoard](https://www.tensorflow.org/tensorboard)
* [Weights & Biases](https://docs.wandb.com/library/integrations/huggingface)
* [Comet ML](https://www.comet.ml/docs/python-sdk/huggingface/)
### Weights & Biases
To use Weights & Biases, install the wandb package with:
......@@ -104,6 +110,18 @@ wandb.login()
Whenever you use `Trainer` or `TFTrainer` classes, your losses, evaluation metrics, model topology and gradients (for `Trainer` only) will automatically be logged.
For advanced configuration and examples, refer to the [W&B documentation](https://docs.wandb.com/library/integrations/huggingface).
When using 🤗 Transformers with PyTorch Lightning, runs can be tracked through `WandbLogger`. Refer to related [documentation & examples](https://docs.wandb.com/library/frameworks/pytorch/lightning).
### Comet.ml
To use `comet_ml`, install the Python package with:
```bash
pip install comet_ml
```
or if in a Conda environment:
```bash
conda install -c comet_ml -c anaconda -c conda-forge comet_ml
```
......@@ -88,6 +88,9 @@ from .file_utils import (
)
from .hf_argparser import HfArgumentParser
# Integrations
from .integrations import is_comet_available, is_tensorboard_available, is_wandb_available
# Model Cards
from .modelcard import ModelCard
......
# Integrations with other Python libraries
import os
# Each optional logging backend is probed exactly once, when this module is
# imported; the result is cached in a private module-level flag and exposed
# through the small accessor functions at the bottom.

# Comet.ml: available as soon as the package can be imported.
try:
    import comet_ml  # noqa: F401
except ImportError:
    _has_comet = False
else:
    _has_comet = True

# Weights & Biases: the package must be importable, an API key must be
# configured, and the WANDB_DISABLED environment variable must be unset/empty.
try:
    import wandb

    wandb.ensure_configured()
    if wandb.api.api_key is not None:
        # Any non-empty value of WANDB_DISABLED switches the integration off.
        _has_wandb = not os.getenv("WANDB_DISABLED")
    else:
        _has_wandb = False
        wandb.termwarn("W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.")
except (ImportError, AttributeError):
    # Not installed, or the installed wandb lacks one of the attributes used above.
    _has_wandb = False

# TensorBoard: prefer the writer shipped with PyTorch, fall back to tensorboardX.
try:
    from torch.utils.tensorboard import SummaryWriter  # noqa: F401
except ImportError:
    try:
        from tensorboardX import SummaryWriter  # noqa: F401
    except ImportError:
        _has_tensorboard = False
    else:
        _has_tensorboard = True
else:
    _has_tensorboard = True


def is_wandb_available():
    """Return ``True`` if Weights & Biases was found usable when this module was imported."""
    return _has_wandb


def is_comet_available():
    """Return ``True`` if the ``comet_ml`` package could be imported when this module was imported."""
    return _has_comet


def is_tensorboard_available():
    """Return ``True`` if a ``SummaryWriter`` (PyTorch's or tensorboardX's) could be imported."""
    return _has_tensorboard
......@@ -20,16 +20,10 @@ from tqdm.auto import tqdm, trange
from .data.data_collator import DataCollator, default_data_collator
from .file_utils import is_torch_tpu_available
from .integrations import is_comet_available, is_tensorboard_available, is_wandb_available
from .modeling_utils import PreTrainedModel
from .optimization import AdamW, get_linear_schedule_with_warmup
from .trainer_utils import (
PREFIX_CHECKPOINT_DIR,
EvalPrediction,
PredictionOutput,
TrainOutput,
is_wandb_available,
set_seed,
)
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, TrainOutput, set_seed
from .training_args import TrainingArguments
......@@ -53,26 +47,17 @@ if is_torch_tpu_available():
import torch_xla.debug.metrics as met
import torch_xla.distributed.parallel_loader as pl
try:
from torch.utils.tensorboard import SummaryWriter
_has_tensorboard = True
except ImportError:
if is_tensorboard_available():
try:
from tensorboardX import SummaryWriter
_has_tensorboard = True
from torch.utils.tensorboard import SummaryWriter
except ImportError:
_has_tensorboard = False
def is_tensorboard_available():
return _has_tensorboard
from tensorboardX import SummaryWriter
if is_wandb_available():
import wandb
if is_comet_available():
import comet_ml
logger = logging.getLogger(__name__)
......@@ -210,6 +195,13 @@ class Trainer:
"You are instantiating a Trainer but W&B is not installed. To use wandb logging, "
"run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
)
if is_comet_available():
self.setup_comet()
elif os.environ.get("COMET_MODE") != "DISABLED":
logger.info(
"To use comet_ml logging, run `pip/conda install comet_ml` "
"see https://www.comet.ml/docs/python-sdk/huggingface/"
)
set_seed(self.args.seed)
# Create output directory if needed
if self.is_world_process_zero():
......@@ -393,6 +385,37 @@ class Trainer:
self.model, log=os.getenv("WANDB_WATCH", "gradients"), log_freq=max(100, self.args.logging_steps)
)
def setup_comet(self):
    """
    Setup the optional Comet.ml integration.

    An experiment is only created on the process for which :meth:`is_world_process_zero` returns ``True``;
    every other process returns immediately without touching Comet.ml.

    Environment:
        COMET_MODE:
            (Optional): str - "OFFLINE", "ONLINE", or "DISABLED" (case-insensitive, defaults to "ONLINE").
            Any value other than "ONLINE" or "OFFLINE" results in no experiment being created.
        COMET_PROJECT_NAME:
            (Optional): str - Comet.ml project name for experiments (defaults to "huggingface")
        COMET_OFFLINE_DIRECTORY:
            (Optional): str - folder to use for saving offline experiments when `COMET_MODE` is "OFFLINE"
            (defaults to "./")

    For a number of configurable items in the environment,
    see `here <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__
    """
    # Use the same rank check as the rest of the Trainer's logging code so that
    # experiment creation and metric logging are gated by one predicate.
    if not self.is_world_process_zero():
        return

    comet_mode = os.getenv("COMET_MODE", "ONLINE").upper()
    args = {"project_name": os.getenv("COMET_PROJECT_NAME", "huggingface")}
    experiment = None
    if comet_mode == "ONLINE":
        experiment = comet_ml.Experiment(**args)
        logger.info("Automatic Comet.ml online logging enabled")
    elif comet_mode == "OFFLINE":
        args["offline_directory"] = os.getenv("COMET_OFFLINE_DIRECTORY", "./")
        experiment = comet_ml.OfflineExperiment(**args)
        logger.info("Automatic Comet.ml offline logging enabled; use `comet upload` when finished")

    if experiment is not None:
        # Record the model plus the training arguments and model config (under
        # separate prefixes) on the freshly created experiment.
        experiment._set_model_graph(self.model, framework="transformers")
        experiment._log_parameters(self.args, prefix="args/", framework="transformers")
        experiment._log_parameters(self.model.config, prefix="config/", framework="transformers")
def num_examples(self, dataloader: DataLoader) -> int:
"""
Helper to get number of samples in a :class:`~torch.utils.data.DataLoader` by accessing its dataset.
......@@ -655,6 +678,11 @@ class Trainer:
if is_wandb_available():
if self.is_world_process_zero():
wandb.log(logs, step=self.global_step)
if is_comet_available():
if self.is_world_process_zero():
experiment = comet_ml.config.get_global_experiment()
if experiment is not None:
experiment._log_metrics(logs, step=self.global_step, epoch=self.epoch, framework="transformers")
output = {**logs, **{"step": self.global_step}}
if iterator is not None:
iterator.write(output)
......
......@@ -11,15 +11,18 @@ import numpy as np
import tensorflow as tf
from packaging.version import parse
from .integrations import is_comet_available, is_wandb_available
from .modeling_tf_utils import TFPreTrainedModel
from .optimization_tf import GradientAccumulator, create_optimizer
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, is_wandb_available, set_seed
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, set_seed
from .training_args_tf import TFTrainingArguments
if is_wandb_available():
import wandb
if is_comet_available():
import comet_ml
logger = logging.getLogger(__name__)
......@@ -96,6 +99,14 @@ class TFTrainer:
"run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
)
if is_comet_available():
self.setup_comet()
elif os.environ.get("COMET_MODE") != "DISABLED":
logger.info(
"To use comet_ml logging, run `pip/conda install comet_ml` "
"see https://www.comet.ml/docs/python-sdk/huggingface/"
)
set_seed(self.args.seed)
def get_train_tfdataset(self) -> tf.data.Dataset:
......@@ -218,6 +229,36 @@ class TFTrainer:
combined_dict = {**self.model.config.to_dict(), **self.args.to_sanitized_dict()}
wandb.init(project=os.getenv("WANDB_PROJECT", "huggingface"), config=combined_dict, name=self.args.run_name)
def setup_comet(self):
    """
    Setup the optional Comet.ml integration.

    Creates an online or offline Comet.ml experiment depending on ``COMET_MODE`` and records the model, the
    training arguments and the model configuration on it. Does nothing for any other mode.

    Environment:
        COMET_MODE:
            (Optional): str - "OFFLINE", "ONLINE", or "DISABLED"
        COMET_PROJECT_NAME:
            (Optional): str - Comet.ml project name for experiments
        COMET_OFFLINE_DIRECTORY:
            (Optional): str - folder to use for saving offline experiments when `COMET_MODE` is "OFFLINE"

    For a number of configurable items in the environment,
    see `here <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__
    """
    mode = os.getenv("COMET_MODE", "ONLINE").upper()
    experiment_kwargs = {"project_name": os.getenv("COMET_PROJECT_NAME", "huggingface")}

    if mode == "ONLINE":
        experiment = comet_ml.Experiment(**experiment_kwargs)
        logger.info("Automatic Comet.ml online logging enabled")
    elif mode == "OFFLINE":
        experiment_kwargs["offline_directory"] = os.getenv("COMET_OFFLINE_DIRECTORY", "./")
        experiment = comet_ml.OfflineExperiment(**experiment_kwargs)
        logger.info("Automatic Comet.ml offline logging enabled; use `comet upload` when finished")
    else:
        # "DISABLED" or any unrecognised mode: no experiment is created.
        experiment = None

    if experiment is None:
        return

    # Model first, then the two parameter groups under their own prefixes.
    experiment._set_model_graph(self.model, framework="transformers")
    for params, prefix in ((self.args, "args/"), (self.model.config, "config/")):
        experiment._log_parameters(params, prefix=prefix, framework="transformers")
def prediction_loop(
self,
dataset: tf.data.Dataset,
......@@ -336,6 +377,13 @@ class TFTrainer:
if is_wandb_available():
wandb.log(logs, step=self.global_step)
if is_comet_available():
experiment = comet_ml.config.get_global_experiment()
if experiment is not None:
experiment._log_metrics(
logs, step=self.global_step, epoch=self.epoch_logging, framework="transformers"
)
output = {**logs, **{"step": self.global_step}}
logger.info(output)
......
import os
import random
from typing import Dict, NamedTuple, Optional
......@@ -7,23 +6,6 @@ import numpy as np
from .file_utils import is_tf_available, is_torch_available
# Probe Weights & Biases once at import time: it counts as available only when
# the package imports, an API key is configured and WANDB_DISABLED is unset/empty.
try:
    import wandb

    wandb.ensure_configured()
    if wandb.api.api_key is not None:
        _has_wandb = not os.getenv("WANDB_DISABLED")
    else:
        _has_wandb = False
        wandb.termwarn("W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.")
except (ImportError, AttributeError):
    # Package missing, or it does not expose the attributes used above.
    _has_wandb = False


def is_wandb_available():
    """Return the W&B availability flag computed when this module was imported."""
    return _has_wandb
def set_seed(seed: int):
"""
Helper function for reproducible behavior to set the seed in ``random``, ``numpy``, ``torch`` and/or ``tf``
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment