Adds comet_ml to the list of auto-experiment loggers (#6176)

* Support for Comet.ml * Need to import comet first * Log this model, not the one in the backprop step * Log args as hyperparameters; use framework to allow fine control * Log hyperparameters with context * Apply black formatting * isort fix integrations * isort fix __init__ * Update src/transformers/trainer.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/trainer.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/trainer_tf.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Address review comments * Style + Quality, remove Tensorboard import test Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Lysandre <lysandre.debut@reseau.eseo.fr>

Adds comet_ml to the list of auto-experiment loggers (#6176)
* Support for Comet.ml * Need to import comet first * Log this model, not the one in the backprop step * Log args as hyperparameters; use framework to allow fine control * Log hyperparameters with context * Apply black formatting * isort fix integrations * isort fix __init__ * Update src/transformers/trainer.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/trainer.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/trainer_tf.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Address review comments * Style + Quality, remove Tensorboard import test Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Lysandre <lysandre.debut@reseau.eseo.fr>
b923871b · Doug Blank · GitHub · d5bc32ce · b923871b · b923871b
Unverified Commit b923871b authored Aug 06, 2020 by Doug Blank Committed by GitHub Aug 06, 2020
6 changed files
--- a/examples/README.md
+++ b/examples/README.md
@@ -81,7 +81,13 @@ Feedback and more use cases and benchmarks involving TPUs are welcome, please sh

 ## Logging & Experiment tracking

-You can easily log and monitor your runs code. [TensorBoard](https://www.tensorflow.org/tensorboard) and [Weights & Biases](https://docs.wandb.com/library/integrations/huggingface) are currently supported.
+You can easily log and monitor your runs. The following integrations are currently supported:
+
+* [TensorBoard](https://www.tensorflow.org/tensorboard)
+* [Weights & Biases](https://docs.wandb.com/library/integrations/huggingface)
+* [Comet ML](https://www.comet.ml/docs/python-sdk/huggingface/)
+
+### Weights & Biases

 To use Weights & Biases, install the wandb package with:

@@ -104,6 +110,18 @@ wandb.login()

 Whenever you use `Trainer` or `TFTrainer` classes, your losses, evaluation metrics, model topology and gradients (for `Trainer` only) will automatically be logged.

-For advanced configuration and examples, refer to the [W&B documentation](https://docs.wandb.com/library/integrations/huggingface).
-
 When using 🤗 Transformers with PyTorch Lightning, runs can be tracked through `WandbLogger`. Refer to related [documentation & examples](https://docs.wandb.com/library/frameworks/pytorch/lightning).
+
+### Comet.ml
+
+To use `comet_ml`, install the Python package with:
+
+```bash
+pip install comet_ml
+```
+
+or if in a Conda environment:
+
+```bash
+conda install -c comet_ml -c anaconda -c conda-forge comet_ml
+```
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -88,6 +88,9 @@ from .file_utils import (
 )
 from .hf_argparser import HfArgumentParser

+# Integrations
+from .integrations import is_comet_available, is_tensorboard_available, is_wandb_available
+
 # Model Cards
 from .modelcard import ModelCard


--- a/src/transformers/integrations.py
+++ b/src/transformers/integrations.py
+# Integrations with other Python libraries
+
+import os
+
+
+# Probe for the optional comet_ml package. This probe is kept ahead of the other integrations
+# on purpose: the change that introduced it notes that comet has to be imported first.
+_has_comet = True
+try:
+    import comet_ml  # noqa: F401
+except ImportError:
+    _has_comet = False
+
+
+# Probe for a usable Weights & Biases setup: importable, configured with an API key,
+# and not switched off through the WANDB_DISABLED environment variable.
+try:
+    import wandb
+
+    wandb.ensure_configured()
+    if wandb.api.api_key is not None:
+        # Any non-empty WANDB_DISABLED value turns the integration off.
+        _has_wandb = not os.getenv("WANDB_DISABLED")
+    else:
+        _has_wandb = False
+        wandb.termwarn("W&B installed but not logged in.  Run `wandb login` or set the WANDB_API_KEY env variable.")
+except (ImportError, AttributeError):
+    _has_wandb = False
+
+# Probe for a SummaryWriter implementation: torch's bundled tensorboard first, tensorboardX as fallback.
+_has_tensorboard = True
+try:
+    from torch.utils.tensorboard import SummaryWriter  # noqa: F401
+except ImportError:
+    try:
+        from tensorboardX import SummaryWriter  # noqa: F401
+    except ImportError:
+        # Neither implementation could be imported.
+        _has_tensorboard = False
+
+
+def is_wandb_available() -> bool:
+    """Return the result of the import-time Weights & Biases probe (`_has_wandb`)."""
+    return _has_wandb
+
+
+def is_comet_available() -> bool:
+    """Return the result of the import-time comet_ml probe (`_has_comet`)."""
+    return _has_comet
+
+
+def is_tensorboard_available() -> bool:
+    """Return the result of the import-time SummaryWriter probe (`_has_tensorboard`)."""
+    return _has_tensorboard
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -20,16 +20,10 @@ from tqdm.auto import tqdm, trange

 from .data.data_collator import DataCollator, default_data_collator
 from .file_utils import is_torch_tpu_available
+from .integrations import is_comet_available, is_tensorboard_available, is_wandb_available
 from .modeling_utils import PreTrainedModel
 from .optimization import AdamW, get_linear_schedule_with_warmup
-from .trainer_utils import (
-    PREFIX_CHECKPOINT_DIR,
-    EvalPrediction,
-    PredictionOutput,
-    TrainOutput,
-    is_wandb_available,
-    set_seed,
-)
+from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, TrainOutput, set_seed
 from .training_args import TrainingArguments


@@ -53,26 +47,17 @@ if is_torch_tpu_available():
    import torch_xla.debug.metrics as met
    import torch_xla.distributed.parallel_loader as pl

-try:
-    from torch.utils.tensorboard import SummaryWriter
-
-    _has_tensorboard = True
-except ImportError:
+if is_tensorboard_available():
    try:
-        from tensorboardX import SummaryWriter
-
-        _has_tensorboard = True
+        from torch.utils.tensorboard import SummaryWriter
    except ImportError:
-        _has_tensorboard = False
-
-
-def is_tensorboard_available():
-    return _has_tensorboard
-
+        from tensorboardX import SummaryWriter

 if is_wandb_available():
    import wandb

+if is_comet_available():
+    import comet_ml

 logger = logging.getLogger(__name__)

@@ -210,6 +195,13 @@ class Trainer:
                "You are instantiating a Trainer but W&B is not installed. To use wandb logging, "
                "run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
            )
+        if is_comet_available():
+            self.setup_comet()
+        elif os.environ.get("COMET_MODE") != "DISABLED":
+            logger.info(
+                "To use comet_ml logging, run `pip/conda install comet_ml` "
+                "see https://www.comet.ml/docs/python-sdk/huggingface/"
+            )
        set_seed(self.args.seed)
        # Create output directory if needed
        if self.is_world_process_zero():
@@ -393,6 +385,37 @@ class Trainer:
                    self.model, log=os.getenv("WANDB_WATCH", "gradients"), log_freq=max(100, self.args.logging_steps)
                )

+    def setup_comet(self):
+        """
+        Setup the optional Comet.ml integration (only on the world process zero).
+
+        Environment:
+            COMET_MODE:
+                (Optional): str - "OFFLINE", "ONLINE", or "DISABLED"
+            COMET_PROJECT_NAME:
+                (Optional): str - Comet.ml project name for experiments
+            COMET_OFFLINE_DIRECTORY:
+                (Optional): str - folder to use for saving offline experiments when `COMET_MODE` is "OFFLINE"
+
+        For a number of configurable items in the environment,
+        see `here <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__
+        """
+        if self.is_world_process_zero():
+            comet_mode = os.getenv("COMET_MODE", "ONLINE").upper()
+            args = {"project_name": os.getenv("COMET_PROJECT_NAME", "huggingface")}
+            experiment = None
+            if comet_mode == "ONLINE":
+                experiment = comet_ml.Experiment(**args)
+                logger.info("Automatic Comet.ml online logging enabled")
+            elif comet_mode == "OFFLINE":
+                args["offline_directory"] = os.getenv("COMET_OFFLINE_DIRECTORY", "./")
+                experiment = comet_ml.OfflineExperiment(**args)
+                logger.info("Automatic Comet.ml offline logging enabled; use `comet upload` when finished")
+            if experiment is not None:
+                experiment._set_model_graph(self.model, framework="transformers")
+                experiment._log_parameters(self.args, prefix="args/", framework="transformers")
+                experiment._log_parameters(self.model.config, prefix="config/", framework="transformers")
+
    def num_examples(self, dataloader: DataLoader) -> int:
        """
        Helper to get number of samples in a :class:`~torch.utils.data.DataLoader` by accessing its dataset.
@@ -655,6 +678,11 @@ class Trainer:
        if is_wandb_available():
            if self.is_world_process_zero():
                wandb.log(logs, step=self.global_step)
+        if is_comet_available():
+            if self.is_world_process_zero():
+                experiment = comet_ml.config.get_global_experiment()
+                if experiment is not None:
+                    experiment._log_metrics(logs, step=self.global_step, epoch=self.epoch, framework="transformers")
        output = {**logs, **{"step": self.global_step}}
        if iterator is not None:
            iterator.write(output)

--- a/src/transformers/trainer_tf.py
+++ b/src/transformers/trainer_tf.py
@@ -11,15 +11,18 @@ import numpy as np
 import tensorflow as tf
 from packaging.version import parse

+from .integrations import is_comet_available, is_wandb_available
 from .modeling_tf_utils import TFPreTrainedModel
 from .optimization_tf import GradientAccumulator, create_optimizer
-from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, is_wandb_available, set_seed
+from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, set_seed
 from .training_args_tf import TFTrainingArguments


 if is_wandb_available():
    import wandb

+if is_comet_available():
+    import comet_ml

 logger = logging.getLogger(__name__)

@@ -96,6 +99,14 @@ class TFTrainer:
                "run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
            )

+        if is_comet_available():
+            self.setup_comet()
+        elif os.environ.get("COMET_MODE") != "DISABLED":
+            logger.info(
+                "To use comet_ml logging, run `pip/conda install comet_ml` "
+                "see https://www.comet.ml/docs/python-sdk/huggingface/"
+            )
+
        set_seed(self.args.seed)

    def get_train_tfdataset(self) -> tf.data.Dataset:
@@ -218,6 +229,36 @@ class TFTrainer:
        combined_dict = {**self.model.config.to_dict(), **self.args.to_sanitized_dict()}
        wandb.init(project=os.getenv("WANDB_PROJECT", "huggingface"), config=combined_dict, name=self.args.run_name)

+    def setup_comet(self):
+        """
+        Configure the optional Comet.ml experiment logger from environment variables.
+
+        Environment:
+            COMET_MODE:
+                (Optional): str - "OFFLINE", "ONLINE" (the default), or "DISABLED"; case-insensitive
+            COMET_PROJECT_NAME:
+                (Optional): str - Comet.ml project name for experiments; defaults to "huggingface"
+            COMET_OFFLINE_DIRECTORY:
+                (Optional): str - folder for offline experiments when `COMET_MODE` is "OFFLINE"; defaults to "./"
+
+        For a number of configurable items in the environment,
+        see `here <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__
+        """
+        mode = os.getenv("COMET_MODE", "ONLINE").upper()
+        experiment_kwargs = {"project_name": os.getenv("COMET_PROJECT_NAME", "huggingface")}
+        if mode == "ONLINE":
+            run = comet_ml.Experiment(**experiment_kwargs)
+            logger.info("Automatic Comet.ml online logging enabled")
+        elif mode == "OFFLINE":
+            experiment_kwargs["offline_directory"] = os.getenv("COMET_OFFLINE_DIRECTORY", "./")
+            run = comet_ml.OfflineExperiment(**experiment_kwargs)
+            logger.info("Automatic Comet.ml offline logging enabled; use `comet upload` when finished")
+        else:
+            return  # "DISABLED" or an unrecognized mode: nothing to set up
+        run._set_model_graph(self.model, framework="transformers")
+        run._log_parameters(self.args, prefix="args/", framework="transformers")
+        run._log_parameters(self.model.config, prefix="config/", framework="transformers")
+
    def prediction_loop(
        self,
        dataset: tf.data.Dataset,
@@ -336,6 +377,13 @@ class TFTrainer:
        if is_wandb_available():
            wandb.log(logs, step=self.global_step)

+        if is_comet_available():
+            experiment = comet_ml.config.get_global_experiment()
+            if experiment is not None:
+                experiment._log_metrics(
+                    logs, step=self.global_step, epoch=self.epoch_logging, framework="transformers"
+                )
+
        output = {**logs, **{"step": self.global_step}}

        logger.info(output)

--- a/src/transformers/trainer_utils.py
+++ b/src/transformers/trainer_utils.py
-import os
 import random
 from typing import Dict, NamedTuple, Optional

@@ -7,23 +6,6 @@ import numpy as np
 from .file_utils import is_tf_available, is_torch_available


-try:
-    import wandb
-
-    wandb.ensure_configured()
-    if wandb.api.api_key is None:
-        _has_wandb = False
-        wandb.termwarn("W&B installed but not logged in.  Run `wandb login` or set the WANDB_API_KEY env variable.")
-    else:
-        _has_wandb = False if os.getenv("WANDB_DISABLED") else True
-except (ImportError, AttributeError):
-    _has_wandb = False
-
-
-def is_wandb_available():
-    return _has_wandb
-
-
 def set_seed(seed: int):
    """
    Helper function for reproducible behavior to set the seed in ``random``, ``numpy``, ``torch`` and/or ``tf``