"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "571fa585b6b2e7d377c8cc6e3a15a07c7af1e368"
Unverified commit 7eadfe16, authored by Sylvain Gugger and committed by GitHub
Browse files

When on sagemaker use their env variables for saves (#9876)

* When on sagemaker use their env variables for saves

* Address review comments

* Quality
parent fdcde144
...@@ -1366,6 +1366,11 @@ class Trainer: ...@@ -1366,6 +1366,11 @@ class Trainer:
elif self.is_world_process_zero(): elif self.is_world_process_zero():
self._save(output_dir) self._save(output_dir)
# If on sagemaker and we are saving the main model (not a checkpoint so output_dir=None), save a copy to
# SM_MODEL_DIR for easy deployment.
if output_dir is None and os.getenv("SM_MODEL_DIR") is not None:
self.save_model(output_dir=os.getenv("SM_MODEL_DIR"))
def _save_tpu(self, output_dir: Optional[str] = None): def _save_tpu(self, output_dir: Optional[str] = None):
output_dir = output_dir if output_dir is not None else self.args.output_dir output_dir = output_dir if output_dir is not None else self.args.output_dir
logger.info("Saving model checkpoint to %s", output_dir) logger.info("Saving model checkpoint to %s", output_dir)
......
...@@ -248,8 +248,9 @@ class TrainingArguments: ...@@ -248,8 +248,9 @@ class TrainingArguments:
Whether you want to pin memory in data loaders or not. Will default to :obj:`True`. Whether you want to pin memory in data loaders or not. Will default to :obj:`True`.
""" """
output_dir: str = field( output_dir: Optional[str] = field(
metadata={"help": "The output directory where the model predictions and checkpoints will be written."} default=None,
metadata={"help": "The output directory where the model predictions and checkpoints will be written."},
) )
overwrite_output_dir: bool = field( overwrite_output_dir: bool = field(
default=False, default=False,
...@@ -444,6 +445,18 @@ class TrainingArguments: ...@@ -444,6 +445,18 @@ class TrainingArguments:
_n_gpu: int = field(init=False, repr=False, default=-1) _n_gpu: int = field(init=False, repr=False, default=-1)
def __post_init__(self): def __post_init__(self):
if self.output_dir is None and os.getenv("SM_OUTPUT_DATA_DIR") is None:
raise ValueError(
"`output_dir` is only optional if it can get inferred from the environment. Please set a value for "
"`output_dir`."
)
elif os.getenv("SM_OUTPUT_DATA_DIR") is not None:
if self.output_dir is not None:
logger.warn(
"`output_dir` is overwritten by the env variable 'SM_OUTPUT_DATA_DIR' "
f"({os.getenv('SM_OUTPUT_DATA_DIR')})."
)
self.output_dir = os.getenv("SM_OUTPUT_DATA_DIR")
if self.disable_tqdm is None: if self.disable_tqdm is None:
self.disable_tqdm = logger.getEffectiveLevel() > logging.WARN self.disable_tqdm = logger.getEffectiveLevel() > logging.WARN
self.evaluation_strategy = EvaluationStrategy(self.evaluation_strategy) self.evaluation_strategy = EvaluationStrategy(self.evaluation_strategy)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment