Commit 0e4029f0 authored by Hongkun Yu, committed by A. Unique TensorFlower

BenchmarkBigQueryLogger is never used.

The logger was probably replaced by perfzero(?).

PiperOrigin-RevId: 307756692
parent 50dd4b4c
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library to upload benchmark generated by BenchmarkLogger to remote repo.
This library require google cloud bigquery lib as dependency, which can be
installed with:
> pip install --upgrade google-cloud-bigquery
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
from google.cloud import bigquery
from google.cloud import exceptions
import tensorflow as tf


class BigQueryUploader(object):
  """Upload the benchmark and metric info from JSON input to BigQuery."""

  def __init__(self, gcp_project=None, credentials=None):
    """Initializes BigQueryUploader with the proper settings.

    Args:
      gcp_project: string, the name of the GCP project that the log will be
        uploaded to. The default project name will be detected from the local
        environment if no value is provided.
      credentials: google.auth.credentials. The credential to access the
        BigQuery service. The default service account credential will be
        detected from the local environment if no value is provided. Please use
        google.oauth2.service_account.Credentials to load credentials from a
        local file when the test is run outside of GCP.
    """
    self._bq_client = bigquery.Client(
        project=gcp_project, credentials=credentials)

  def upload_benchmark_run_json(
      self, dataset_name, table_name, run_id, run_json):
    """Upload benchmark run information to BigQuery.

    Args:
      dataset_name: string, the name of the BigQuery dataset where the data
        will be uploaded.
      table_name: string, the name of the BigQuery table under the dataset
        where the data will be uploaded.
      run_id: string, a unique ID that will be attached to the data, usually
        in UUID4 format.
      run_json: dict, the JSON data that contains the benchmark run info.
    """
    run_json["model_id"] = run_id
    self._upload_json(dataset_name, table_name, [run_json])

  def upload_benchmark_metric_json(
      self, dataset_name, table_name, run_id, metric_json_list):
    """Upload metric information to BigQuery.

    Args:
      dataset_name: string, the name of the BigQuery dataset where the data
        will be uploaded.
      table_name: string, the name of the BigQuery table under the dataset
        where the metric data will be uploaded. This is different from the
        benchmark_run table.
      run_id: string, a unique ID that will be attached to the data, usually
        in UUID4 format. This should be the same as the benchmark run_id.
      metric_json_list: list, a list of JSON objects that record the metric
        info.
    """
    for m in metric_json_list:
      m["run_id"] = run_id
    self._upload_json(dataset_name, table_name, metric_json_list)

  def upload_benchmark_run_file(
      self, dataset_name, table_name, run_id, run_json_file):
    """Upload benchmark run information to BigQuery from an input JSON file.

    Args:
      dataset_name: string, the name of the BigQuery dataset where the data
        will be uploaded.
      table_name: string, the name of the BigQuery table under the dataset
        where the data will be uploaded.
      run_id: string, a unique ID that will be attached to the data, usually
        in UUID4 format.
      run_json_file: string, the file path that contains the run JSON data.
    """
    with tf.io.gfile.GFile(run_json_file) as f:
      benchmark_json = json.load(f)
      self.upload_benchmark_run_json(
          dataset_name, table_name, run_id, benchmark_json)

  def upload_metric_file(
      self, dataset_name, table_name, run_id, metric_json_file):
    """Upload metric information to BigQuery from an input JSON file.

    Args:
      dataset_name: string, the name of the BigQuery dataset where the data
        will be uploaded.
      table_name: string, the name of the BigQuery table under the dataset
        where the metric data will be uploaded. This is different from the
        benchmark_run table.
      run_id: string, a unique ID that will be attached to the data, usually
        in UUID4 format. This should be the same as the benchmark run_id.
      metric_json_file: string, the file path that contains the metric JSON
        data.
    """
    with tf.io.gfile.GFile(metric_json_file) as f:
      metrics = []
      for line in f:
        metrics.append(json.loads(line.strip()))
      self.upload_benchmark_metric_json(
          dataset_name, table_name, run_id, metrics)

  def _upload_json(self, dataset_name, table_name, json_list):
    # Find the unique table reference based on the dataset and table name, so
    # that the data can be inserted into it.
    table_ref = self._bq_client.dataset(dataset_name).table(table_name)
    errors = self._bq_client.insert_rows_json(table_ref, json_list)
    if errors:
      tf.logging.error(
          "Failed to upload benchmark info to bigquery: {}".format(errors))

  def insert_run_status(self, dataset_name, table_name, run_id, run_status):
    """Insert the run status into the BigQuery run status table."""
    query = ("INSERT {ds}.{tb} "
             "(run_id, status) "
             "VALUES('{rid}', '{status}')").format(
                 ds=dataset_name, tb=table_name, rid=run_id, status=run_status)
    try:
      self._bq_client.query(query=query).result()
    except exceptions.GoogleCloudError as e:
      tf.logging.error("Failed to insert run status: %s", e)

  def update_run_status(self, dataset_name, table_name, run_id, run_status):
    """Update the run status in the BigQuery run status table."""
    query = ("UPDATE {ds}.{tb} "
             "SET status = '{status}' "
             "WHERE run_id = '{rid}'").format(
                 ds=dataset_name, tb=table_name, status=run_status, rid=run_id)
    try:
      self._bq_client.query(query=query).result()
    except exceptions.GoogleCloudError as e:
      tf.logging.error("Failed to update run status: %s", e)
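
A minimal usage sketch of the uploader above, assuming it is run outside of GCP
with a hypothetical service-account key file, project name, and an example
dataset whose run, metric, and status tables already exist with matching
schemas; none of these names come from this commit. Inside GCP, gcp_project and
credentials can simply be omitted and the defaults are detected, as the
__init__ docstring notes.

# Hypothetical usage sketch for BigQueryUploader; the project, key path, and
# dataset/table names below are placeholders, not values from this repository.
import uuid

from google.oauth2 import service_account

from official.benchmark import benchmark_uploader

credentials = service_account.Credentials.from_service_account_file(
    "/path/to/service_account_key.json")  # placeholder key file
uploader = benchmark_uploader.BigQueryUploader(
    gcp_project="my-gcp-project", credentials=credentials)

run_id = str(uuid.uuid4())
# One row describing the run, then a batch of metric rows tagged with run_id.
uploader.upload_benchmark_run_json(
    "example_dataset", "benchmark_run", run_id, {"model_name": "resnet56"})
uploader.upload_benchmark_metric_json(
    "example_dataset", "benchmark_metric", run_id,
    [{"name": "accuracy", "value": 0.92, "global_step": 1000}])
# Track the run in the status table: mark it running, then record the final
# status once the benchmark completes.
uploader.insert_run_status(
    "example_dataset", "benchmark_run_status", run_id, "running")
uploader.update_run_status(
    "example_dataset", "benchmark_run_status", run_id, "success")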
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Binary to upload benchmark generated by BenchmarkLogger to remote repo.
This library require google cloud bigquery lib as dependency, which can be
installed with:
> pip install --upgrade google-cloud-bigquery
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import uuid
from absl import app as absl_app
from absl import flags
from official.benchmark import benchmark_uploader
from official.utils.flags import core as flags_core
from official.utils.logs import logger


def main(_):
  if not flags.FLAGS.benchmark_log_dir:
    print("Usage: benchmark_uploader.py --benchmark_log_dir=/some/dir")
    sys.exit(1)

  uploader = benchmark_uploader.BigQueryUploader(
      gcp_project=flags.FLAGS.gcp_project)
  run_id = str(uuid.uuid4())
  run_json_file = os.path.join(
      flags.FLAGS.benchmark_log_dir, logger.BENCHMARK_RUN_LOG_FILE_NAME)
  metric_json_file = os.path.join(
      flags.FLAGS.benchmark_log_dir, logger.METRIC_LOG_FILE_NAME)

  uploader.upload_benchmark_run_file(
      flags.FLAGS.bigquery_data_set, flags.FLAGS.bigquery_run_table, run_id,
      run_json_file)
  uploader.upload_metric_file(
      flags.FLAGS.bigquery_data_set, flags.FLAGS.bigquery_metric_table, run_id,
      metric_json_file)
  # Assume the run finished successfully before the user invokes the upload
  # script.
  uploader.insert_run_status(
      flags.FLAGS.bigquery_data_set, flags.FLAGS.bigquery_run_status_table,
      run_id, logger.RUN_STATUS_SUCCESS)


if __name__ == "__main__":
  flags_core.define_benchmark()
  flags.adopt_module_key_flags(flags_core)
  absl_app.run(main=main)
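
For reference, a sketch of the log directory this binary consumes, inferred
from how upload_benchmark_run_file and upload_metric_file read their inputs:
the run log holds a single JSON object, while the metric log is
newline-delimited JSON with one metric per line. The directory path, file
names, and field values below are placeholders; the real file names come from
logger.BENCHMARK_RUN_LOG_FILE_NAME and logger.METRIC_LOG_FILE_NAME at runtime.

# Sketch of a --benchmark_log_dir layout that main() above could upload.
# "benchmark_run.log" and "metric.log" stand in for the file name constants
# defined in official.utils.logs.logger.
import json
import os

log_dir = "/tmp/benchmark_logs"  # placeholder for --benchmark_log_dir
os.makedirs(log_dir, exist_ok=True)

# Run info: a single JSON object, parsed back with json.load().
with open(os.path.join(log_dir, "benchmark_run.log"), "w") as f:
  json.dump({"model_name": "resnet56", "dataset": "cifar-10"}, f)

# Metrics: newline-delimited JSON, parsed back with one json.loads() per line.
with open(os.path.join(log_dir, "metric.log"), "w") as f:
  for metric in [{"name": "accuracy", "value": 0.92, "global_step": 1000},
                 {"name": "loss", "value": 0.31, "global_step": 1000}]:
    f.write(json.dumps(metric) + "\n")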
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for benchmark_uploader."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import os
import tempfile
import unittest
from mock import MagicMock
from mock import patch
import tensorflow as tf # pylint: disable=g-bad-import-order

try:
  from google.cloud import bigquery
  from official.benchmark import benchmark_uploader
except ImportError:
  bigquery = None
  benchmark_uploader = None


@unittest.skipIf(bigquery is None, "Bigquery dependency is not installed.")
class BigQueryUploaderTest(tf.test.TestCase):

  @patch.object(bigquery, "Client")
  def setUp(self, mock_bigquery):
    self.mock_client = mock_bigquery.return_value
    self.mock_dataset = MagicMock(name="dataset")
    self.mock_table = MagicMock(name="table")
    self.mock_client.dataset.return_value = self.mock_dataset
    self.mock_dataset.table.return_value = self.mock_table
    self.mock_client.insert_rows_json.return_value = []
    self.benchmark_uploader = benchmark_uploader.BigQueryUploader()
    self.benchmark_uploader._bq_client = self.mock_client
    self.log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
    with open(os.path.join(self.log_dir, "metric.log"), "a") as f:
      json.dump({"name": "accuracy", "value": 1.0}, f)
      f.write("\n")
      json.dump({"name": "loss", "value": 0.5}, f)
      f.write("\n")
    with open(os.path.join(self.log_dir, "run.log"), "w") as f:
      json.dump({"model_name": "value"}, f)

  def tearDown(self):
    tf.io.gfile.rmtree(self.get_temp_dir())

  def test_upload_benchmark_run_json(self):
    self.benchmark_uploader.upload_benchmark_run_json(
        "dataset", "table", "run_id", {"model_name": "value"})
    self.mock_client.insert_rows_json.assert_called_once_with(
        self.mock_table, [{"model_name": "value", "model_id": "run_id"}])

  def test_upload_benchmark_metric_json(self):
    metric_json_list = [
        {"name": "accuracy", "value": 1.0},
        {"name": "loss", "value": 0.5}
    ]
    expected_params = [
        {"run_id": "run_id", "name": "accuracy", "value": 1.0},
        {"run_id": "run_id", "name": "loss", "value": 0.5}
    ]
    self.benchmark_uploader.upload_benchmark_metric_json(
        "dataset", "table", "run_id", metric_json_list)
    self.mock_client.insert_rows_json.assert_called_once_with(
        self.mock_table, expected_params)

  def test_upload_benchmark_run_file(self):
    self.benchmark_uploader.upload_benchmark_run_file(
        "dataset", "table", "run_id", os.path.join(self.log_dir, "run.log"))
    self.mock_client.insert_rows_json.assert_called_once_with(
        self.mock_table, [{"model_name": "value", "model_id": "run_id"}])

  def test_upload_metric_file(self):
    self.benchmark_uploader.upload_metric_file(
        "dataset", "table", "run_id",
        os.path.join(self.log_dir, "metric.log"))
    expected_params = [
        {"run_id": "run_id", "name": "accuracy", "value": 1.0},
        {"run_id": "run_id", "name": "loss", "value": 0.5}
    ]
    self.mock_client.insert_rows_json.assert_called_once_with(
        self.mock_table, expected_params)

  def test_insert_run_status(self):
    self.benchmark_uploader.insert_run_status(
        "dataset", "table", "run_id", "status")
    expected_query = ("INSERT dataset.table "
                      "(run_id, status) "
                      "VALUES('run_id', 'status')")
    self.mock_client.query.assert_called_once_with(query=expected_query)

  def test_update_run_status(self):
    self.benchmark_uploader.update_run_status(
        "dataset", "table", "run_id", "status")
    expected_query = ("UPDATE dataset.table "
                      "SET status = 'status' "
                      "WHERE run_id = 'run_id'")
    self.mock_client.query.assert_called_once_with(query=expected_query)


if __name__ == "__main__":
  tf.test.main()
@@ -46,8 +46,7 @@ def define_benchmark(benchmark_log_dir=True, bigquery_uploader=True):
   flags.DEFINE_enum(
       name="benchmark_logger_type", default="BaseBenchmarkLogger",
-      enum_values=["BaseBenchmarkLogger", "BenchmarkFileLogger",
-                   "BenchmarkBigQueryLogger"],
+      enum_values=["BaseBenchmarkLogger", "BenchmarkFileLogger"],
       help=help_wrap("The type of benchmark logger to use. Defaults to using "
                      "BaseBenchmarkLogger which logs to STDOUT. Different "
                      "loggers will require other flags to be able to work."))
@@ -68,16 +68,6 @@ def config_benchmark_logger(flag_obj=None):
     _benchmark_logger = BaseBenchmarkLogger()
   elif flag_obj.benchmark_logger_type == "BenchmarkFileLogger":
     _benchmark_logger = BenchmarkFileLogger(flag_obj.benchmark_log_dir)
-  elif flag_obj.benchmark_logger_type == "BenchmarkBigQueryLogger":
-    from official.benchmark import benchmark_uploader as bu  # pylint: disable=g-import-not-at-top
-    bq_uploader = bu.BigQueryUploader(gcp_project=flag_obj.gcp_project)
-    _benchmark_logger = BenchmarkBigQueryLogger(
-        bigquery_uploader=bq_uploader,
-        bigquery_data_set=flag_obj.bigquery_data_set,
-        bigquery_run_table=flag_obj.bigquery_run_table,
-        bigquery_run_status_table=flag_obj.bigquery_run_status_table,
-        bigquery_metric_table=flag_obj.bigquery_metric_table,
-        run_id=str(uuid.uuid4()))
   else:
     raise ValueError("Unrecognized benchmark_logger_type: %s"
                      % flag_obj.benchmark_logger_type)
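
With the branch above removed, the selection logic in config_benchmark_logger
reduces to the sketch below, reconstructed from the hunk's context lines;
module-level names such as _benchmark_logger, BaseBenchmarkLogger, and
BenchmarkFileLogger are defined elsewhere in the logger module. Users who
previously relied on BenchmarkBigQueryLogger can log with BenchmarkFileLogger
and upload the resulting files afterwards with the standalone uploader binary
shown earlier.

# Post-change shape of the logger selection, reconstructed from the context
# lines above; only the BigQuery branch is gone.
if flag_obj.benchmark_logger_type == "BaseBenchmarkLogger":
  _benchmark_logger = BaseBenchmarkLogger()
elif flag_obj.benchmark_logger_type == "BenchmarkFileLogger":
  _benchmark_logger = BenchmarkFileLogger(flag_obj.benchmark_log_dir)
else:
  raise ValueError("Unrecognized benchmark_logger_type: %s"
                   % flag_obj.benchmark_logger_type)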
@@ -219,86 +209,6 @@ class BenchmarkFileLogger(BaseBenchmarkLogger):
     self._metric_file_handler.close()


-class BenchmarkBigQueryLogger(BaseBenchmarkLogger):
-  """Class to log the benchmark information to BigQuery data store."""
-
-  def __init__(self,
-               bigquery_uploader,
-               bigquery_data_set,
-               bigquery_run_table,
-               bigquery_run_status_table,
-               bigquery_metric_table,
-               run_id):
-    super(BenchmarkBigQueryLogger, self).__init__()
-    self._bigquery_uploader = bigquery_uploader
-    self._bigquery_data_set = bigquery_data_set
-    self._bigquery_run_table = bigquery_run_table
-    self._bigquery_run_status_table = bigquery_run_status_table
-    self._bigquery_metric_table = bigquery_metric_table
-    self._run_id = run_id
-
-  def log_metric(self, name, value, unit=None, global_step=None, extras=None):
-    """Log the benchmark metric information to bigquery.
-
-    Args:
-      name: string, the name of the metric to log.
-      value: number, the value of the metric. The value will not be logged if
-        it is not a number type.
-      unit: string, the unit of the metric, E.g "image per second".
-      global_step: int, the global_step when the metric is logged.
-      extras: map of string:string, the extra information about the metric.
-    """
-    metric = _process_metric_to_json(name, value, unit, global_step, extras)
-    if metric:
-      # Starting new thread for bigquery upload in case it might take long time
-      # and impact the benchmark and performance measurement. Starting a new
-      # thread might have potential performance impact for model that run on
-      # CPU.
-      thread.start_new_thread(
-          self._bigquery_uploader.upload_benchmark_metric_json,
-          (self._bigquery_data_set,
-           self._bigquery_metric_table,
-           self._run_id,
-           [metric]))
-
-  def log_run_info(self, model_name, dataset_name, run_params, test_id=None):
-    """Collect most of the TF runtime information for the local env.
-
-    The schema of the run info follows official/benchmark/datastore/schema.
-
-    Args:
-      model_name: string, the name of the model.
-      dataset_name: string, the name of dataset for training and evaluation.
-      run_params: dict, the dictionary of parameters for the run, it could
-        include hyperparameters or other params that are important for the run.
-      test_id: string, the unique name of the test run by the combination of
-        key parameters, eg batch size, num of GPU. It is hardware independent.
-    """
-    run_info = _gather_run_info(model_name, dataset_name, run_params, test_id)
-    # Starting new thread for bigquery upload in case it might take long time
-    # and impact the benchmark and performance measurement. Starting a new
-    # thread might have potential performance impact for model that run on CPU.
-    thread.start_new_thread(
-        self._bigquery_uploader.upload_benchmark_run_json,
-        (self._bigquery_data_set,
-         self._bigquery_run_table,
-         self._run_id,
-         run_info))
-    thread.start_new_thread(
-        self._bigquery_uploader.insert_run_status,
-        (self._bigquery_data_set,
-         self._bigquery_run_status_table,
-         self._run_id,
-         RUN_STATUS_RUNNING))
-
-  def on_finish(self, status):
-    self._bigquery_uploader.update_run_status(
-        self._bigquery_data_set,
-        self._bigquery_run_status_table,
-        self._run_id,
-        status)
-
-
 def _gather_run_info(model_name, dataset_name, run_params, test_id):
   """Collect the benchmark run information for the local environment."""
   run_info = {
@@ -67,14 +67,6 @@ class BenchmarkLoggerTest(tf.test.TestCase):
     self.assertIsInstance(logger.get_benchmark_logger(),
                           logger.BenchmarkFileLogger)

-  @unittest.skipIf(bigquery is None, "Bigquery dependency is not installed.")
-  @mock.patch.object(bigquery, "Client")
-  def test_config_benchmark_bigquery_logger(self, mock_bigquery_client):
-    with flagsaver.flagsaver(benchmark_logger_type="BenchmarkBigQueryLogger"):
-      logger.config_benchmark_logger()
-    self.assertIsInstance(logger.get_benchmark_logger(),
-                          logger.BenchmarkBigQueryLogger)
-
   @mock.patch("official.utils.logs.logger.config_benchmark_logger")
   def test_benchmark_context(self, mock_config_benchmark_logger):
     mock_logger = mock.MagicMock()
@@ -299,68 +291,5 @@ class BenchmarkFileLoggerTest(tf.test.TestCase):
     self.assertIsNotNone(run_info["machine_config"]["memory_available"])


-@unittest.skipIf(bigquery is None, "Bigquery dependency is not installed.")
-class BenchmarkBigQueryLoggerTest(tf.test.TestCase):
-
-  def setUp(self):
-    super(BenchmarkBigQueryLoggerTest, self).setUp()
-    # Avoid pulling extra env vars from test environment which affects the test
-    # result, eg. Kokoro test has a TF_PKG env which affect the test case
-    # test_collect_tensorflow_environment_variables()
-    self.original_environ = dict(os.environ)
-    os.environ.clear()
-
-    self.mock_bq_uploader = mock.MagicMock()
-    self.logger = logger.BenchmarkBigQueryLogger(
-        self.mock_bq_uploader, "dataset", "run_table", "run_status_table",
-        "metric_table", "run_id")
-
-  def tearDown(self):
-    super(BenchmarkBigQueryLoggerTest, self).tearDown()
-    tf.io.gfile.rmtree(self.get_temp_dir())
-    os.environ.clear()
-    os.environ.update(self.original_environ)
-
-  def test_log_metric(self):
-    self.logger.log_metric(
-        "accuracy", 0.999, global_step=1e4, extras={"name": "value"})
-    expected_metric_json = [{
-        "name": "accuracy",
-        "value": 0.999,
-        "unit": None,
-        "global_step": 1e4,
-        "timestamp": mock.ANY,
-        "extras": [{"name": "name", "value": "value"}]
-    }]
-    # log_metric will call upload_benchmark_metric_json in a separate thread.
-    # Give it some grace period for the new thread before assert.
-    time.sleep(1)
-    self.mock_bq_uploader.upload_benchmark_metric_json.assert_called_once_with(
-        "dataset", "metric_table", "run_id", expected_metric_json)
-
-  @mock.patch("official.utils.logs.logger._gather_run_info")
-  def test_log_run_info(self, mock_gather_run_info):
-    run_info = {"model_name": "model_name",
-                "dataset": "dataset_name",
-                "run_info": "run_value"}
-    mock_gather_run_info.return_value = run_info
-    self.logger.log_run_info("model_name", "dataset_name", {})
-    # log_metric will call upload_benchmark_metric_json in a separate thread.
-    # Give it some grace period for the new thread before assert.
-    time.sleep(1)
-    self.mock_bq_uploader.upload_benchmark_run_json.assert_called_once_with(
-        "dataset", "run_table", "run_id", run_info)
-    self.mock_bq_uploader.insert_run_status.assert_called_once_with(
-        "dataset", "run_status_table", "run_id", "running")
-
-  def test_on_finish(self):
-    self.logger.on_finish(logger.RUN_STATUS_SUCCESS)
-    # log_metric will call upload_benchmark_metric_json in a separate thread.
-    # Give it some grace period for the new thread before assert.
-    time.sleep(1)
-    self.mock_bq_uploader.update_run_status.assert_called_once_with(
-        "dataset", "run_status_table", "run_id", logger.RUN_STATUS_SUCCESS)
-
-
 if __name__ == "__main__":
   tf.test.main()