"...git@developer.sourcefind.cn:chenpangpang/open-webui.git" did not exist on "776bb2892c169d6c96d19750f0f61b7f77f3f170"
Commit 9dafea91 authored by sunxx1

Merge branch 'qianyj_tf' into 'main'

update tf code

See merge request dcutoolkit/deeplearing/dlexamples_new!35
Tue Jan  9 09:34:25 2018
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.81                 Driver Version: 384.81                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Tesla P100-SXM2...  On   | 00000000:06:00.0 Off |                    0 |
| N/A   50C    P0   196W / 300W |  15643MiB / 16276MiB |     97%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P100-SXM2...  On   | 00000000:07:00.0 Off |                    0 |
| N/A   41C    P0    50W / 300W |  15483MiB / 16276MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  Tesla P100-SXM2...  On   | 00000000:0A:00.0 Off |                    0 |
| N/A   33C    P0    48W / 300W |  15483MiB / 16276MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   3  Tesla P100-SXM2...  On   | 00000000:0B:00.0 Off |                    0 |
| N/A   34C    P0    49W / 300W |  15483MiB / 16276MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   4  Tesla P100-SXM2...  On   | 00000000:85:00.0 Off |                    0 |
| N/A   36C    P0    50W / 300W |  15483MiB / 16276MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   5  Tesla P100-SXM2...  On   | 00000000:86:00.0 Off |                    0 |
| N/A   33C    P0    48W / 300W |  15483MiB / 16276MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   6  Tesla P100-SXM2...  On   | 00000000:89:00.0 Off |                    0 |
| N/A   38C    P0    48W / 300W |  15483MiB / 16276MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   7  Tesla P100-SXM2...  On   | 00000000:8A:00.0 Off |                    0 |
| N/A   34C    P0    49W / 300W |  15483MiB / 16276MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|    0     44454      C   /usr/bin/python                            15631MiB |
|    1     44454      C   /usr/bin/python                            15471MiB |
|    2     44454      C   /usr/bin/python                            15471MiB |
|    3     44454      C   /usr/bin/python                            15471MiB |
+-----------------------------------------------------------------------------+
NAME    MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
nvme0n8 259:7    0 375G  0 disk
nvme0n6 259:5    0 375G  0 disk
sdb       8:16   0  50G  0 disk
└─sdb1    8:17   0  50G  0 part /tmpfs
nvme0n4 259:3    0 375G  0 disk
nvme0n2 259:1    0 375G  0 disk
nvme0n7 259:6    0 375G  0 disk
nvme0n5 259:4    0 375G  0 disk
sda       8:0    0 100G  0 disk
└─sda1    8:1    0 100G  0 part /
nvme0n3 259:2    0 375G  0 disk
nvme0n1 259:0    0 375G  0 disk
"""Utility to manage the tpu version before starting the benchmark."""
import json
from absl import logging
from six.moves.urllib import request
try:
from cloud_tpu_client import client # pylint: disable=g-import-not-at-top
except ImportError:
print(
'Falling back to TensorFlow client; we recommended you install the Cloud '
'TPU client directly with pip install cloud-tpu-client.')
from tensorflow.python.tpu.client import client # pylint: disable=g-import-not-at-top


def _as_text(s):
  """Converts a byte/string into string."""
  if isinstance(s, bytes):
    return s.decode('utf-8')
  return s


def _get_content(url):
  """Opens the url and loads the response into json."""
  logging.info('opening url %s', url)
  req = request.Request(url)
  resp = request.urlopen(req)
  resp_text = _as_text(resp.read())
  logging.info('response text = %s', resp_text)
  return json.loads(resp_text)


def _get_version_info(url, version_label):
  """Constructs a version info from the response."""
  json_data = _get_content(url)
  logging.info('json_data = %s', json_data)
  if 'currentVersion' in json_data:
    commit_id = json_data['currentVersion']
  elif 'buildLabel' in json_data:
    commit_id = json_data['buildLabel']
  else:
    commit_id = ''
  info = {
      'url': '',
      'hash': commit_id,
      'branch': version_label,
      'piper_id': json_data.get('piperOriginRevId', '')
  }
  return info
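
# Illustrative note (not in the original file): the fields read above imply the
# worker's version endpoint returns JSON shaped roughly like the following; the
# exact payload is an assumption inferred from this parsing code.
#   {"currentVersion": "<runtime version or commit hash>",
#    "piperOriginRevId": "<internal revision id>"}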


def _configure_tpu_version(tpu_name, version_label, new_version_id):
  """Returns the current tpu version after resetting to an optional version."""
  # The tpu_name is arbitrary / user chosen unique string for this tpu.
  logging.info('Trying to connect to tpu %s', tpu_name)
  tpu_client = client.Client(tpu=tpu_name)
  tpu_client.wait_for_healthy()

  if new_version_id:
    logging.info('Trying to reset tpu version to %s', new_version_id)
    tpu_client.configure_tpu_version(version=new_version_id)
    tpu_client.wait_for_healthy()
    logging.info('TPU healthy after version reset.')
  else:
    logging.info('Using the default tpu version id.')

  workers = tpu_client.network_endpoints()
  if workers:
    ip_addr = workers[0]['ipAddress']
    url = 'http://{}:8475/requestversion'.format(ip_addr)
    return _get_version_info(url, version_label)
  else:
    logging.error('No tpu endpoint info')
    return {
        'url': '',
        'hash': '',
        'branch': version_label,
        'piper_id': '',
    }


def configure_tpu(tpu_params):
  """Resets the TPU per tpu_params and returns its version info."""
  return _configure_tpu_version(
      tpu_params.get('name'),
      version_label=tpu_params.get('version'),
      new_version_id=tpu_params.get('version_id'))
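
For reference, a minimal usage sketch (not part of the committed file): configure_tpu expects a dict with the 'name', 'version', and 'version_id' keys read above and returns the version-info dict built by _get_version_info. The values below are placeholders, not taken from this commit.

params = {
    'name': 'my-benchmark-tpu',   # user-chosen TPU name (placeholder)
    'version': 'nightly',         # label recorded as 'branch' in the result (placeholder)
    'version_id': '',             # falsy value keeps the current TPU version
}
version_info = configure_tpu(params)
print(version_info)  # e.g. {'url': '', 'hash': '...', 'branch': 'nightly', 'piper_id': '...'}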