add Chinese translation (#661)
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
from recommonmark.parser import CommonMarkParser
from recommonmark.transform import AutoStructify
# -- Project information ---------------------------------------------------
project = 'Neural Network Intelligence'
copyright = '2019, Microsoft'
author = 'Microsoft'
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = 'v0.5'
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.mathjax',
    'sphinx_markdown_tables',
    'sphinxarg.ext',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffixes as a list of strings:
#
source_parsers = {
    '.md': CommonMarkParser
}
source_suffix = ['.rst', '.md']
# The master toctree document.
master_doc = 'index'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {
    'logo_only': True,
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}
html_logo = './img/nni_logo_dark.png'
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'NeuralNetworkIntelligencedoc'
# -- Options for LaTeX output ------------------------------------------------
latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'NeuralNetworkIntelligence.tex', 'Neural Network Intelligence Documentation',
     'Microsoft', 'manual'),
]
# -- Options for manual page output ------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'neuralnetworkintelligence', 'Neural Network Intelligence Documentation',
     [author], 1)
]
# -- Options for Texinfo output ----------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'NeuralNetworkIntelligence', 'Neural Network Intelligence Documentation',
     author, 'NeuralNetworkIntelligence', 'One line description of project.',
     'Miscellaneous'),
]
# -- Options for Epub output -------------------------------------------------
# Bibliographic Dublin Core info.
epub_title = project
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''
# A unique identification for the text.
#
# epub_uid = ''
# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']
# -- Extension configuration -------------------------------------------------
github_doc_root = 'https://github.com/Microsoft/nni/tree/master/doc/'
def setup(app):
    app.add_config_value('recommonmark_config', {
        'url_resolver': lambda url: github_doc_root + url if url.startswith('..') else url,
        'enable_auto_toc_tree': False,
    }, True)
    app.add_transform(AutoStructify)
# GBDT

Gradient boosting is a machine learning technique for regression and classification problems. It produces a prediction model in the form of an ensemble of weak prediction models, typically decision trees. Like other boosting methods, it builds the model in a stage-wise fashion and optimizes it with a differentiable loss function.
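To make the stage-wise idea concrete, here is a minimal toy sketch (an illustration only, not how LightGBM is implemented): with squared loss, the negative gradient is simply the current residual, so each stage fits a weak learner to the residuals and adds it to the model.

```python
# Toy stage-wise gradient boosting with squared loss on a single feature.
# Illustrative only; real GBDT libraries use far more sophisticated trees.
import numpy as np

def fit_stump(x, residual):
    """Fit a depth-1 regression tree (a stump) to the residuals."""
    best = None
    for t in np.unique(x):
        left, right = residual[x <= t], residual[x > t]
        if len(left) == 0 or len(right) == 0:
            continue
        # squared error of predicting each side's mean
        err = ((left - left.mean()) ** 2).sum() + ((right - right.mean()) ** 2).sum()
        if best is None or err < best[0]:
            best = (err, t, left.mean(), right.mean())
    _, t, lv, rv = best
    return lambda z: np.where(z <= t, lv, rv)

def toy_gbdt(x, y, n_stages=50, lr=0.1):
    pred = np.full_like(y, y.mean(), dtype=float)
    for _ in range(n_stages):
        residual = y - pred               # negative gradient of squared loss
        stump = fit_stump(x, residual)    # the weak learner fits the gradient
        pred += lr * stump(x)             # stage-wise additive update
    return pred
```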
Gradient boosting decision tree (GBDT) has many popular implementations, such as [LightGBM](https://github.com/Microsoft/LightGBM), [xgboost](https://github.com/dmlc/xgboost), and [catboost](https://github.com/catboost/catboost). GBDT is an important tool for solving classic machine learning problems, and it is a robust algorithm that can be applied in many domains. The better its hyperparameters are, the better performance it achieves.

NNI is a platform for hyperparameter tuning; with it you can try the various built-in search algorithms and run multiple trials in parallel.
## 1. Search space for GBDT

GBDT has many hyperparameters, but which of them affect performance or computation speed? Based on practical experience, we suggest the following (using lightgbm as an example):

> * For better accuracy
>   * `learning_rate`. The range of `learning_rate` should be [0.001, 0.9].
>   * `num_leaves`. `num_leaves` is related to `max_depth`, so there is no need to tune both of them at the same time (see the short calculation after this list).
>   * `bagging_freq`. `bagging_freq` can be [1, 2, 4, 8, 10].
>   * `num_iterations`. Can be set larger in order to reach the desired fitting accuracy.
> * For faster speed
>   * `bagging_fraction`. The range of `bagging_fraction` should be [0.7, 1.0].
>   * `feature_fraction`. The range of `feature_fraction` should be [0.6, 1.0].
>   * `max_bin`.
> * To avoid overfitting
>   * `min_data_in_leaf`. Depends on the dataset.
>   * `min_sum_hessian_in_leaf`. Depends on the dataset.
>   * `lambda_l1` and `lambda_l2`.
>   * `min_gain_to_split`.
>   * `num_leaves`.

For more information, see [lightgbm](https://lightgbm.readthedocs.io/en/latest/Parameters-Tuning.html) and [autoxgboost](https://github.com/ja-thomas/autoxgboost/blob/master/poster_2018.pdf).
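The `num_leaves`/`max_depth` remark above comes down to a line of arithmetic: a binary tree of depth `max_depth` has at most 2^`max_depth` leaves, so fixing either parameter effectively bounds the other.

```python
# Why num_leaves and max_depth need not both be tuned: a binary tree of
# depth max_depth has at most 2**max_depth leaves, so one bound implies
# the other.
max_depth = 6
print(2 ** max_depth)  # 64: an upper bound on num_leaves at this depth
```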
## 2. Task description

"auto-gbdt" is based on LightGBM and NNI. The dataset consists of [training data](https://github.com/Microsoft/nni/blob/master/examples/trials/auto-gbdt/data/regression.train) and [test data](https://github.com/Microsoft/nni/blob/master/examples/trials/auto-gbdt/data/regression.test). Given the features and labels in the data, we train a GBDT regression model and use it to make predictions.

## 3. How to run NNI

### 3.1 Prepare the trial code

The basic code is as follows:
```python
...

def get_default_parameters():
    ...
    return params

def load_data(train_path='./data/regression.train', test_path='./data/regression.test'):
    '''
    Load or create the dataset
    '''
    ...
    return lgb_train, lgb_eval, X_test, y_test

def run(lgb_train, lgb_eval, params, X_test, y_test):
    # train
    gbm = lgb.train(params,
                    lgb_train,
                    num_boost_round=20,
                    valid_sets=lgb_eval,
                    early_stopping_rounds=5)

    # predict
    y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)

    # evaluate
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    print('The rmse of prediction is:', rmse)

if __name__ == '__main__':
    lgb_train, lgb_eval, X_test, y_test = load_data()
    PARAMS = get_default_parameters()

    # train
    run(lgb_train, lgb_eval, PARAMS, X_test, y_test)
```
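The snippet above elides its imports and the body of `get_default_parameters()`. As a hedged sketch of what they might look like (the actual main.py in the repository may differ), one could write:

```python
# Hypothetical supporting code for the snippet above; the real main.py
# in the NNI repository may differ.
import lightgbm as lgb
from sklearn.metrics import mean_squared_error

def get_default_parameters():
    # Illustrative defaults for the regression task, not tuned values.
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'num_leaves': 31,
        'learning_rate': 0.1,
        'bagging_fraction': 0.9,
        'bagging_freq': 2
    }
    return params
```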
### 3.2 Prepare the search space

If you want to tune `num_leaves`, `learning_rate`, `bagging_fraction`, and `bagging_freq`, you can create a [search_space.json](https://github.com/Microsoft/nni/blob/master/examples/trials/auto-gbdt/search_space.json) file as follows:

```json
{
    "num_leaves": {"_type": "choice", "_value": [31, 28, 24, 20]},
    "learning_rate": {"_type": "choice", "_value": [0.01, 0.05, 0.1, 0.2]},
    "bagging_fraction": {"_type": "uniform", "_value": [0.7, 1.0]},
    "bagging_freq": {"_type": "choice", "_value": [1, 2, 4, 8, 10]}
}
```

See [here](./SearchSpaceSpec.md) to learn more about variable types.
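For intuition, each trial receives one concrete point sampled from this space. A hypothetical sample might look like the following (values chosen for illustration):

```python
# A hypothetical configuration NNI's tuner might draw from the search
# space above; the keys match the fields of search_space.json.
sampled_params = {
    "num_leaves": 24,           # one of the "choice" values
    "learning_rate": 0.05,      # one of the "choice" values
    "bagging_fraction": 0.83,   # drawn uniformly from [0.7, 1.0]
    "bagging_freq": 4           # one of the "choice" values
}
```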
### 3.3 Use the NNI SDK in your code

```diff
+import nni
...

def get_default_parameters():
    ...
    return params

def load_data(train_path='./data/regression.train', test_path='./data/regression.test'):
    '''
    Load or create the dataset
    '''
    ...
    return lgb_train, lgb_eval, X_test, y_test

def run(lgb_train, lgb_eval, params, X_test, y_test):
    # train
    gbm = lgb.train(params,
                    lgb_train,
                    num_boost_round=20,
                    valid_sets=lgb_eval,
                    early_stopping_rounds=5)

    # predict
    y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)

    # evaluate
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    print('The rmse of prediction is:', rmse)
+   nni.report_final_result(rmse)

if __name__ == '__main__':
    lgb_train, lgb_eval, X_test, y_test = load_data()
+   RECEIVED_PARAMS = nni.get_next_parameter()
    PARAMS = get_default_parameters()
+   PARAMS.update(RECEIVED_PARAMS)

    # train
    run(lgb_train, lgb_eval, PARAMS, X_test, y_test)
```
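An optional extension, not part of the original example: besides the final RMSE, each boosting round's validation metric can be reported so the NNI WebUI shows trial progress (and assessors, if configured, can stop bad trials early). A minimal sketch using a LightGBM callback, reusing the names from the snippet above:

```python
# Optional: report each boosting round's validation metric to NNI.
# A sketch only; names (params, lgb_train, lgb_eval) follow the snippet above.
import nni

def report_to_nni(env):
    # env.evaluation_result_list holds tuples of
    # (dataset_name, metric_name, value, is_higher_better)
    _, _, value, _ = env.evaluation_result_list[0]
    nni.report_intermediate_result(value)

gbm = lgb.train(params,
                lgb_train,
                num_boost_round=20,
                valid_sets=lgb_eval,
                early_stopping_rounds=5,
                callbacks=[report_to_nni])
```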
### 3.4 Write the config file and run the experiment

In the config file, you can set:

* Experiment settings: `trialConcurrency`, `maxExecDuration`, `maxTrialNum`, `trial gpuNum`, etc.
* Platform settings: `trainingServicePlatform`, etc.
* Path settings: `searchSpacePath`, `trial codeDir`, etc.
* Algorithm settings: the `Tuner` algorithm, the optimization direction (`optimize_mode`), etc.
An example config.yml:

```yaml
authorName: default
experimentName: example_auto-gbdt
trialConcurrency: 1
maxExecDuration: 10h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: local
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
  #choice: TPE, Random, Anneal, Evolution, BatchTuner
  #SMAC (SMAC should be installed through nnictl first)
  builtinTunerName: TPE
  classArgs:
    #choice: maximize, minimize
    optimize_mode: minimize
trial:
  command: python3 main.py
  codeDir: .
  gpuNum: 0
```
Start the experiment with the following command:
```bash
nnictl create --config ./config.yml
```