[python] added f-string to python-package/lightgbm/basic.py (#4143)

* added f-string * fix missing parentheses and other string formatting * remove extra trailing parenthesis * one more missing parenthesis * fix pandas categoricals * update uses of + * Apply suggestions from code review Co-authored-by: Nikita Titov <nekit94-08@mail.ru> Co-authored-by: James Lamb <jaylamb20@gmail.com> Co-authored-by: Nikita Titov <nekit94-08@mail.ru>

[python] added f-string to python-package/lightgbm/basic.py (#4143)
* added f-string * fix missing parentheses and other string formatting * remove extra trailing parenthesis * one more missing parenthesis * fix pandas categoricals * update uses of + * Apply suggestions from code review Co-authored-by: Nikita Titov <nekit94-08@mail.ru> Co-authored-by: James Lamb <jaylamb20@gmail.com> Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
bffa6ca5 · NovusEdge · GitHub · 3a657c8b · bffa6ca5
Unverified Commit bffa6ca5 authored May 15, 2021 by NovusEdge Committed by GitHub May 14, 2021
Hide whitespace changes
Inline Side-by-side

Showing with 49 additions and 65 deletions

python-package/lightgbm/basic.py python-package/lightgbm/basic.py +49 -65

No files found.
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -75,7 +75,7 @@ def _log_native(msg):

 def _log_callback(msg):
    """Redirect logs from native library into Python."""
-    _log_native("{0:s}".format(msg.decode('utf-8')))
+    _log_native(str(msg.decode('utf-8')))


 def _load_lib():
@@ -161,8 +161,8 @@ def list_to_1d_numpy(data, dtype=np.float32, name='list'):
            raise ValueError('Series.dtypes must be int, float or bool')
        return np.array(data, dtype=dtype, copy=False)  # SparseArray should be supported as well
    else:
-        raise TypeError("Wrong type({0}) for {1}.\n"
-                        "It should be list, numpy 1-D array or pandas Series".format(type(data).__name__, name))
+        raise TypeError(f"Wrong type({type(data).__name__}) for {name}.\n"
+                        "It should be list, numpy 1-D array or pandas Series")


 def cfloat32_array_to_numpy(cptr, length):
@@ -226,15 +226,14 @@ def param_dict_to_str(data):
        if isinstance(val, (list, tuple, set)) or is_numpy_1d_array(val):
            def to_string(x):
                if isinstance(x, list):
-                    return "[{}]".format(','.join(map(str, x)))
+                    return f"[{','.join(map(str, x))}]"
                else:
                    return str(x)
-            pairs.append(str(key) + '=' + ','.join(map(to_string, val)))
+            pairs.append(f"{key}={','.join(map(to_string, val))}")
        elif isinstance(val, (str, NUMERIC_TYPES)) or is_numeric(val):
-            pairs.append(str(key) + '=' + str(val))
+            pairs.append(f"{key}={val}")
        elif val is not None:
-            raise TypeError('Unknown type of parameter:%s, got:%s'
-                            % (key, type(val).__name__))
+            raise TypeError(f'Unknown type of parameter:{key}, got:{type(val).__name__}')
    return ' '.join(pairs)


@@ -465,10 +464,9 @@ def c_float_array(data):
            ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
            type_data = C_API_DTYPE_FLOAT64
        else:
-            raise TypeError("Expected np.float32 or np.float64, met type({})"
-                            .format(data.dtype))
+            raise TypeError(f"Expected np.float32 or np.float64, met type({data.dtype})")
    else:
-        raise TypeError("Unknown type({})".format(type(data).__name__))
+        raise TypeError(f"Unknown type({type(data).__name__})")
    return (ptr_data, type_data, data)  # return `data` to avoid the temporary copy is freed


@@ -486,10 +484,9 @@ def c_int_array(data):
            ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_int64))
            type_data = C_API_DTYPE_INT64
        else:
-            raise TypeError("Expected np.int32 or np.int64, met type({})"
-                            .format(data.dtype))
+            raise TypeError(f"Expected np.int32 or np.int64, met type({data.dtype})")
    else:
-        raise TypeError("Unknown type({})".format(type(data).__name__))
+        raise TypeError(f"Unknown type({type(data).__name__})")
    return (ptr_data, type_data, data)  # return `data` to avoid the temporary copy is freed


@@ -534,9 +531,10 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorica
            feature_name = list(data.columns)
        bad_indices = _get_bad_pandas_dtypes(data.dtypes)
        if bad_indices:
+            bad_index_cols_str = ', '.join(data.columns[bad_indices])
            raise ValueError("DataFrame.dtypes for data must be int, float or bool.\n"
                             "Did not expect the data types in the following fields: "
-                             + ', '.join(data.columns[bad_indices]))
+                             f"{bad_index_cols_str}")
        data = data.values
        if data.dtype != np.float32 and data.dtype != np.float64:
            data = data.astype(np.float32)
@@ -559,9 +557,8 @@ def _label_from_pandas(label):


 def _dump_pandas_categorical(pandas_categorical, file_name=None):
-    pandas_str = ('\npandas_categorical:'
-                  + json.dumps(pandas_categorical, default=json_default_with_numpy)
-                  + '\n')
+    categorical_json = json.dumps(pandas_categorical, default=json_default_with_numpy)
+    pandas_str = f'\npandas_categorical:{categorical_json}\n'
    if file_name is not None:
        with open(file_name, 'a') as f:
            f.write(pandas_str)
@@ -739,7 +736,7 @@ class _InnerPredictor:
                _log_warning('Converting data to scipy sparse matrix.')
                csr = scipy.sparse.csr_matrix(data)
            except BaseException:
-                raise TypeError('Cannot predict data for type {}'.format(type(data).__name__))
+                raise TypeError(f'Cannot predict data for type {type(data).__name__}')
            preds, nrow = self.__pred_for_csr(csr, start_iteration, num_iteration, predict_type)
        if pred_leaf:
            preds = preds.astype(np.int32)
@@ -748,17 +745,16 @@ class _InnerPredictor:
            if preds.size % nrow == 0:
                preds = preds.reshape(nrow, -1)
            else:
-                raise ValueError('Length of predict result (%d) cannot be divide nrow (%d)'
-                                 % (preds.size, nrow))
+                raise ValueError(f'Length of predict result ({preds.size}) cannot be divide nrow ({nrow})')
        return preds

    def __get_num_preds(self, start_iteration, num_iteration, nrow, predict_type):
        """Get size of prediction result."""
        if nrow > MAX_INT32:
            raise LightGBMError('LightGBM cannot perform prediction for data'
-                                'with number of rows greater than MAX_INT32 (%d).\n'
+                                f'with number of rows greater than MAX_INT32 ({MAX_INT32}).\n'
                                'You can split your data into chunks'
-                                'and then concatenate predictions for them' % MAX_INT32)
+                                'and then concatenate predictions for them')
        n_preds = ctypes.c_int64(0)
        _safe_call(_LIB.LGBM_BoosterCalcNumPredict(
            self.handle,
@@ -1219,9 +1215,8 @@ class Dataset:
                      .co_varnames[:getattr(self.__class__, '_lazy_init').__code__.co_argcount])
        for key, _ in params.items():
            if key in args_names:
-                _log_warning('{0} keyword has been found in `params` and will be ignored.\n'
-                             'Please use {0} argument of the Dataset constructor to pass this parameter.'
-                             .format(key))
+                _log_warning(f'{key} keyword has been found in `params` and will be ignored.\n'
+                             f'Please use {key} argument of the Dataset constructor to pass this parameter.')
        # user can set verbose with params, it has higher priority
        if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
            params["verbose"] = -1
@@ -1237,12 +1232,11 @@ class Dataset:
                elif isinstance(name, int):
                    categorical_indices.add(name)
                else:
-                    raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature"
-                                    .format(type(name).__name__, name))
+                    raise TypeError(f"Wrong type({type(name).__name__}) or unknown name({name}) in categorical_feature")
            if categorical_indices:
                for cat_alias in _ConfigAliases.get("categorical_feature"):
                    if cat_alias in params:
-                        _log_warning('{} in param dict is overridden.'.format(cat_alias))
+                        _log_warning(f'{cat_alias} in param dict is overridden.')
                        params.pop(cat_alias, None)
                params['categorical_column'] = sorted(categorical_indices)

@@ -1277,7 +1271,7 @@ class Dataset:
                csr = scipy.sparse.csr_matrix(data)
                self.__init_from_csr(csr, params_str, ref_dataset)
            except BaseException:
-                raise TypeError('Cannot initialize Dataset from {}'.format(type(data).__name__))
+                raise TypeError(f'Cannot initialize Dataset from {type(data).__name__}')
        if label is not None:
            self.set_label(label)
        if self.get_label() is None:
@@ -1293,7 +1287,7 @@ class Dataset:
        elif init_score is not None:
            self.set_init_score(init_score)
        elif predictor is not None:
-            raise TypeError('Wrong predictor type {}'.format(type(predictor).__name__))
+            raise TypeError(f'Wrong predictor type {type(predictor).__name__}')
        # set feature names
        return self.set_feature_name(feature_name)

@@ -1369,7 +1363,7 @@ class Dataset:
    def __init_from_csr(self, csr, params_str, ref_dataset):
        """Initialize data from a CSR matrix."""
        if len(csr.indices) != len(csr.data):
-            raise ValueError('Length mismatch: {} vs {}'.format(len(csr.indices), len(csr.data)))
+            raise ValueError(f'Length mismatch: {len(csr.indices)} vs {len(csr.data)}')
        self.handle = ctypes.c_void_p()

        ptr_indptr, type_ptr_indptr, __ = c_int_array(csr.indptr)
@@ -1395,7 +1389,7 @@ class Dataset:
    def __init_from_csc(self, csc, params_str, ref_dataset):
        """Initialize data from a CSC matrix."""
        if len(csc.indices) != len(csc.data):
-            raise ValueError('Length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data)))
+            raise ValueError(f'Length mismatch: {len(csc.indices)} vs {len(csc.data)}')
        self.handle = ctypes.c_void_p()

        ptr_indptr, type_ptr_indptr, __ = c_int_array(csc.indptr)
@@ -1609,7 +1603,7 @@ class Dataset:
            Dataset with set property.
        """
        if self.handle is None:
-            raise Exception("Cannot set %s before construct dataset" % field_name)
+            raise Exception(f"Cannot set {field_name} before construct dataset")
        if data is None:
            # set to None
            _safe_call(_LIB.LGBM_DatasetSetField(
@@ -1630,7 +1624,7 @@ class Dataset:
        elif data.dtype == np.int32:
            ptr_data, type_data, _ = c_int_array(data)
        else:
-            raise TypeError("Expected np.float32/64 or np.int32, met type({})".format(data.dtype))
+            raise TypeError(f"Expected np.float32/64 or np.int32, met type({data.dtype})")
        if type_data != FIELD_TYPE_MAPPER[field_name]:
            raise TypeError("Input type error for set_field")
        _safe_call(_LIB.LGBM_DatasetSetField(
@@ -1656,7 +1650,7 @@ class Dataset:
            A numpy array with information from the Dataset.
        """
        if self.handle is None:
-            raise Exception("Cannot get %s before construct Dataset" % field_name)
+            raise Exception(f"Cannot get {field_name} before construct Dataset")
        tmp_out_len = ctypes.c_int()
        out_type = ctypes.c_int()
        ret = ctypes.POINTER(ctypes.c_void_p)()
@@ -1703,7 +1697,7 @@ class Dataset:
                return self
            else:
                _log_warning('categorical_feature in Dataset is overridden.\n'
-                             'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
+                             f'New categorical_feature is {sorted(list(categorical_feature))}')
                self.categorical_feature = categorical_feature
                return self._free_handle()
        else:
@@ -1774,8 +1768,7 @@ class Dataset:
            self.feature_name = feature_name
        if self.handle is not None and feature_name is not None and feature_name != 'auto':
            if len(feature_name) != self.num_feature():
-                raise ValueError("Length of feature_name({}) and num_feature({}) don't match"
-                                 .format(len(feature_name), self.num_feature()))
+                raise ValueError(f"Length of feature_name({len(feature_name)}) and num_feature({self.num_feature()}) don't match")
            c_feature_name = [c_str(name) for name in feature_name]
            _safe_call(_LIB.LGBM_DatasetSetFeatureNames(
                self.handle,
@@ -1895,8 +1888,8 @@ class Dataset:
            raise ValueError("Length of feature names doesn't equal with num_feature")
        if reserved_string_buffer_size < required_string_buffer_size.value:
            raise BufferError(
-                "Allocated feature name buffer size ({}) was inferior to the needed size ({})."
-                .format(reserved_string_buffer_size, required_string_buffer_size.value)
+                f"Allocated feature name buffer size ({reserved_string_buffer_size}) was"
+                f"inferior to the needed size ({required_string_buffer_size.value})."
            )
        return [string_buffers[i].value.decode('utf-8') for i in range(num_feature)]

@@ -1956,8 +1949,8 @@ class Dataset:
                elif isinstance(self.data, dt_DataTable):
                    self.data = self.data[self.used_indices, :]
                else:
-                    _log_warning("Cannot subset {} type of raw data.\n"
-                                 "Returning original raw data".format(type(self.data).__name__))
+                    _log_warning(f"Cannot subset {type(self.data).__name__} type of raw data.\n"
+                                 "Returning original raw data")
            self.need_slice = False
        if self.data is None:
            raise LightGBMError("Cannot call `get_data` after freed raw data, "
@@ -2121,9 +2114,8 @@ class Dataset:
            else:
                self.data = None
        if self.data is None:
-            err_msg = ("Cannot add features from {} type of raw data to "
-                       "{} type of raw data.\n").format(type(other.data).__name__,
-                                                        old_self_data_type)
+            err_msg = (f"Cannot add features from {type(other.data).__name__} type of raw data to "
+                       f"{old_self_data_type} type of raw data.\n")
            err_msg += ("Set free_raw_data=False when construct Dataset to avoid this"
                        if was_none else "Freeing raw data")
            _log_warning(err_msg)
@@ -2189,8 +2181,7 @@ class Booster:
        if train_set is not None:
            # Training task
            if not isinstance(train_set, Dataset):
-                raise TypeError('Training data should be Dataset instance, met {}'
-                                .format(type(train_set).__name__))
+                raise TypeError(f'Training data should be Dataset instance, met {type(train_set).__name__}')
            params = _choose_param_value(
                main_param_name="machines",
                params=params,
@@ -2423,12 +2414,12 @@ class Booster:
                               feature_names=None, parent_node=None):

            def _get_node_index(tree, tree_index):
-                tree_num = str(tree_index) + '-' if tree_index is not None else ''
+                tree_num = f'{tree_index}-' if tree_index is not None else ''
                is_split = _is_split_node(tree)
                node_type = 'S' if is_split else 'L'
                # if a single node tree it won't have `leaf_index` so return 0
-                node_num = str(tree.get('split_index' if is_split else 'leaf_index', 0))
-                return tree_num + node_type + node_num
+                node_num = tree.get('split_index' if is_split else 'leaf_index', 0)
+                return f"{tree_num}{node_type}{node_num}"

            def _get_split_feature(tree, feature_names):
                if _is_split_node(tree):
@@ -2549,8 +2540,7 @@ class Booster:
            Booster with set validation data.
        """
        if not isinstance(data, Dataset):
-            raise TypeError('Validation data should be Dataset instance, met {}'
-                            .format(type(data).__name__))
+            raise TypeError(f'Validation data should be Dataset instance, met {type(data).__name__}')
        if data._predictor is not self.__init_predictor:
            raise LightGBMError("Add validation data failed, "
                                "you should use same predictor for these data")
@@ -2628,8 +2618,7 @@ class Booster:
            is_the_same_train_set = train_set is self.train_set and self.train_set_version == train_set.version
        if train_set is not None and not is_the_same_train_set:
            if not isinstance(train_set, Dataset):
-                raise TypeError('Training data should be Dataset instance, met {}'
-                                .format(type(train_set).__name__))
+                raise TypeError(f'Training data should be Dataset instance, met {type(train_set).__name__}')
            if train_set._predictor is not self.__init_predictor:
                raise LightGBMError("Replace training data failed, "
                                    "you should use same predictor for these data")
@@ -2684,8 +2673,7 @@ class Booster:
        assert grad.flags.c_contiguous
        assert hess.flags.c_contiguous
        if len(grad) != len(hess):
-            raise ValueError("Lengths of gradient({}) and hessian({}) don't match"
-                             .format(len(grad), len(hess)))
+            raise ValueError(f"Lengths of gradient({len(grad)}) and hessian({len(hess)}) don't match")
        is_finished = ctypes.c_int(0)
        _safe_call(_LIB.LGBM_BoosterUpdateOneIterCustom(
            self.handle,
@@ -2983,7 +2971,7 @@ class Booster:
            self.handle,
            ctypes.byref(out_num_class)))
        if verbose:
-            _log_info('Finished loading model, total used %d iterations' % int(out_num_iterations.value))
+            _log_info(f'Finished loading model, total used {int(out_num_iterations.value)} iterations')
        self.__num_class = out_num_class.value
        self.pandas_categorical = _load_pandas_categorical(model_str=model_str)
        return self
@@ -3272,9 +3260,7 @@ class Booster:
            raise ValueError("Length of feature names doesn't equal with num_feature")
        if reserved_string_buffer_size < required_string_buffer_size.value:
            raise BufferError(
-                "Allocated feature name buffer size ({}) was inferior to the needed size ({})."
-                .format(reserved_string_buffer_size, required_string_buffer_size.value)
-            )
+                f"Allocated feature name buffer size ({reserved_string_buffer_size}) was inferior to the needed size ({required_string_buffer_size.value}).")
        return [string_buffers[i].value.decode('utf-8') for i in range(num_feature)]

    def feature_importance(self, importance_type='split', iteration=None):
@@ -3437,7 +3423,7 @@ class Booster:
                ctypes.byref(tmp_out_len),
                data_ptr))
            if tmp_out_len.value != len(self.__inner_predict_buffer[data_idx]):
-                raise ValueError("Wrong length of predict results for data %d" % (data_idx))
+                raise ValueError(f"Wrong length of predict results for data {data_idx}")
            self.__is_predicted_cur_iter[data_idx] = True
        return self.__inner_predict_buffer[data_idx]

@@ -3471,9 +3457,7 @@ class Booster:
                    raise ValueError("Length of eval names doesn't equal with num_evals")
                if reserved_string_buffer_size < required_string_buffer_size.value:
                    raise BufferError(
-                        "Allocated eval name buffer size ({}) was inferior to the needed size ({})."
-                        .format(reserved_string_buffer_size, required_string_buffer_size.value)
-                    )
+                        f"Allocated eval name buffer size ({reserved_string_buffer_size}) was inferior to the needed size ({required_string_vuffer_size.value}).")
                self.__name_inner_eval = \
                    [string_buffers[i].value.decode('utf-8') for i in range(self.__num_inner_eval)]
                self.__higher_better_inner_eval = \