build-cran-package.sh 7.44 KB
Newer Older
1
2
3
4
5
6
#!/bin/sh

# [description]
#     Prepare a source distribution of the R package
#     to be submitted to CRAN.
#
# [arguments]
#
#     --r-executable=<exe> Customize the R executable used by `R CMD build`.
#                          The value must be attached with '='.
#                          Useful if building the R package in an environment
#                          with non-standard builds of R, such as those
#                          provided in https://github.com/wch/r-debug.
#
#     --no-build-vignettes Pass this flag to skip creating vignettes.
#                          You might want to do this to avoid installing
#                          vignette-only dependencies, or to avoid
#                          portability issues.
#
# [usage]
#
#     # default usage
#     sh build-cran-package.sh
#
#     # custom R build
#     sh build-cran-package.sh --r-executable=RDvalgrind
#
#     # skip vignette building
#     sh build-cran-package.sh --no-build-vignettes
29
30
31

# Stop at the first failing command.
set -e

# Default values of arguments
BUILD_VIGNETTES=true
LGB_R_EXECUTABLE=R

# parse_args
#     Update BUILD_VIGNETTES and LGB_R_EXECUTABLE from command-line flags.
#
#     Arguments: the flags passed to this script ("$@")
#     Globals:   BUILD_VIGNETTES (written), LGB_R_EXECUTABLE (written)
#     Exits:     with status 1 (message on stderr) on an unrecognized flag
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --r-executable=*)
        # keep everything after the first '='
        LGB_R_EXECUTABLE="${1#*=}"
        ;;
      --no-build-vignettes*)
        # the trailing glob means any suffix after the flag name is accepted
        BUILD_VIGNETTES=false
        ;;
      *)
        echo "invalid argument '${1}'" >&2
        # 'exit -1' is not a valid status for a POSIX shell; use a plain
        # non-zero value instead
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"

echo "Building lightgbm with R executable: ${LGB_R_EXECUTABLE}"

54
55
# Directory the script was started from; the build steps return here
# after working inside the staging directory.
ORIG_WD="$(pwd)"
# Staging directory that receives the assembled package sources.
TEMP_R_DIR="${ORIG_WD}/lightgbm_r"

# Always start from an empty staging directory.
if test -d "${TEMP_R_DIR}"; then
    rm -r "${TEMP_R_DIR}"
fi
mkdir -p "${TEMP_R_DIR}"

CURRENT_DATE=$(date +'%Y-%m-%d')

# lgb_version_from_file FILE
#     Print the contents of FILE with every 'rc' replaced by '-'.
#
#     R packages cannot have versions like 3.0.0rc1, but
#     3.0.0-1 is acceptable.
#
#     sed reads FILE itself (rather than being fed by 'cat |') so that a
#     missing or unreadable FILE yields a non-zero status instead of
#     silently producing an empty version.
lgb_version_from_file() {
    sed "s/rc/-/g" "$1"
}

LGB_VERSION=$(lgb_version_from_file VERSION.txt)

68
# Stage everything the package needs inside TEMP_R_DIR.
STAGED_SRC_DIR="${TEMP_R_DIR}/src"
STAGED_LGB_HEADERS="${STAGED_SRC_DIR}/include/LightGBM"

# the R package itself, then the include/ tree and the contents of src/
cp -R R-package/* "${TEMP_R_DIR}"
cp -R include "${STAGED_SRC_DIR}/"
cp -R src/* "${STAGED_SRC_DIR}/"

# the logo is only staged (into vignettes/) when vignettes will be built
if ${BUILD_VIGNETTES} ; then
    cp docs/logo/LightGBM_logo_black_text.svg "${TEMP_R_DIR}/vignettes/"
fi

# fast_double_parser: a single header, placed next to the LightGBM headers
cp external_libs/fast_double_parser/include/fast_double_parser.h "${STAGED_LGB_HEADERS}"

# fmt: all of its top-level headers, in their own fmt/ subdirectory
mkdir -p "${STAGED_LGB_HEADERS}/fmt"
cp external_libs/fmt/include/fmt/*.h "${STAGED_LGB_HEADERS}/fmt/"
85

86
87
88
# Only a hand-picked subset of Eigen is copied, to keep the R package
# small and avoid redistributing code with licenses incompatible with
# LightGBM's license
EIGEN_R_DIR="${TEMP_R_DIR}/src/include/Eigen"
mkdir -p "${EIGEN_R_DIR}"

for eigen_module in Cholesky Core Dense Eigenvalues Geometry Householder Jacobi LU QR SVD; do
    # every module contributes its top-level header ...
    cp "external_libs/eigen/Eigen/${eigen_module}" "${EIGEN_R_DIR}/${eigen_module}"

    # ... and, except for "Dense", the matching directory under src/
    case "${eigen_module}" in
        Dense)
            continue
            ;;
    esac
    mkdir -p "${EIGEN_R_DIR}/src/${eigen_module}/"
    cp -R "external_libs/eigen/Eigen/src/${eigen_module}"/* "${EIGEN_R_DIR}/src/${eigen_module}/"
done

# two further src/ directories that are not modules in the list above
for eigen_support_dir in misc plugins; do
    mkdir -p "${EIGEN_R_DIR}/src/${eigen_support_dir}"
    cp -R "external_libs/eigen/Eigen/src/${eigen_support_dir}"/* "${EIGEN_R_DIR}/src/${eigen_support_dir}/"
done
106

107
cd "${TEMP_R_DIR}"

    # Drop everything from the staged copy that is not needed for CRAN
    echo "Removing files not needed for CRAN"
    rm src/install.libs.R
    rm -r inst/
    rm -r pkgdown/
    for unneeded_file in cran-comments.md AUTOCONF_UBUNTU_VERSION recreate-configure.sh; do
        rm "${unneeded_file}"
    done

    # sources that only the lightgbm CLI uses; the R package
    # does not need them
    for cli_only_file in \
        src/application/application.cpp \
        src/include/LightGBM/application.h \
        src/main.cpp
    do
        rm "${cli_only_file}"
    done

    # configure.ac and DESCRIPTION carry ~~VERSION~~ / ~~DATE~~ placeholders
    # so that they never have to be edited by hand
    for versioned_file in configure.ac DESCRIPTION; do
        sed -i.bak -e "s/~~VERSION~~/${LGB_VERSION}/" "${versioned_file}"
    done
    sed -i.bak -e "s/~~DATE~~/${CURRENT_DATE}/" DESCRIPTION

130
131
132
133
134
    # Choose the C++ standard written into DESCRIPTION.
    # Rtools35 (used with R 3.6 on Windows) doesn't support C++17, so
    # Windows with R < 4 gets C++11; everything else gets C++17.
    using_windows_and_r3=$(
        Rscript -e 'cat(.Platform$OS.type == "windows" && R.version[["major"]] < 4)'
    )
    case "${using_windows_and_r3}" in
        TRUE)
            LGB_CXX_STD="C++11"
            ;;
        *)
            LGB_CXX_STD="C++17"
            ;;
    esac
    sed -i.bak -e "s/~~CXXSTD~~/${LGB_CXX_STD}/" DESCRIPTION

140
141
142
    # Remove 'region', 'endregion', and 'warning' pragmas.
    # This won't change the correctness of the code. CRAN does
    # not allow you to use compiler flag '-Wno-unknown-pragmas' or
    # pragmas that suppress warnings.
    #
    # strip_unsupported_pragmas
    #     For every *.h, *.hpp and *.cpp file below the current directory,
    #     replace each line containing one of the listed pragmas with an
    #     empty line, then delete the '.bak' files sed leaves behind.
    #
    #     The -name tests are grouped with \( \) on purpose: without the
    #     grouping, -exec binds only to the last -name, so only '*.cpp.bak'
    #     backups would be removed. Letting find run sed directly also keeps
    #     paths containing whitespace intact.
    strip_unsupported_pragmas() {
        find . \
            \( -name '*.h' -o -name '*.hpp' -o -name '*.cpp' \) \
            -exec sed \
                -i.bak \
                -e 's/^.*#pragma clang diagnostic.*$//' \
                -e 's/^.*#pragma diag_suppress.*$//' \
                -e 's/^.*#pragma GCC diagnostic.*$//' \
                -e 's/^.*#pragma region.*$//' \
                -e 's/^.*#pragma endregion.*$//' \
                -e 's/^.*#pragma warning.*$//' \
                {} +
        find . \
            \( -name '*.h.bak' -o -name '*.hpp.bak' -o -name '*.cpp.bak' \) \
            -exec rm {} +
    }

    echo "Removing unknown pragmas in headers"
    strip_unsupported_pragmas
157

158
159
160
161
162
163
164
165
166
167
    # In common.h, rewrite paths that start with '..' and end in
    # fmt/format.h or fast_double_parser.h so that they begin with
    # LightGBM/ instead.
    COMMON_HEADER="src/include/LightGBM/utils/common.h"
    sed -i.bak -e 's/\.\..*fmt\/format\.h/LightGBM\/fmt\/format\.h/' "${COMMON_HEADER}"
    sed -i.bak -e 's/\.\..*fast_double_parser\.h/LightGBM\/fast_double_parser\.h/' "${COMMON_HEADER}"

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
    # A package built through 'configure' appears to always end up with
    # a shared library named <packagename>.so/dll. The package sources
    # refer to 'lib_lightgbm' (the name this project has historically
    # installed), so those references are switched to 'lightgbm' here.
    echo "Changing lib_lightgbm to lightgbm"
    for lib_name_user in R/*.R NAMESPACE; do
        sed -i.bak -e 's/lib_lightgbm/lightgbm/' "${lib_name_user}"
    done

    # 'processx' is a 'Suggests' dependency in DESCRIPTION only because
    # install.libs.R uses it, and that file is not part of the CRAN
    # distribution, so the line mentioning it is deleted
    sed -i.bak '/processx/d' DESCRIPTION

    echo "Cleaning sed backup files"
    rm R/*.R.bak
    rm NAMESPACE.bak

cd "${ORIG_WD}"
198

199
200
201
202
203
204
205
206
207
208
209
210
211
# Build the source tarball from the staging directory. When vignettes are
# built, the tarball is unpacked, stripped of object files and re-packed.
if ${BUILD_VIGNETTES} ; then
    "${LGB_R_EXECUTABLE}" CMD build --keep-empty-dirs lightgbm_r

    echo "removing object files created by vignettes"
    TARBALL_NAME="lightgbm_${LGB_VERSION}.tar.gz"
    rm -rf ./_tmp
    mkdir _tmp
    mv "${TARBALL_NAME}" _tmp/

    echo "untarring ${TARBALL_NAME}"
    cd _tmp
        tar -xf "${TARBALL_NAME}" > /dev/null 2>&1
        rm -f "${TARBALL_NAME}"
        echo "done untarring ${TARBALL_NAME}"

        # Compiling the library to generate vignettes leaves object files
        # behind. Excluding them with something like tar --exclude=*.so is
        # not portable (for example, it doesn't work with some versions of
        # tar on Windows), so they are deleted by hand instead.
        #
        # For background, see https://github.com/microsoft/LightGBM/pull/3946#pullrequestreview-799415812.
        for object_dir in \
            src \
            src/boosting \
            src/io \
            src/metric \
            src/network \
            src/objective \
            src/treelearner
        do
            rm -f "./lightgbm/${object_dir}"/*.o
        done

        echo "re-tarring ${TARBALL_NAME}"
        tar -cz -f "${TARBALL_NAME}" lightgbm > /dev/null 2>&1
        mv "${TARBALL_NAME}" ../
    cd ..
    echo "Done creating ${TARBALL_NAME}"

    rm -rf ./_tmp
else
    # no vignettes: a single build call is enough
    "${LGB_R_EXECUTABLE}" CMD build --keep-empty-dirs --no-build-vignettes lightgbm_r
fi

echo "Done building R package"