#!/bin/sh

# [description]
#     Prepare a source distribution of the R package
#     to be submitted to CRAN.
#
# [arguments]
#
#     --r-executable Customize the R executable used by `R CMD build`.
#                    Useful if building the R package in an environment with
#                    non-standard builds of R, such as those provided in
#                    https://github.com/wch/r-debug.
#
#     --no-build-vignettes Pass this flag to skip creating vignettes.
#                          You might want to do this to avoid installing
#                          vignette-only dependencies, or to avoid
#                          portability issues.
#
# [usage]
#
#     # default usage
#     sh build-cran-package.sh
#
#     # custom R build
#     sh build-cran-package.sh --r-executable=RDvalgrind
#
#     # skip vignette building
#     sh build-cran-package.sh --no-build-vignettes

# Abort on the first failing command (-e) and on any use of an unset
# variable (-u). '-E' is not a POSIX 'set' option, and this script installs
# no ERR trap for it to propagate, so it is omitted for portability.
set -e -u

# Default values of arguments
BUILD_VIGNETTES=true
LGB_R_EXECUTABLE=R

# Consume the command-line flags one at a time.
#   --r-executable=<name>   R executable handed to 'CMD build'
#   --no-build-vignettes    skip vignette creation
# Anything else is rejected with exit status 1.
while [ $# -gt 0 ]; do
  case "$1" in
    --r-executable=*)
      # keep everything after the first '='
      LGB_R_EXECUTABLE="${1#*=}"
      ;;
    --no-build-vignettes*)
      BUILD_VIGNETTES=false
      ;;
    *)
      # diagnostics go to stderr so they are not mixed with build output
      echo "invalid argument '${1}'" >&2
      exit 1
      ;;
  esac
  shift
done

echo "Building lightgbm with R executable: ${LGB_R_EXECUTABLE}"

# Directory the script was started from; later steps 'cd' back to it.
ORIG_WD="$(pwd)"
# Staging directory in which the package source tree is assembled.
TEMP_R_DIR="${ORIG_WD}/lightgbm_r"

# Always start from an empty staging directory.
if test -d "${TEMP_R_DIR}"; then
    rm -r "${TEMP_R_DIR}"
fi
mkdir -p "${TEMP_R_DIR}"

# Substituted for the ~~DATE~~ placeholder in DESCRIPTION.
CURRENT_DATE=$(date +'%Y-%m-%d')

# R packages cannot have versions like 3.0.0rc1, but
# 3.0.0-1 is acceptable.
# sed reads the file directly (no 'cat |') so that a missing VERSION.txt
# makes this assignment fail instead of silently yielding an empty version.
LGB_VERSION=$(sed "s/rc/-/g" VERSION.txt)

# move relevant files
#
# The R package skeleton goes to the root of the staging directory; the
# C++ headers and sources go under its src/ directory.
cp -R R-package/* "${TEMP_R_DIR}"
cp -R include "${TEMP_R_DIR}/src/"
cp -R src/* "${TEMP_R_DIR}/src/"

# The logo is only copied into vignettes/ when vignettes are being built.
if [ "${BUILD_VIGNETTES}" = "true" ]; then
    cp docs/logo/LightGBM_logo_black_text.svg "${TEMP_R_DIR}/vignettes/"
fi

# Third-party headers are placed inside the LightGBM include directory.
cp \
    external_libs/fast_double_parser/include/fast_double_parser.h \
    "${TEMP_R_DIR}/src/include/LightGBM"

mkdir -p "${TEMP_R_DIR}/src/include/LightGBM/fmt"
cp \
    external_libs/fmt/include/fmt/*.h \
    "${TEMP_R_DIR}/src/include/LightGBM/fmt/"

# including only specific files from Eigen, to keep the R package
# small and avoid redistributing code with licenses incompatible with
# LightGBM's license
EIGEN_R_DIR="${TEMP_R_DIR}/src/include/Eigen"
mkdir -p "${EIGEN_R_DIR}"

modules="Cholesky Core Dense Eigenvalues Geometry Householder Jacobi LU QR SVD"
# ${modules} is intentionally unquoted: it is a space-separated list that
# must be split into one word per module.
for eigen_module in ${modules}; do
    # top-level module header
    cp "external_libs/eigen/Eigen/${eigen_module}" "${EIGEN_R_DIR}/${eigen_module}"
    # every module except 'Dense' also gets its src/<module>/ directory copied
    if [ "${eigen_module}" != "Dense" ]; then
        mkdir -p "${EIGEN_R_DIR}/src/${eigen_module}/"
        cp -R "external_libs/eigen/Eigen/src/${eigen_module}"/* "${EIGEN_R_DIR}/src/${eigen_module}/"
    fi
done

# shared Eigen source directories copied in addition to the modules above
for eigen_shared_dir in misc plugins; do
    mkdir -p "${EIGEN_R_DIR}/src/${eigen_shared_dir}"
    cp -R "external_libs/eigen/Eigen/src/${eigen_shared_dir}"/* "${EIGEN_R_DIR}/src/${eigen_shared_dir}/"
done

# Everything indented below runs inside the staging directory.
cd "${TEMP_R_DIR}"

    # Remove files not needed for CRAN
    echo "Removing files not needed for CRAN"
    rm src/install.libs.R
    rm -r inst/
    rm -r pkgdown/
    rm cran-comments.md
    rm AUTOCONF_UBUNTU_VERSION
    rm recreate-configure.sh

    # files only used by the lightgbm CLI aren't needed for
    # the R package
    rm src/application/application.cpp
    rm src/include/LightGBM/application.h
    rm src/main.cpp

    # configure.ac and DESCRIPTION have placeholders for version
    # and date so they don't have to be updated manually
    sed -i.bak -e "s/~~VERSION~~/${LGB_VERSION}/" configure.ac
    sed \
        -i.bak \
        -e "s/~~VERSION~~/${LGB_VERSION}/" \
        -e "s/~~DATE~~/${CURRENT_DATE}/" \
        DESCRIPTION

    # Rtools35 (used with R 3.6 on Windows) doesn't support C++17
    LGB_CXX_STD="C++17"
    # Rscript prints TRUE only when running on Windows with an R major
    # version below 4; any other output keeps the C++17 default.
    using_windows_and_r3=$(
        Rscript -e 'cat(.Platform$OS.type == "windows" && R.version[["major"]] < 4)'
    )
    if test "${using_windows_and_r3}" = "TRUE"; then
        LGB_CXX_STD="C++11"
    fi
    # fill the ~~CXXSTD~~ placeholder in DESCRIPTION with the chosen standard
    sed -i.bak -e "s/~~CXXSTD~~/${LGB_CXX_STD}/" DESCRIPTION

    # Remove 'region', 'endregion', and 'warning' pragmas.
    # This won't change the correctness of the code. CRAN does
    # not allow you to use compiler flag '-Wno-unknown-pragmas' or
    # pragmas that suppress warnings.
    echo "Removing unknown pragmas in headers"

    # Blank out every line containing one of the listed pragmas in all
    # *.h / *.hpp / *.cpp files below the current directory, then delete the
    # '.bak' backups that 'sed -i.bak' leaves behind.
    #
    # The '-o' alternatives are grouped with \( \) because in find the
    # implicit AND binds tighter than '-o': without the grouping, '-exec'
    # would apply only to the last '-name' test and the other backups would
    # be left on disk.
    # '-exec ... {} +' passes file names straight to the command, so paths
    # containing whitespace are handled correctly.
    strip_unknown_pragmas() {
        find . \
            \( -name '*.h' -o -name '*.hpp' -o -name '*.cpp' \) \
            -exec sed \
                -i.bak \
                -e 's/^.*#pragma clang diagnostic.*$//' \
                -e 's/^.*#pragma diag_suppress.*$//' \
                -e 's/^.*#pragma GCC diagnostic.*$//' \
                -e 's/^.*#pragma region.*$//' \
                -e 's/^.*#pragma endregion.*$//' \
                -e 's/^.*#pragma warning.*$//' \
                {} +
        find . \
            \( -name '*.h.bak' -o -name '*.hpp.bak' -o -name '*.cpp.bak' \) \
            -exec rm {} +
    }
    strip_unknown_pragmas

    # In common.h, replace the '..'-relative paths leading to fmt/format.h
    # and fast_double_parser.h with paths under LightGBM/.
    sed \
        -i.bak \
        -e 's/\.\..*fmt\/format\.h/LightGBM\/fmt\/format\.h/' \
        -e 's/\.\..*fast_double_parser\.h/LightGBM\/fast_double_parser\.h/' \
        src/include/LightGBM/utils/common.h

    # When building an R package with 'configure', it seems
    # you're guaranteed to get a shared library called
    #  <packagename>.so/dll/dylib. The package source code expects
    # 'lib_lightgbm.so', not 'lightgbm.so', to comply with the way
    # this project has historically handled installation
    echo "Changing lib_lightgbm to lightgbm"
    # Only the first occurrence on each line is replaced (no 'g' flag).
    sed \
        -i.bak \
        -e 's/lib_lightgbm/lightgbm/' \
        R/*.R \
        NAMESPACE

    # 'processx' is listed as a 'Suggests' dependency in DESCRIPTION
    # because it is used in install.libs.R, a file that is not
    # included in the CRAN distribution of the package
    sed \
        -i.bak \
        '/processx/d' \
        DESCRIPTION

    echo "Cleaning sed backup files"
    rm R/*.R.bak
    rm NAMESPACE.bak

# Leave the staging directory and return to where the script started.
cd "${ORIG_WD}"

# Build the source tarball from the staging directory.
#
# With vignettes enabled, the tarball produced by 'R CMD build' is
# unpacked, scrubbed of object files, and packed again. Without vignettes,
# the tarball is used as produced.
if [ "${BUILD_VIGNETTES}" = "true" ]; then
    "${LGB_R_EXECUTABLE}" CMD build \
        --keep-empty-dirs \
        lightgbm_r

    echo "removing object files created by vignettes"
    rm -rf ./_tmp
    mkdir _tmp
    TARBALL_NAME="lightgbm_${LGB_VERSION}.tar.gz"
    mv "${TARBALL_NAME}" _tmp/

    echo "untarring ${TARBALL_NAME}"
    cd _tmp
        # tar's output is discarded; a non-zero exit status still aborts
        # the script under 'set -e'
        tar -xf "${TARBALL_NAME}" > /dev/null 2>&1
        rm -f "${TARBALL_NAME}"
        echo "done untarring ${TARBALL_NAME}"

        # Object files are left behind from compiling the library to generate vignettes.
        # Approaches like using tar --exclude=*.so to exclude them are not portable
        # (for example, don't work with some versions of tar on Windows).
        #
        # Removing them manually here removes the need to use tar --exclude.
        #
        # For background, see https://github.com/microsoft/LightGBM/pull/3946#pullrequestreview-799415812.
        rm -f \
            ./lightgbm/src/*.o \
            ./lightgbm/src/boosting/*.o \
            ./lightgbm/src/io/*.o \
            ./lightgbm/src/metric/*.o \
            ./lightgbm/src/network/*.o \
            ./lightgbm/src/objective/*.o \
            ./lightgbm/src/treelearner/*.o \
            ./lightgbm/src/utils/*.o

        echo "re-tarring ${TARBALL_NAME}"
        tar \
            -cz \
            -f "${TARBALL_NAME}" \
            lightgbm \
        > /dev/null 2>&1
        mv "${TARBALL_NAME}" ../
    cd ..
    echo "Done creating ${TARBALL_NAME}"

    rm -rf ./_tmp
else
    "${LGB_R_EXECUTABLE}" CMD build \
        --keep-empty-dirs \
        --no-build-vignettes \
        lightgbm_r
fi

echo "Done building R package"