build-cran-package.sh 6.53 KB
Newer Older
1
2
3
4
5
6
#!/bin/sh

# [description]
#     Prepare a source distribution of the R package
#     to be submitted to CRAN.
#
# [arguments]
#
#     --r-executable Customize the R executable used by `R CMD build`.
#                    Useful if building the R package in an environment with
#                    non-standard builds of R, such as those provided in
#                    https://github.com/wch/r-debug.
#
#     --no-build-vignettes Pass this flag to skip creating vignettes.
#                          You might want to do this to avoid installing
#                          vignette-only dependencies, or to avoid
#                          portability issues.
#
19
# [usage]
#
#     # default usage
#     sh build-cran-package.sh
#
#     # custom R build
#     sh build-cran-package.sh --r-executable=RDvalgrind
#
#     # skip vignette building
#     sh build-cran-package.sh --no-build-vignettes
29
30
31

# Abort the script as soon as any command exits with a non-zero status.
set -e

32
33
# Default values of arguments
BUILD_VIGNETTES=true
LGB_R_EXECUTABLE=R

# parse_args ARG...
#
#   Apply command-line flags to the option variables above.
#
#   Arguments: the script's command-line arguments
#   Sets:      LGB_R_EXECUTABLE  from --r-executable=<value>
#              BUILD_VIGNETTES   to 'false' for --no-build-vignettes
#   Exits:     with status 1, after printing a message to stderr,
#              on any unrecognized argument
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --r-executable=*)
        # keep everything after the first '='
        LGB_R_EXECUTABLE="${1#*=}"
        ;;
      --no-build-vignettes | --no-build-vignettes=*)
        # The bare flag is the documented form; the '=<anything>' form
        # is accepted as well for backward compatibility.
        BUILD_VIGNETTES=false
        ;;
      *)
        echo "invalid argument '${1}'" >&2
        # negative exit statuses are not valid in POSIX sh
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"

echo "Building lightgbm with R executable: ${LGB_R_EXECUTABLE}"

# Remember the starting directory; the package is staged in a
# 'lightgbm_r' directory directly beneath it.
ORIG_WD="$(pwd)"
TEMP_R_DIR="${ORIG_WD}/lightgbm_r"

# Always start from an empty staging directory.
if [ -d "${TEMP_R_DIR}" ]; then
    rm -r "${TEMP_R_DIR}"
fi
mkdir -p "${TEMP_R_DIR}"
61

62
63
64
65
66
67
# Build date in ISO form (YYYY-MM-DD).
CURRENT_DATE=$(date +'%Y-%m-%d')

# R packages cannot have versions like 3.0.0rc1, but
# 3.0.0-1 is acceptable
#
# sed reads VERSION.txt directly (rather than through `cat |`) so that a
# missing or unreadable file makes this assignment fail and, under
# `set -e`, stops the build instead of continuing with an empty version.
LGB_VERSION=$(sed "s/rc/-/g" VERSION.txt)

68
# move relevant files
#
# The contents of R-package/ go in first; 'include' and everything
# under 'src/' are then copied into the staged package's src/ directory.
cp -R R-package/* "${TEMP_R_DIR}"
for lgb_cpp_path in include src/*; do
    cp -R "${lgb_cpp_path}" "${TEMP_R_DIR}/src/"
done

# the logo is only copied when vignettes are going to be built
if ${BUILD_VIGNETTES}; then
    cp docs/logo/LightGBM_logo_black_text.svg "${TEMP_R_DIR}/vignettes/"
fi

77
78
# Copy the fast_double_parser header and the fmt headers from
# external_libs/ into the staged src/include/LightGBM directory.
lgb_header_dir="${TEMP_R_DIR}/src/include/LightGBM"

cp external_libs/fast_double_parser/include/fast_double_parser.h \
    "${lgb_header_dir}"

mkdir -p "${lgb_header_dir}/fmt"
cp external_libs/fmt/include/fmt/*.h \
    "${lgb_header_dir}/fmt/"
85

86
87
88
# including only specific files from Eigen, to keep the R package
# small and avoid redistributing code with licenses incompatible with
# LightGBM's license
EIGEN_R_DIR="${TEMP_R_DIR}/src/include/Eigen"
mkdir -p "${EIGEN_R_DIR}"

modules="Cholesky Core Dense Eigenvalues Geometry Householder Jacobi LU QR SVD"
# ${modules} is deliberately unquoted: it is a space-separated list.
for eigen_module in ${modules}; do
    # every module has a top-level file named after it
    cp "external_libs/eigen/Eigen/${eigen_module}" "${EIGEN_R_DIR}/${eigen_module}"
    case "${eigen_module}" in
        Dense)
            # only the top-level file is copied for 'Dense'
            ;;
        *)
            mkdir -p "${EIGEN_R_DIR}/src/${eigen_module}/"
            cp -R "external_libs/eigen/Eigen/src/${eigen_module}"/* "${EIGEN_R_DIR}/src/${eigen_module}/"
            ;;
    esac
done

# directories under Eigen/src/ that are copied in addition to the modules
for eigen_extra_dir in misc plugins; do
    mkdir -p "${EIGEN_R_DIR}/src/${eigen_extra_dir}"
    cp -R "external_libs/eigen/Eigen/src/${eigen_extra_dir}"/* "${EIGEN_R_DIR}/src/${eigen_extra_dir}/"
done
106

107
cd "${TEMP_R_DIR}"

    # Remove files not needed for CRAN
    echo "Removing files not needed for CRAN"
    for lgb_unneeded_file in \
        src/install.libs.R \
        cran-comments.md \
        AUTOCONF_UBUNTU_VERSION \
        recreate-configure.sh
    do
        rm "${lgb_unneeded_file}"
    done
    for lgb_unneeded_dir in inst/ pkgdown/; do
        rm -r "${lgb_unneeded_dir}"
    done

    # files only used by the lightgbm CLI aren't needed for
    # the R package
    for lgb_cli_only_file in \
        src/application/application.cpp \
        src/include/LightGBM/application.h \
        src/main.cpp
    do
        rm "${lgb_cli_only_file}"
    done

124
125
126
127
128
129
    # configure.ac and DESCRIPTION have placeholders for version
    # and date so they don't have to be updated manually
    #
    # ~~VERSION~~ appears in both files, ~~DATE~~ only in DESCRIPTION.
    for lgb_versioned_file in configure.ac DESCRIPTION; do
        sed -i.bak -e "s/~~VERSION~~/${LGB_VERSION}/" "${lgb_versioned_file}"
    done
    sed -i.bak -e "s/~~DATE~~/${CURRENT_DATE}/" DESCRIPTION

130
131
132
    # Remove 'region', 'endregion', 'warning', and diagnostic-control
    # ('clang diagnostic', 'GCC diagnostic', 'diag_suppress') pragmas.
    # This won't change the correctness of the code. CRAN does
    # not allow you to use compiler flag '-Wno-unknown-pragmas' or
    # pragmas that suppress warnings.
    #
    # strip_unknown_pragmas [DIR]
    #   Blank out every line containing one of those pragmas in all
    #   '*.h', '*.hpp' and '*.cpp' files below DIR (default: '.'), then
    #   delete the '.bak' backups that 'sed -i.bak' leaves behind.
    strip_unknown_pragmas() {
        # The -name tests are grouped with \( \) because -o binds more
        # loosely than the implicit "and" in front of -exec; without the
        # grouping, -exec would apply to the last -name test only.
        # 'find -exec ... {} +' is used rather than looping over the
        # output of find so that paths containing whitespace work.
        find "${1:-.}" \
            \( -name '*.h' -o -name '*.hpp' -o -name '*.cpp' \) \
            -exec sed \
                -i.bak \
                -e 's/^.*#pragma clang diagnostic.*$//' \
                -e 's/^.*#pragma diag_suppress.*$//' \
                -e 's/^.*#pragma GCC diagnostic.*$//' \
                -e 's/^.*#pragma region.*$//' \
                -e 's/^.*#pragma endregion.*$//' \
                -e 's/^.*#pragma warning.*$//' \
                {} +
        find "${1:-.}" \
            \( -name '*.h.bak' -o -name '*.hpp.bak' -o -name '*.cpp.bak' \) \
            -exec rm {} +
    }

    echo "Removing unknown pragmas in headers"
    strip_unknown_pragmas .
147

148
149
150
151
152
153
154
155
156
157
    # In common.h, replace the relative ('..'-prefixed) paths to
    # fmt/format.h and fast_double_parser.h with 'LightGBM/...' paths,
    # which is where this script copied those headers.
    sed \
        -i.bak \
        -e 's/\.\..*fmt\/format\.h/LightGBM\/fmt\/format\.h/' \
        -e 's/\.\..*fast_double_parser\.h/LightGBM\/fast_double_parser\.h/' \
        src/include/LightGBM/utils/common.h
    # no later step deletes this backup, so remove it here
    rm -f src/include/LightGBM/utils/common.h.bak

158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
    # When building an R package with 'configure', it seems
    # you're guaranteed to get a shared library called
    #  <packagename>.so/dll. The package source code expects
    # 'lib_lightgbm.so', not 'lightgbm.so', to comply with the way
    # this project has historically handled installation
    echo "Changing lib_lightgbm to lightgbm"
    # only the first occurrence on each line is replaced
    sed \
        -i.bak \
        -e 's/lib_lightgbm/lightgbm/' \
        R/*.R NAMESPACE

    # 'processx' is listed as a 'Suggests' dependency in DESCRIPTION
    # because it is used in install.libs.R, a file that is not
    # included in the CRAN distribution of the package
    sed \
        -i.bak \
        '/processx/d' \
        DESCRIPTION

    echo "Cleaning sed backup files"
    rm R/*.R.bak
    rm NAMESPACE.bak
    # 'sed -i.bak' also left backups of DESCRIPTION and configure.ac
    rm -f DESCRIPTION.bak configure.ac.bak

cd "${ORIG_WD}"
188

189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# Build the source tarball with `R CMD build`. When vignettes are built,
# the tarball is unpacked and re-packed without compiled artifacts.
if ${BUILD_VIGNETTES}; then
    "${LGB_R_EXECUTABLE}" CMD build --keep-empty-dirs lightgbm_r

    echo "removing object files created by vignettes"
    TARBALL_NAME="lightgbm_${LGB_VERSION}.tar.gz"
    rm -rf ./_tmp
    mkdir _tmp
    mv "${TARBALL_NAME}" _tmp/

    echo "untarring ${TARBALL_NAME}"
    # the subshell keeps the directory change local to the extraction
    (
        cd _tmp
        tar -xvf "${TARBALL_NAME}" > /dev/null 2>&1
        rm -rf "${TARBALL_NAME}"
    )
    echo "done untarring ${TARBALL_NAME}"

    echo "re-tarring ${TARBALL_NAME}"
    # the exclude patterns are quoted so that tar, not the shell,
    # interprets the wildcards
    tar \
        -czv \
        -C ./_tmp \
        '--exclude=*.a' \
        '--exclude=*.dll' \
        '--exclude=*.o' \
        '--exclude=*.so' \
        '--exclude=*.tar.gz' \
        '--exclude=**/conftest.c' \
        '--exclude=**/conftest.exe' \
        -f "${TARBALL_NAME}" \
        lightgbm \
        > /dev/null 2>&1
    echo "Done creating ${TARBALL_NAME}"

    rm -rf ./_tmp
else
    "${LGB_R_EXECUTABLE}" CMD build --keep-empty-dirs --no-build-vignettes lightgbm_r
fi

echo "Done building R package"