#!/bin/sh

# [description]
#     Prepare a source distribution of the R package
#     to be submitted to CRAN.
#
#     Every input path used below (R-package/, include/, src/,
#     external_libs/, VERSION.txt) is relative, so the script has to be
#     started from the directory that contains them. The package is
#     staged in ./lightgbm_r, which is deleted and recreated on each run.
#
# [usage]
#     sh build-cran-package.sh

# abort as soon as any command fails
set -e

ORIG_WD=$(pwd)
TEMP_R_DIR="${ORIG_WD}/lightgbm_r"

# Start from an empty staging directory so that files left over from a
# previous run cannot end up in the package. The expansions are quoted
# because the path is derived from the working directory and may
# contain spaces.
if [ -d "${TEMP_R_DIR}" ]; then
    rm -r -- "${TEMP_R_DIR}"
fi
mkdir -p -- "${TEMP_R_DIR}"

# date stamp in YYYY-MM-DD form
CURRENT_DATE=$(date +'%Y-%m-%d')

# R packages cannot have versions like 3.0.0rc1, but
# 3.0.0-1 is acceptable
#
# sed reads VERSION.txt directly instead of through 'cat |': the exit
# status of a pipeline is that of its last command, so with the pipeline
# a missing VERSION.txt went unnoticed by 'set -e' and produced an
# empty version string.
LGB_VERSION=$(sed "s/rc/-/g" VERSION.txt)

# copy the relevant files into the staging directory:
# the contents of R-package/ become the package root, while the
# include/ directory and the contents of src/ go into its src/
# (expansions are quoted so a staging path with spaces still works)
cp -R R-package/* "${TEMP_R_DIR}"
cp -R include "${TEMP_R_DIR}/src/"
cp -R src/* "${TEMP_R_DIR}/src/"

# fast_double_parser.h is placed directly among the LightGBM headers
cp \
    external_libs/fast_double_parser/include/fast_double_parser.h \
    "${TEMP_R_DIR}/src/include/LightGBM"

# the fmt headers get their own fmt/ subdirectory below the LightGBM headers
mkdir -p "${TEMP_R_DIR}/src/include/LightGBM/fmt"
cp \
    external_libs/fmt/include/fmt/*.h \
    "${TEMP_R_DIR}/src/include/LightGBM/fmt/"

# including only specific files from Eigen, to keep the R package
# small and avoid redistributing code with licenses incompatible with
# LightGBM's license
EIGEN_R_DIR="${TEMP_R_DIR}/src/include/Eigen"
mkdir -p "${EIGEN_R_DIR}"

# ${modules} is intentionally left unquoted in the loop header: it is a
# fixed, space-separated list that has to be split into words
modules="Cholesky Core Dense Eigenvalues Geometry Householder Jacobi LU QR SVD"
for eigen_module in ${modules}; do
    # the module's top-level header, e.g. Eigen/Core
    cp "external_libs/eigen/Eigen/${eigen_module}" "${EIGEN_R_DIR}/${eigen_module}"
    # 'Dense' is the only module for which no Eigen/src/<module>/
    # directory is copied (presumably it does not have one)
    if [ "${eigen_module}" != "Dense" ]; then
        mkdir -p "${EIGEN_R_DIR}/src/${eigen_module}/"
        cp -R "external_libs/eigen/Eigen/src/${eigen_module}"/* "${EIGEN_R_DIR}/src/${eigen_module}/"
    fi
done

# src/misc and src/plugins are copied in addition to the per-module directories
mkdir -p "${EIGEN_R_DIR}/src/misc"
cp -R external_libs/eigen/Eigen/src/misc/* "${EIGEN_R_DIR}/src/misc/"

mkdir -p "${EIGEN_R_DIR}/src/plugins"
cp -R external_libs/eigen/Eigen/src/plugins/* "${EIGEN_R_DIR}/src/plugins/"

# Everything indented below runs inside the staging directory.
# The expansion is quoted, and ':?' makes the shell stop with an error
# if the variable is empty or unset: an unquoted empty value would turn
# this into a bare 'cd' and the deletions below would run elsewhere.
cd "${TEMP_R_DIR:?}"

    # Remove files not needed for CRAN
    echo "Removing files not needed for CRAN"
    rm src/install.libs.R
    rm -r inst/
    rm -r pkgdown/
    rm cran-comments.md
    rm AUTOCONF_UBUNTU_VERSION
    rm recreate-configure.sh

    # files only used by the lightgbm CLI aren't needed for
    # the R package
    # (paths are relative to the staging directory entered by the 'cd' above)
    rm src/application/application.cpp
    rm src/include/LightGBM/application.h
    rm src/main.cpp

    # configure.ac and DESCRIPTION have placeholders for version
    # and date so they don't have to be updated manually
    #
    # Only the first placeholder on a line is replaced (no 'g' flag).
    # Each call leaves a '<file>.bak' copy of the unmodified file.
    sed -i.bak -e "s/~~VERSION~~/${LGB_VERSION}/" configure.ac
    sed \
        -i.bak \
        -e "s/~~VERSION~~/${LGB_VERSION}/" \
        -e "s/~~DATE~~/${CURRENT_DATE}/" \
        DESCRIPTION

    # Remove 'region', 'endregion', and 'warning' pragmas, as well as
    # the clang / GCC 'diagnostic' and 'diag_suppress' pragmas.
    # This won't change the correctness of the code. CRAN does
    # not allow you to use compiler flag '-Wno-unknown-pragmas' or
    # pragmas that suppress warnings.
    #
    # strip_unsupported_pragmas
    #   Blanks out (replaces with an empty line) every line containing one
    #   of those pragmas in all *.h, *.hpp and *.cpp files below the
    #   current directory, then deletes the '.bak' copies that
    #   'sed -i.bak' leaves behind.
    #
    #   * The -name tests are grouped with \( \). Without the grouping,
    #     '-exec' binds only to the last '-name', so only '*.cpp.bak'
    #     files were deleted and the header backups stayed in the tree.
    #   * Files are handed to sed by 'find -exec' rather than by looping
    #     over $(find ...), which splits paths containing whitespace.
    strip_unsupported_pragmas() {
        find . \
            -type f \
            \( -name '*.h' -o -name '*.hpp' -o -name '*.cpp' \) \
            -exec sed \
                -i.bak \
                -e 's/^.*#pragma clang diagnostic.*$//' \
                -e 's/^.*#pragma diag_suppress.*$//' \
                -e 's/^.*#pragma GCC diagnostic.*$//' \
                -e 's/^.*#pragma region.*$//' \
                -e 's/^.*#pragma endregion.*$//' \
                -e 's/^.*#pragma warning.*$//' \
                {} +
        find . \
            -type f \
            \( -name '*.h.bak' -o -name '*.hpp.bak' -o -name '*.cpp.bak' \) \
            -exec rm -f {} +
    }

    echo "Removing unknown pragmas in headers"
    strip_unsupported_pragmas

    # common.h refers to the fmt and fast_double_parser headers through
    # '..'-relative paths. Rewrite everything from the first '..' up to
    # the header name so the includes point at the copies placed under
    # LightGBM/ in the staging directory.
    # '|' is used as the delimiter so the slashes need no escaping.
    # This leaves src/include/LightGBM/utils/common.h.bak behind.
    sed \
        -i.bak \
        -e 's|\.\..*fmt/format\.h|LightGBM/fmt/format.h|' \
        -e 's|\.\..*fast_double_parser\.h|LightGBM/fast_double_parser.h|' \
        src/include/LightGBM/utils/common.h

    # A package built through 'configure' appears to always produce a
    # shared library named <packagename>.so/dll, i.e. 'lightgbm.so'.
    # The package sources refer to 'lib_lightgbm' instead, in keeping
    # with how this project has historically handled installation, so
    # the name is rewritten in every R source file and in NAMESPACE
    # (first occurrence on each line).
    echo "Changing lib_lightgbm to lightgbm"
    for target in R/*.R NAMESPACE; do
        sed -i.bak -e 's/lib_lightgbm/lightgbm/' "${target}"
    done

    # DESCRIPTION names 'processx' as a 'Suggests' dependency only
    # because install.libs.R uses it. That file is not part of the
    # CRAN distribution, so every DESCRIPTION line mentioning
    # 'processx' is deleted.
    sed -i.bak -e '/processx/d' DESCRIPTION

    echo "Cleaning sed backup files"
    # Every 'sed -i.bak' call above leaves a '<file>.bak' copy next to the
    # file it edited. Remove all of them, including the backups of
    # DESCRIPTION, configure.ac and common.h that used to be left behind.
    # '-f' keeps an already-missing backup from aborting the script.
    rm -f R/*.R.bak
    rm -f \
        NAMESPACE.bak \
        DESCRIPTION.bak \
        configure.ac.bak \
        src/include/LightGBM/utils/common.h.bak

# return to the directory the script was started from (quoted, since
# the path comes from 'pwd' and may contain spaces)
cd "${ORIG_WD}"

# build the package from the staging directory ('lightgbm_r' is the
# last path component of TEMP_R_DIR), keeping empty directories
R CMD build \
    --keep-empty-dirs \
    lightgbm_r

echo "Done building R package"