#!/bin/sh

# [description]
#     Prepare a source distribution of the R package
#     to be submitted to CRAN.
#
# [usage]
#     sh build-cran-package.sh

# abort on the first command that fails
set -e

# directory the script was started from; the build at the end runs from here
ORIG_WD="$(pwd)"
# staging directory in which the package sources are assembled
TEMP_R_DIR="${ORIG_WD}/lightgbm_r"

# always start from an empty staging directory
if test -d "${TEMP_R_DIR}"; then
    rm -r "${TEMP_R_DIR}"
fi
mkdir -p "${TEMP_R_DIR}"

# build date, later substituted for the ~~DATE~~ placeholder
CURRENT_DATE=$(date +'%Y-%m-%d')

# R packages cannot have versions like 3.0.0rc1, but
# 3.0.0-1 is acceptable
#
# sed reads the file directly (no 'cat |' pipeline) so that a missing
# VERSION.txt makes the assignment fail and 'set -e' stops the script
# instead of continuing with an empty version
LGB_VERSION=$(sed "s/rc/-/g" VERSION.txt)

# move relevant files
cp -R R-package/* "${TEMP_R_DIR}"
cp -R include "${TEMP_R_DIR}/src/"
cp -R src/* "${TEMP_R_DIR}/src/"

# headers from external_libs/ are copied under src/include/LightGBM/
cp \
    external_libs/fast_double_parser/include/fast_double_parser.h \
    "${TEMP_R_DIR}/src/include/LightGBM"

mkdir -p "${TEMP_R_DIR}/src/include/LightGBM/fmt"
cp \
    external_libs/fmt/include/fmt/*.h \
    "${TEMP_R_DIR}/src/include/LightGBM/fmt/"

# including only specific files from Eigen, to keep the R package
# small and avoid redistributing code with licenses incompatible with
# LightGBM's license
EIGEN_R_DIR="${TEMP_R_DIR}/src/include/Eigen"
mkdir -p "${EIGEN_R_DIR}"

modules="Cholesky Core Dense Eigenvalues Geometry Householder Jacobi LU QR SVD"
# ${modules} is deliberately unquoted: it must split into one word per module
for eigen_module in ${modules}; do
    cp "external_libs/eigen/Eigen/${eigen_module}" "${EIGEN_R_DIR}/${eigen_module}"
    # every module except 'Dense' also gets its src/<module>/ directory copied
    if [ "${eigen_module}" != "Dense" ]; then
        mkdir -p "${EIGEN_R_DIR}/src/${eigen_module}/"
        cp -R "external_libs/eigen/Eigen/src/${eigen_module}"/* "${EIGEN_R_DIR}/src/${eigen_module}/"
    fi
done

mkdir -p "${EIGEN_R_DIR}/src/misc"
cp -R external_libs/eigen/Eigen/src/misc/* "${EIGEN_R_DIR}/src/misc/"

mkdir -p "${EIGEN_R_DIR}/src/plugins"
cp -R external_libs/eigen/Eigen/src/plugins/* "${EIGEN_R_DIR}/src/plugins/"

# the indented commands that follow run inside the staging directory
cd "${TEMP_R_DIR}"

    # Remove files not needed for CRAN
    echo "Removing files not needed for CRAN"
    rm src/install.libs.R
    rm -r inst/
    rm -r pkgdown/
    rm cran-comments.md
    rm AUTOCONF_UBUNTU_VERSION
    rm recreate-configure.sh

    # files only used by the lightgbm CLI aren't needed for
    # the R package
    rm src/application/application.cpp
    rm src/include/LightGBM/application.h
    rm src/main.cpp

    # configure.ac and DESCRIPTION have placeholders for version
    # and date so they don't have to be updated manually
    sed -i.bak -e "s/~~VERSION~~/${LGB_VERSION}/" configure.ac
    # both DESCRIPTION placeholders are filled in a single pass
    sed \
        -i.bak \
        -e "s/~~VERSION~~/${LGB_VERSION}/" \
        -e "s/~~DATE~~/${CURRENT_DATE}/" \
        DESCRIPTION
    # 'sed -i.bak' leaves a copy of each original file; drop them right away
    rm configure.ac.bak DESCRIPTION.bak

    # Remove 'region', 'endregion', and 'warning' pragmas.
    # This won't change the correctness of the code. CRAN does
    # not allow you to use compiler flag '-Wno-unknown-pragmas' or
    # pragmas that suppress warnings.
    #
    # strip_unsupported_pragmas [root]
    #     Blank out every line containing one of those pragmas in the
    #     *.h, *.hpp and *.cpp files below 'root' (default: current
    #     directory), then delete the backups that 'sed -i.bak' created.
    #     Returns non-zero if either step fails.
    strip_unsupported_pragmas() {
        # find hands the paths straight to sed, so names containing
        # whitespace are not split the way 'for f in $(find ...)' splits them
        find "${1:-.}" \
            -type f \
            \( -name '*.h' -o -name '*.hpp' -o -name '*.cpp' \) \
            -exec sed \
                -i.bak \
                -e 's/^.*#pragma clang diagnostic.*$//' \
                -e 's/^.*#pragma diag_suppress.*$//' \
                -e 's/^.*#pragma GCC diagnostic.*$//' \
                -e 's/^.*#pragma region.*$//' \
                -e 's/^.*#pragma endregion.*$//' \
                -e 's/^.*#pragma warning.*$//' \
                {} + \
            || return 1

        # the parentheses are required: the implicit AND binds tighter than
        # '-o', so without them '-exec rm' would only apply to '*.cpp.bak'
        find "${1:-.}" \
            -type f \
            \( -name '*.h.bak' -o -name '*.hpp.bak' -o -name '*.cpp.bak' \) \
            -exec rm {} +
    }

    echo "Removing unknown pragmas in headers"
    strip_unsupported_pragmas .

    # In common.h, rewrite any path of the form '..<anything>fmt/format.h'
    # or '..<anything>fast_double_parser.h' to the location of those
    # headers under LightGBM/ in the package's include directory.
    sed \
        -i.bak \
        -e 's/\.\..*fmt\/format\.h/LightGBM\/fmt\/format\.h/' \
        -e 's/\.\..*fast_double_parser\.h/LightGBM\/fast_double_parser\.h/' \
        src/include/LightGBM/utils/common.h
    # this backup is created after the sweep that deletes the '*.bak'
    # copies of the C++ sources, so it has to be removed here
    rm src/include/LightGBM/utils/common.h.bak

    # When building an R package with 'configure', it seems
    # you're guaranteed to get a shared library called
    #  <packagename>.so/dll. The package source code expects
    # 'lib_lightgbm.so', not 'lightgbm.so', to comply with the way
    # this project has historically handled installation
    echo "Changing lib_lightgbm to lightgbm"
    for file in R/*.R NAMESPACE; do
        sed \
            -i.bak \
            -e 's/lib_lightgbm/lightgbm/' \
            "${file}"
    done

    # 'processx' is listed as a 'Suggests' dependency in DESCRIPTION
    # because it is used in install.libs.R, a file that is not
    # included in the CRAN distribution of the package
    sed \
        -i.bak \
        '/processx/d' \
        DESCRIPTION

    echo "Cleaning sed backup files"
    rm R/*.R.bak
    rm NAMESPACE.bak
    # the 'processx' edit above always leaves DESCRIPTION.bak behind
    rm DESCRIPTION.bak
    # backups written by earlier sed steps; '-f' makes this a no-op
    # when they have already been removed
    rm -f configure.ac.bak src/include/LightGBM/utils/common.h.bak

# return to the starting directory; 'lightgbm_r' below is relative to it
cd "${ORIG_WD}"

R CMD build \
    --keep-empty-dirs \
    lightgbm_r

echo "Done building R package"