C API
- Copyright
Copyright (c) 2016 Microsoft Corporation. All rights reserved. Licensed under the MIT License. See LICENSE file in the project root for license information.
Note
To avoid type conversion on large data, most of our exposed interface supports both float32 and float64, except the following:
gradient and Hessian;
current score for training and validation data.
Defines
-
C_API_DTYPE_FLOAT32 (0)
float32 (single precision float).
-
C_API_DTYPE_FLOAT64 (1)
float64 (double precision float).
-
C_API_DTYPE_INT32 (2)
int32.
-
C_API_DTYPE_INT64 (3)
int64.
-
C_API_FEATURE_IMPORTANCE_GAIN (1)
Gain type of feature importance.
-
C_API_FEATURE_IMPORTANCE_SPLIT (0)
Split type of feature importance.
-
C_API_MATRIX_TYPE_CSC (1)
CSC sparse matrix type.
-
C_API_MATRIX_TYPE_CSR (0)
CSR sparse matrix type.
-
C_API_PREDICT_CONTRIB (3)
Predict feature contributions (SHAP values).
-
C_API_PREDICT_LEAF_INDEX (2)
Predict leaf index.
-
C_API_PREDICT_NORMAL (0)
Normal prediction, with transform (if needed).
-
C_API_PREDICT_RAW_SCORE (1)
Predict raw score.
-
INLINE_FUNCTION inline
Inline specifier.
-
THREAD_LOCAL thread_local
Thread local specifier.
Typedefs
-
typedef void *BoosterHandle
Handle of booster.
-
typedef void *ByteBufferHandle
Handle of ByteBuffer.
-
typedef void *DatasetHandle
Handle of dataset.
-
typedef void *FastConfigHandle
Handle of FastConfig.
Functions
-
static char *LastErrorMsg()
Handle of error message.
- Returns:
Error message
-
LIGHTGBM_C_EXPORT int LGBM_BoosterAddValidData(BoosterHandle handle, const DatasetHandle valid_data)
Add new validation data to booster.
- Parameters:
handle – Handle of booster
valid_data – Validation dataset
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterCalcNumPredict(BoosterHandle handle, int num_row, int predict_type, int start_iteration, int num_iteration, int64_t *out_len)
Get number of predictions.
- Parameters:
handle – Handle of booster
num_row – Number of rows
predict_type – What should be predicted:
C_API_PREDICT_NORMAL: normal prediction, with transform (if needed);
C_API_PREDICT_RAW_SCORE: raw score;
C_API_PREDICT_LEAF_INDEX: leaf index;
C_API_PREDICT_CONTRIB: feature contributions (SHAP values)
start_iteration – Start index of the iteration to predict
num_iteration – Number of iterations for prediction, <= 0 means no limit
out_len – [out] Length of prediction
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterCreate(const DatasetHandle train_data, const char *parameters, BoosterHandle *out)
Create a new boosting learner.
- Parameters:
train_data – Training dataset
parameters – Parameters in format ‘key1=value1 key2=value2’
out – [out] Handle of created booster
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterCreateFromModelfile(const char *filename, int *out_num_iterations, BoosterHandle *out)
Load an existing booster from model file.
- Parameters:
filename – Filename of model
out_num_iterations – [out] Number of iterations of this booster
out – [out] Handle of created booster
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterDumpModel(BoosterHandle handle, int start_iteration, int num_iteration, int feature_importance_type, int64_t buffer_len, int64_t *out_len, char *out_str)
Dump model to JSON.
- Parameters:
handle – Handle of booster
start_iteration – Start index of the iteration that should be dumped
num_iteration – Index of the iteration that should be dumped, <= 0 means dump all
feature_importance_type – Type of feature importance, can be C_API_FEATURE_IMPORTANCE_SPLIT or C_API_FEATURE_IMPORTANCE_GAIN
buffer_len – String buffer length, if buffer_len < out_len, you should re-allocate buffer
out_len – [out] Actual output length
out_str – [out] JSON format string of model, should pre-allocate memory
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterFeatureImportance(BoosterHandle handle, int num_iteration, int importance_type, double *out_results)
Get model feature importance.
- Parameters:
handle – Handle of booster
num_iteration – Number of iterations for which feature importance is calculated, <= 0 means use all
importance_type – Method of importance calculation:
C_API_FEATURE_IMPORTANCE_SPLIT: result contains numbers of times the feature is used in a model;
C_API_FEATURE_IMPORTANCE_GAIN: result contains total gains of splits which use the feature
out_results – [out] Result array with feature importance
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterFree(BoosterHandle handle)
Free space for booster.
- Parameters:
handle – Handle of booster to be freed
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterFreePredictSparse(void *indptr, int32_t *indices, void *data, int indptr_type, int data_type)
Method corresponding to LGBM_BoosterPredictSparseOutput to free the allocated data.
- Parameters:
indptr – Pointer to output row headers or column headers to be deallocated
indices – Pointer to sparse indices to be deallocated
data – Pointer to sparse data space to be deallocated
indptr_type – Type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetCurrentIteration(BoosterHandle handle, int *out_iteration)
Get index of the current boosting iteration.
- Parameters:
handle – Handle of booster
out_iteration – [out] Index of the current boosting iteration
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetEval(BoosterHandle handle, int data_idx, int *out_len, double *out_results)
Get evaluation for training data and validation data.
Note
You should call LGBM_BoosterGetEvalNames first to get the names of evaluation metrics.
You should pre-allocate memory for out_results, you can get its length by LGBM_BoosterGetEvalCounts.
- Parameters:
handle – Handle of booster
data_idx – Index of data, 0: training data, 1: 1st validation data, 2: 2nd validation data and so on
out_len – [out] Length of output result
out_results – [out] Array with evaluation results
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int *out_len)
Get number of evaluation metrics.
- Parameters:
handle – Handle of booster
out_len – [out] Total number of evaluation metrics
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalNames(BoosterHandle handle, const int len, int *out_len, const size_t buffer_len, size_t *out_buffer_len, char **out_strs)
Get names of evaluation metrics.
- Parameters:
handle – Handle of booster
len – Number of char* pointers stored at out_strs. If smaller than the max size, only this many strings are copied
out_len – [out] Total number of evaluation metrics
buffer_len – Size of pre-allocated strings. Content is copied up to buffer_len - 1 and null-terminated
out_buffer_len – [out] String sizes required to do the full string copies
out_strs – [out] Names of evaluation metrics, should pre-allocate memory
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetFeatureNames(BoosterHandle handle, const int len, int *out_len, const size_t buffer_len, size_t *out_buffer_len, char **out_strs)
Get names of features.
- Parameters:
handle – Handle of booster
len – Number of char* pointers stored at out_strs. If smaller than the max size, only this many strings are copied
out_len – [out] Total number of features
buffer_len – Size of pre-allocated strings. Content is copied up to buffer_len - 1 and null-terminated
out_buffer_len – [out] String sizes required to do the full string copies
out_strs – [out] Names of features, should pre-allocate memory
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetLeafValue(BoosterHandle handle, int tree_idx, int leaf_idx, double *out_val)
Get leaf value.
- Parameters:
handle – Handle of booster
tree_idx – Index of tree
leaf_idx – Index of leaf
out_val – [out] Output result from the specified leaf
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetLinear(BoosterHandle handle, int *out)
Get int representing whether booster is fitting linear trees.
- Parameters:
handle – Handle of booster
out – [out] The address to hold linear trees indicator
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetLoadedParam(BoosterHandle handle, int64_t buffer_len, int64_t *out_len, char *out_str)
Get parameters as JSON string.
- Parameters:
handle – Handle of booster
buffer_len – Allocated space for string
out_len – [out] Actual size of string
out_str – [out] JSON string containing parameters
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetLowerBoundValue(BoosterHandle handle, double *out_results)
Get model lower bound value.
- Parameters:
handle – Handle of booster
out_results – [out] Result pointing to min value
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetNumClasses(BoosterHandle handle, int *out_len)
Get number of classes.
- Parameters:
handle – Handle of booster
out_len – [out] Number of classes
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetNumFeature(BoosterHandle handle, int *out_len)
Get number of features.
- Parameters:
handle – Handle of booster
out_len – [out] Total number of features
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetNumPredict(BoosterHandle handle, int data_idx, int64_t *out_len)
Get number of predictions for training data and validation data (this can be used to support customized evaluation functions).
- Parameters:
handle – Handle of booster
data_idx – Index of data, 0: training data, 1: 1st validation data, 2: 2nd validation data and so on
out_len – [out] Number of predictions
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetPredict(BoosterHandle handle, int data_idx, int64_t *out_len, double *out_result)
Get prediction for training data and validation data.
Note
You should pre-allocate memory for out_result, its length is equal to num_class * num_data.
- Parameters:
handle – Handle of booster
data_idx – Index of data, 0: training data, 1: 1st validation data, 2: 2nd validation data and so on
out_len – [out] Length of output result
out_result – [out] Pointer to array with predictions
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterGetUpperBoundValue(BoosterHandle handle, double *out_results)
Get model upper bound value.
- Parameters:
handle – Handle of booster
out_results – [out] Result pointing to max value
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterLoadModelFromString(const char *model_str, int *out_num_iterations, BoosterHandle *out)
Load an existing booster from string.
- Parameters:
model_str – Model string
out_num_iterations – [out] Number of iterations of this booster
out – [out] Handle of created booster
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterMerge(BoosterHandle handle, BoosterHandle other_handle)
Merge model from other_handle into handle.
- Parameters:
handle – Handle of booster, will merge another booster into this one
other_handle – Other handle of booster
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterNumberOfTotalModel(BoosterHandle handle, int *out_models)
Get number of weak sub-models.
- Parameters:
handle – Handle of booster
out_models – [out] Number of weak sub-models
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterNumModelPerIteration(BoosterHandle handle, int *out_tree_per_iteration)
Get number of trees per iteration.
- Parameters:
handle – Handle of booster
out_tree_per_iteration – [out] Number of trees per iteration
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForArrow(BoosterHandle handle, int64_t n_chunks, const ArrowArray *chunks, const ArrowSchema *schema, int predict_type, int start_iteration, int num_iteration, const char *parameter, int64_t *out_len, double *out_result)
Make prediction for a new dataset.
Note
You should pre-allocate memory for out_result:
for normal and raw score, its length is equal to num_class * num_data;
for leaf index, its length is equal to num_class * num_data * num_iteration;
for feature contributions, its length is equal to num_class * num_data * (num_feature + 1).
- Parameters:
handle – Handle of booster
n_chunks – The number of Arrow arrays passed to this function
chunks – Pointer to the list of Arrow arrays
schema – Pointer to the schema of all Arrow arrays
predict_type – What should be predicted:
C_API_PREDICT_NORMAL: normal prediction, with transform (if needed);
C_API_PREDICT_RAW_SCORE: raw score;
C_API_PREDICT_LEAF_INDEX: leaf index;
C_API_PREDICT_CONTRIB: feature contributions (SHAP values)
start_iteration – Start index of the iteration to predict
num_iteration – Number of iterations for prediction, <= 0 means no limit
parameter – Other parameters for prediction, e.g. early stopping for prediction
out_len – [out] Length of output result
out_result – [out] Pointer to array with predictions
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSC(BoosterHandle handle, const void *col_ptr, int col_ptr_type, const int32_t *indices, const void *data, int data_type, int64_t ncol_ptr, int64_t nelem, int64_t num_row, int predict_type, int start_iteration, int num_iteration, const char *parameter, int64_t *out_len, double *out_result)
Make prediction for a new dataset in CSC format.
Note
You should pre-allocate memory for out_result:
for normal and raw score, its length is equal to num_class * num_data;
for leaf index, its length is equal to num_class * num_data * num_iteration;
for feature contributions, its length is equal to num_class * num_data * (num_feature + 1).
- Parameters:
handle – Handle of booster
col_ptr – Pointer to column headers
col_ptr_type – Type of col_ptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
indices – Pointer to row indices
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
ncol_ptr – Number of columns in the matrix + 1
nelem – Number of nonzero elements in the matrix
num_row – Number of rows
predict_type – What should be predicted:
C_API_PREDICT_NORMAL: normal prediction, with transform (if needed);
C_API_PREDICT_RAW_SCORE: raw score;
C_API_PREDICT_LEAF_INDEX: leaf index;
C_API_PREDICT_CONTRIB: feature contributions (SHAP values)
start_iteration – Start index of the iteration to predict
num_iteration – Number of iterations for prediction, <= 0 means no limit
parameter – Other parameters for prediction, e.g. early stopping for prediction
out_len – [out] Length of output result
out_result – [out] Pointer to array with predictions
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSR(BoosterHandle handle, const void *indptr, int indptr_type, const int32_t *indices, const void *data, int data_type, int64_t nindptr, int64_t nelem, int64_t num_col, int predict_type, int start_iteration, int num_iteration, const char *parameter, int64_t *out_len, double *out_result)
Make prediction for a new dataset in CSR format.
Note
You should pre-allocate memory for out_result:
for normal and raw score, its length is equal to num_class * num_data;
for leaf index, its length is equal to num_class * num_data * num_iteration;
for feature contributions, its length is equal to num_class * num_data * (num_feature + 1).
- Parameters:
handle – Handle of booster
indptr – Pointer to row headers
indptr_type – Type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
indices – Pointer to column indices
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
nindptr – Number of rows in the matrix + 1
nelem – Number of nonzero elements in the matrix
num_col – Number of columns
predict_type – What should be predicted:
C_API_PREDICT_NORMAL: normal prediction, with transform (if needed);
C_API_PREDICT_RAW_SCORE: raw score;
C_API_PREDICT_LEAF_INDEX: leaf index;
C_API_PREDICT_CONTRIB: feature contributions (SHAP values)
start_iteration – Start index of the iteration to predict
num_iteration – Number of iterations for prediction, <= 0 means no limit
parameter – Other parameters for prediction, e.g. early stopping for prediction
out_len – [out] Length of output result
out_result – [out] Pointer to array with predictions
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSRSingleRow(BoosterHandle handle, const void *indptr, int indptr_type, const int32_t *indices, const void *data, int data_type, int64_t nindptr, int64_t nelem, int64_t num_col, int predict_type, int start_iteration, int num_iteration, const char *parameter, int64_t *out_len, double *out_result)
Make prediction for a new dataset in CSR format. This method re-uses the internal predictor structure from previous calls and is optimized for single row invocation.
Note
You should pre-allocate memory for out_result:
for normal and raw score, its length is equal to num_class * num_data;
for leaf index, its length is equal to num_class * num_data * num_iteration;
for feature contributions, its length is equal to num_class * num_data * (num_feature + 1).
- Parameters:
handle – Handle of booster
indptr – Pointer to row headers
indptr_type – Type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
indices – Pointer to column indices
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
nindptr – Number of rows in the matrix + 1
nelem – Number of nonzero elements in the matrix
num_col – Number of columns
predict_type – What should be predicted:
C_API_PREDICT_NORMAL: normal prediction, with transform (if needed);
C_API_PREDICT_RAW_SCORE: raw score;
C_API_PREDICT_LEAF_INDEX: leaf index;
C_API_PREDICT_CONTRIB: feature contributions (SHAP values)
start_iteration – Start index of the iteration to predict
num_iteration – Number of iterations for prediction, <= 0 means no limit
parameter – Other parameters for prediction, e.g. early stopping for prediction
out_len – [out] Length of output result
out_result – [out] Pointer to array with predictions
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSRSingleRowFast(FastConfigHandle fastConfig_handle, const void *indptr, const int indptr_type, const int32_t *indices, const void *data, const int64_t nindptr, const int64_t nelem, int64_t *out_len, double *out_result)
Faster variant of LGBM_BoosterPredictForCSRSingleRow.
Score single rows after setup with LGBM_BoosterPredictForCSRSingleRowFastInit.
By removing the setup steps from this call, extra optimizations can be made, like initializing the config only once instead of once per call.
Note
Setting up the number of threads is only done once at LGBM_BoosterPredictForCSRSingleRowFastInit instead of at each prediction. If you use a different number of threads in other calls, you need to start the setup process over, or that number of threads will be used for these calls as well.
Note
You should pre-allocate memory for out_result:
for normal and raw score, its length is equal to num_class * num_data;
for leaf index, its length is equal to num_class * num_data * num_iteration;
for feature contributions, its length is equal to num_class * num_data * (num_feature + 1).
- Parameters:
fastConfig_handle – FastConfig object handle returned by LGBM_BoosterPredictForCSRSingleRowFastInit
indptr – Pointer to row headers
indptr_type – Type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
indices – Pointer to column indices
data – Pointer to the data space
nindptr – Number of rows in the matrix + 1
nelem – Number of nonzero elements in the matrix
out_len – [out] Length of output result
out_result – [out] Pointer to array with predictions
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSRSingleRowFastInit(BoosterHandle handle, const int predict_type, const int start_iteration, const int num_iteration, const int data_type, const int64_t num_col, const char *parameter, FastConfigHandle *out_fastConfig)
Initialize and return a FastConfigHandle for use with LGBM_BoosterPredictForCSRSingleRowFast.
Release the FastConfig by passing its handle to LGBM_FastConfigFree when no longer needed.
- Parameters:
handle – Booster handle
predict_type – What should be predicted:
C_API_PREDICT_NORMAL: normal prediction, with transform (if needed);
C_API_PREDICT_RAW_SCORE: raw score;
C_API_PREDICT_LEAF_INDEX: leaf index;
C_API_PREDICT_CONTRIB: feature contributions (SHAP values)
start_iteration – Start index of the iteration to predict
num_iteration – Number of iterations for prediction, <= 0 means no limit
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
num_col – Number of columns
parameter – Other parameters for prediction, e.g. early stopping for prediction
out_fastConfig – [out] FastConfig object with which you can call LGBM_BoosterPredictForCSRSingleRowFast
- Returns:
0 when it succeeds, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForFile(BoosterHandle handle, const char *data_filename, int data_has_header, int predict_type, int start_iteration, int num_iteration, const char *parameter, const char *result_filename)
Make prediction for file.
- Parameters:
handle – Handle of booster
data_filename – Filename of file with data
data_has_header – Whether file has header or not
predict_type – What should be predicted:
C_API_PREDICT_NORMAL: normal prediction, with transform (if needed);
C_API_PREDICT_RAW_SCORE: raw score;
C_API_PREDICT_LEAF_INDEX: leaf index;
C_API_PREDICT_CONTRIB: feature contributions (SHAP values)
start_iteration – Start index of the iteration to predict
num_iteration – Number of iterations for prediction, <= 0 means no limit
parameter – Other parameters for prediction, e.g. early stopping for prediction
result_filename – Filename of result file in which predictions will be written
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMat(BoosterHandle handle, const void *data, int data_type, int32_t nrow, int32_t ncol, int is_row_major, int predict_type, int start_iteration, int num_iteration, const char *parameter, int64_t *out_len, double *out_result)
Make prediction for a new dataset.
Note
You should pre-allocate memory for out_result:
for normal and raw score, its length is equal to num_class * num_data;
for leaf index, its length is equal to num_class * num_data * num_iteration;
for feature contributions, its length is equal to num_class * num_data * (num_feature + 1).
- Parameters:
handle – Handle of booster
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
nrow – Number of rows
ncol – Number of columns
is_row_major – 1 for row-major, 0 for column-major
predict_type – What should be predicted:
C_API_PREDICT_NORMAL: normal prediction, with transform (if needed);
C_API_PREDICT_RAW_SCORE: raw score;
C_API_PREDICT_LEAF_INDEX: leaf index;
C_API_PREDICT_CONTRIB: feature contributions (SHAP values)
start_iteration – Start index of the iteration to predict
num_iteration – Number of iterations for prediction, <= 0 means no limit
parameter – Other parameters for prediction, e.g. early stopping for prediction
out_len – [out] Length of output result
out_result – [out] Pointer to array with predictions
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMats(BoosterHandle handle, const void **data, int data_type, int32_t nrow, int32_t ncol, int predict_type, int start_iteration, int num_iteration, const char *parameter, int64_t *out_len, double *out_result)
Make prediction for a new dataset presented in a form of array of pointers to rows.
Note
You should pre-allocate memory for out_result:
for normal and raw score, its length is equal to num_class * num_data;
for leaf index, its length is equal to num_class * num_data * num_iteration;
for feature contributions, its length is equal to num_class * num_data * (num_feature + 1).
- Parameters:
handle – Handle of booster
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
nrow – Number of rows
ncol – Number of columns
predict_type – What should be predicted:
C_API_PREDICT_NORMAL: normal prediction, with transform (if needed);
C_API_PREDICT_RAW_SCORE: raw score;
C_API_PREDICT_LEAF_INDEX: leaf index;
C_API_PREDICT_CONTRIB: feature contributions (SHAP values)
start_iteration – Start index of the iteration to predict
num_iteration – Number of iterations for prediction, <= 0 means no limit
parameter – Other parameters for prediction, e.g. early stopping for prediction
out_len – [out] Length of output result
out_result – [out] Pointer to array with predictions
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMatSingleRow(BoosterHandle handle, const void *data, int data_type, int ncol, int is_row_major, int predict_type, int start_iteration, int num_iteration, const char *parameter, int64_t *out_len, double *out_result)
Make prediction for a new dataset. This method re-uses the internal predictor structure from previous calls and is optimized for single row invocation.
Note
You should pre-allocate memory for out_result:
for normal and raw score, its length is equal to num_class * num_data;
for leaf index, its length is equal to num_class * num_data * num_iteration;
for feature contributions, its length is equal to num_class * num_data * (num_feature + 1).
- Parameters:
handle – Handle of booster
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
ncol – Number of columns
is_row_major – 1 for row-major, 0 for column-major
predict_type – What should be predicted:
C_API_PREDICT_NORMAL: normal prediction, with transform (if needed);
C_API_PREDICT_RAW_SCORE: raw score;
C_API_PREDICT_LEAF_INDEX: leaf index;
C_API_PREDICT_CONTRIB: feature contributions (SHAP values)
start_iteration – Start index of the iteration to predict
num_iteration – Number of iterations for prediction, <= 0 means no limit
parameter – Other parameters for prediction, e.g. early stopping for prediction
out_len – [out] Length of output result
out_result – [out] Pointer to array with predictions
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMatSingleRowFast(FastConfigHandle fastConfig_handle, const void *data, int64_t *out_len, double *out_result)
Faster variant of LGBM_BoosterPredictForMatSingleRow.
Score a single row after setup with LGBM_BoosterPredictForMatSingleRowFastInit.
By removing the setup steps from this call, extra optimizations can be made, like initializing the config only once instead of once per call.
Note
Setting up the number of threads is only done once at LGBM_BoosterPredictForMatSingleRowFastInit instead of at each prediction. If you use a different number of threads in other calls, you need to start the setup process over, or that number of threads will be used for these calls as well.
- Parameters:
fastConfig_handle – FastConfig object handle returned by LGBM_BoosterPredictForMatSingleRowFastInit
data – Single-row array data (no other way than row-major form).
out_len – [out] Length of output result
out_result – [out] Pointer to array with predictions
- Returns:
0 when it succeeds, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMatSingleRowFastInit(BoosterHandle handle, const int predict_type, const int start_iteration, const int num_iteration, const int data_type, const int32_t ncol, const char *parameter, FastConfigHandle *out_fastConfig)
Initialize and return a FastConfigHandle for use with LGBM_BoosterPredictForMatSingleRowFast.
Release the FastConfig by passing its handle to LGBM_FastConfigFree when no longer needed.
- Parameters:
handle – Booster handle
predict_type – What should be predicted:
C_API_PREDICT_NORMAL: normal prediction, with transform (if needed);
C_API_PREDICT_RAW_SCORE: raw score;
C_API_PREDICT_LEAF_INDEX: leaf index;
C_API_PREDICT_CONTRIB: feature contributions (SHAP values)
start_iteration – Start index of the iteration to predict
num_iteration – Number of iterations for prediction, <= 0 means no limit
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
ncol – Number of columns
parameter – Other parameters for prediction, e.g. early stopping for prediction
out_fastConfig – [out] FastConfig object with which you can call LGBM_BoosterPredictForMatSingleRowFast
- Returns:
0 when it succeeds, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictSparseOutput(BoosterHandle handle, const void *indptr, int indptr_type, const int32_t *indices, const void *data, int data_type, int64_t nindptr, int64_t nelem, int64_t num_col_or_row, int predict_type, int start_iteration, int num_iteration, const char *parameter, int matrix_type, int64_t *out_len, void **out_indptr, int32_t **out_indices, void **out_data)
Make sparse prediction for a new dataset in CSR or CSC format. Currently only used for feature contributions.
Note
The outputs are pre-allocated, as they can vary for each invocation, but the shape should be the same:
for feature contributions, the shape of sparse matrix will be num_class * num_data * (num_feature + 1).
The output indptr_type for the sparse matrix will be the same as the given input indptr_type. Call LGBM_BoosterFreePredictSparse to deallocate resources.
- Parameters:
handle – Handle of booster
indptr – Pointer to row headers for CSR or column headers for CSC
indptr_type – Type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
indices – Pointer to column indices for CSR or row indices for CSC
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
nindptr – Number of entries in indptr
nelem – Number of nonzero elements in the matrix
num_col_or_row – Number of columns for CSR or number of rows for CSC
predict_type – What should be predicted, only feature contributions supported currently:
C_API_PREDICT_CONTRIB: feature contributions (SHAP values)
start_iteration – Start index of the iteration to predict
num_iteration – Number of iterations for prediction, <= 0 means no limit
parameter – Other parameters for prediction, e.g. early stopping for prediction
matrix_type – Type of matrix input and output, can be C_API_MATRIX_TYPE_CSR or C_API_MATRIX_TYPE_CSC
out_len – [out] Length of output data and output indptr (pointer to an array with two entries where to write them)
out_indptr – [out] Pointer to output row headers for CSR or column headers for CSC
out_indices – [out] Pointer to sparse column indices for CSR or row indices for CSC
out_data – [out] Pointer to sparse data space
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterRefit(BoosterHandle handle, const int32_t *leaf_preds, int32_t nrow, int32_t ncol)
Refit the tree model using the new data (online learning).
- Parameters:
handle – Handle of booster
leaf_preds – Pointer to predicted leaf indices
nrow – Number of rows of leaf_preds
ncol – Number of columns of leaf_preds
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterResetParameter(BoosterHandle handle, const char *parameters)
Reset config for booster.
- Parameters:
handle – Handle of booster
parameters – Parameters in format ‘key1=value1 key2=value2’
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterResetTrainingData(BoosterHandle handle, const DatasetHandle train_data)
Reset training data for booster.
- Parameters:
handle – Handle of booster
train_data – Training dataset
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterRollbackOneIter(BoosterHandle handle)
Rollback one iteration.
- Parameters:
handle – Handle of booster
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterSaveModel(BoosterHandle handle, int start_iteration, int num_iteration, int feature_importance_type, const char *filename)
Save model into file.
- Parameters:
handle – Handle of booster
start_iteration – Start index of the iteration that should be saved
num_iteration – Index of the iteration that should be saved, <= 0 means save all
feature_importance_type – Type of feature importance, can be C_API_FEATURE_IMPORTANCE_SPLIT or C_API_FEATURE_IMPORTANCE_GAIN
filename – The name of the file
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterSaveModelToString(BoosterHandle handle, int start_iteration, int num_iteration, int feature_importance_type, int64_t buffer_len, int64_t *out_len, char *out_str)
Save model to string.
- Parameters:
handle – Handle of booster
start_iteration – Start index of the iteration that should be saved
num_iteration – Index of the iteration that should be saved, <= 0 means save all
feature_importance_type – Type of feature importance, can be C_API_FEATURE_IMPORTANCE_SPLIT or C_API_FEATURE_IMPORTANCE_GAIN
buffer_len – String buffer length, if buffer_len < out_len, you should re-allocate buffer
out_len – [out] Actual output length
out_str – [out] String of model, should pre-allocate memory
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterSetLeafValue(BoosterHandle handle, int tree_idx, int leaf_idx, double val)
Set leaf value.
- Parameters:
handle – Handle of booster
tree_idx – Index of tree
leaf_idx – Index of leaf
val – Leaf value
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterShuffleModels(BoosterHandle handle, int start_iter, int end_iter)
Shuffle models.
- Parameters:
handle – Handle of booster
start_iter – The first iteration that will be shuffled
end_iter – The last iteration that will be shuffled
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterUpdateOneIter(BoosterHandle handle, int *is_finished)
Update the model for one iteration.
- Parameters:
handle – Handle of booster
is_finished – [out] 1 means the update was successfully finished (cannot split any more), 0 indicates failure
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterUpdateOneIterCustom(BoosterHandle handle, const float *grad, const float *hess, int *is_finished)
Update the model by specifying gradient and Hessian directly (this can be used to support customized loss functions).
Note
The length of the arrays referenced by grad and hess must be equal to num_class * num_train_data. This is not verified by the library; the caller must ensure this.
- Parameters:
handle – Handle of booster
grad – The first order derivative (gradient) statistics
hess – The second order derivative (Hessian) statistics
is_finished – [out] 1 means the update was successfully finished (cannot split any more), 0 indicates failure
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_BoosterValidateFeatureNames(BoosterHandle handle, const char **data_names, int data_num_features)
Check that the feature names of the data match the ones used to train the booster.
- Parameters:
handle – Handle of booster
data_names – Array with the feature names in the data
data_num_features – Number of features in the data
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_ByteBufferFree(ByteBufferHandle handle)
Free space for byte buffer.
- Parameters:
handle – Handle of byte buffer to be freed
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_ByteBufferGetAt(ByteBufferHandle handle, int32_t index, uint8_t *out_val)
Get a ByteBuffer value at an index.
- Parameters:
handle – Handle of byte buffer to be read
index – Index of value to return
out_val – [out] Byte value at index to return
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetAddFeaturesFrom(DatasetHandle target, DatasetHandle source)
Add features from source to target.
- Parameters:
target – The handle of the dataset to add features to
source – The handle of the dataset to take features from
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateByReference(const DatasetHandle reference, int64_t num_total_row, DatasetHandle *out)
Allocate the space for dataset and bucket feature bins according to reference dataset.
- Parameters:
reference – Used to align bin mapper with other dataset
num_total_row – Number of total rows
out – [out] Created dataset
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromArrow(int64_t n_chunks, const ArrowArray *chunks, const ArrowSchema *schema, const char *parameters, const DatasetHandle reference, DatasetHandle *out)
Create dataset from Arrow.
- Parameters:
n_chunks – The number of Arrow arrays passed to this function
chunks – Pointer to the list of Arrow arrays
schema – Pointer to the schema of all Arrow arrays
parameters – Additional parameters
reference – Used to align bin mapper with other dataset, nullptr means isn’t used
out – [out] Created dataset
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromCSC(const void *col_ptr, int col_ptr_type, const int32_t *indices, const void *data, int data_type, int64_t ncol_ptr, int64_t nelem, int64_t num_row, const char *parameters, const DatasetHandle reference, DatasetHandle *out)
Create a dataset from CSC format.
- Parameters:
col_ptr – Pointer to column headers
col_ptr_type – Type of col_ptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
indices – Pointer to row indices
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
ncol_ptr – Number of columns in the matrix + 1
nelem – Number of nonzero elements in the matrix
num_row – Number of rows
parameters – Additional parameters
reference – Used to align bin mapper with other dataset, nullptr means isn’t used
out – [out] Created dataset
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromCSR(const void *indptr, int indptr_type, const int32_t *indices, const void *data, int data_type, int64_t nindptr, int64_t nelem, int64_t num_col, const char *parameters, const DatasetHandle reference, DatasetHandle *out)
Create a dataset from CSR format.
- Parameters:
indptr – Pointer to row headers
indptr_type – Type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
indices – Pointer to column indices
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
nindptr – Number of rows in the matrix + 1
nelem – Number of nonzero elements in the matrix
num_col – Number of columns
parameters – Additional parameters
reference – Used to align bin mapper with other dataset, nullptr means isn’t used
out – [out] Created dataset
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromCSRFunc(void *get_row_funptr, int num_rows, int64_t num_col, const char *parameters, const DatasetHandle reference, DatasetHandle *out)
Create a dataset from CSR format through callbacks.
- Parameters:
get_row_funptr – Pointer to std::function<void(int idx, std::vector<std::pair<int, double>>& ret)> (called for every row and expected to clear and fill ret)
num_rows – Number of rows
num_col – Number of columns
parameters – Additional parameters
reference – Used to align bin mapper with other dataset, nullptr means isn’t used
out – [out] Created dataset
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromFile(const char *filename, const char *parameters, const DatasetHandle reference, DatasetHandle *out)
Load dataset from file (like LightGBM CLI version does).
- Parameters:
filename – The name of the file
parameters – Additional parameters
reference – Used to align bin mapper with other dataset, nullptr means isn’t used
out – [out] A loaded dataset
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromMat(const void *data, int data_type, int32_t nrow, int32_t ncol, int is_row_major, const char *parameters, const DatasetHandle reference, DatasetHandle *out)
Create dataset from dense matrix.
- Parameters:
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
nrow – Number of rows
ncol – Number of columns
is_row_major – 1 for row-major, 0 for column-major
parameters – Additional parameters
reference – Used to align bin mapper with other dataset, nullptr means isn’t used
out – [out] Created dataset
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromMats(int32_t nmat, const void **data, int data_type, int32_t *nrow, int32_t ncol, int is_row_major, const char *parameters, const DatasetHandle reference, DatasetHandle *out)
Create dataset from array of dense matrices.
- Parameters:
nmat – Number of dense matrices
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
nrow – Number of rows
ncol – Number of columns
is_row_major – 1 for row-major, 0 for column-major
parameters – Additional parameters
reference – Used to align bin mapper with other dataset, nullptr means isn’t used
out – [out] Created dataset
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromSampledColumn(double **sample_data, int **sample_indices, int32_t ncol, const int *num_per_col, int32_t num_sample_row, int32_t num_local_row, int64_t num_dist_row, const char *parameters, DatasetHandle *out)
Allocate the space for dataset and bucket feature bins according to sampled data.
- Parameters:
sample_data – Sampled data, grouped by the column
sample_indices – Indices of sampled data
ncol – Number of columns
num_per_col – Size of each sampling column
num_sample_row – Number of sampled rows
num_local_row – Total number of rows local to machine
num_dist_row – Number of total distributed rows
parameters – Additional parameters
out – [out] Created dataset
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromSerializedReference(const void *ref_buffer, int32_t ref_buffer_size, int64_t num_row, int32_t num_classes, const char *parameters, DatasetHandle *out)
Allocate the space for dataset and bucket feature bins according to serialized reference dataset.
- Parameters:
ref_buffer – A binary representation of the dataset schema (feature groups, bins, etc.)
ref_buffer_size – The size of the reference array in bytes
num_row – Number of total rows the dataset will contain
num_classes – Number of classes (will be used only in case of multiclass and specifying initial scores)
parameters – Additional parameters
out – [out] Created dataset
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetDumpText(DatasetHandle handle, const char *filename)
Save dataset to text file, intended for debugging use only.
- Parameters:
handle – Handle of dataset
filename – The name of the file
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetFree(DatasetHandle handle)
Free space for dataset.
- Parameters:
handle – Handle of dataset to be freed
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetGetFeatureNames(DatasetHandle handle, const int len, int *num_feature_names, const size_t buffer_len, size_t *out_buffer_len, char **feature_names)
Get feature names of dataset.
- Parameters:
handle – Handle of dataset
len – Number of char* pointers stored at feature_names. If smaller than the max size, only this many strings are copied
num_feature_names – [out] Number of feature names
buffer_len – Size of pre-allocated strings. Content is copied up to buffer_len - 1 and null-terminated
out_buffer_len – [out] String sizes required to do the full string copies
feature_names – [out] Feature names, should pre-allocate memory
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetGetFeatureNumBin(DatasetHandle handle, int feature, int *out)
Get number of bins for feature.
- Parameters:
handle – Handle of dataset
feature – Index of the feature
out – [out] The address to hold number of bins
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetGetField(DatasetHandle handle, const char *field_name, int *out_len, const void **out_ptr, int *out_type)
Get info vector from dataset.
- Parameters:
handle – Handle of dataset
field_name – Field name
out_len – [out] Used to set result length
out_ptr – [out] Pointer to the result
out_type – [out] Type of result pointer, can be C_API_DTYPE_INT32, C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetGetNumData(DatasetHandle handle, int *out)
Get number of data points.
- Parameters:
handle – Handle of dataset
out – [out] The address to hold number of data points
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetGetNumFeature(DatasetHandle handle, int *out)
Get number of features.
- Parameters:
handle – Handle of dataset
out – [out] The address to hold number of features
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetGetSubset(const DatasetHandle handle, const int32_t *used_row_indices, int32_t num_used_row_indices, const char *parameters, DatasetHandle *out)
Create subset of a data.
- Parameters:
handle – Handle of full dataset
used_row_indices – Indices used in subset
num_used_row_indices – Length of used_row_indices
parameters – Additional parameters
out – [out] Subset of data
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetInitStreaming(DatasetHandle dataset, int32_t has_weights, int32_t has_init_scores, int32_t has_queries, int32_t nclasses, int32_t nthreads, int32_t omp_max_threads)
Initialize the Dataset for streaming.
- Parameters:
dataset – Handle of dataset
has_weights – Whether the dataset has Metadata weights
has_init_scores – Whether the dataset has Metadata initial scores
has_queries – Whether the dataset has Metadata queries/groups
nclasses – Number of initial score classes
nthreads – Number of external threads that will use the PushRows APIs
omp_max_threads – Maximum number of OpenMP threads (-1 for default)
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetMarkFinished(DatasetHandle dataset)
Mark the Dataset as complete by calling dataset->FinishLoad.
- Parameters:
dataset – Handle of dataset
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetPushRows(DatasetHandle dataset, const void *data, int data_type, int32_t nrow, int32_t ncol, int32_t start_row)
Push data to existing dataset, if nrow + start_row == num_total_row, will call dataset->FinishLoad.
- Parameters:
dataset – Handle of dataset
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
nrow – Number of rows
ncol – Number of columns
start_row – Row start index
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetPushRowsByCSR(DatasetHandle dataset, const void *indptr, int indptr_type, const int32_t *indices, const void *data, int data_type, int64_t nindptr, int64_t nelem, int64_t num_col, int64_t start_row)
Push data to existing dataset, if nrow + start_row == num_total_row (where nrow is the number of rows being pushed, i.e. nindptr - 1), will call dataset->FinishLoad.
- Parameters:
dataset – Handle of dataset
indptr – Pointer to row headers
indptr_type – Type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
indices – Pointer to column indices
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
nindptr – Number of rows in the matrix + 1
nelem – Number of nonzero elements in the matrix
num_col – Number of columns
start_row – Row start index
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetPushRowsByCSRWithMetadata(DatasetHandle dataset, const void *indptr, int indptr_type, const int32_t *indices, const void *data, int data_type, int64_t nindptr, int64_t nelem, int64_t start_row, const float *label, const float *weight, const double *init_score, const int32_t *query, int32_t tid)
Push CSR data to existing dataset. (See LGBM_DatasetPushRowsWithMetadata for more details.)
- Parameters:
dataset – Handle of dataset
indptr – Pointer to row headers
indptr_type – Type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
indices – Pointer to column indices
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
nindptr – Number of rows in the matrix + 1
nelem – Number of nonzero elements in the matrix
start_row – Row start index
label – Pointer to array with nindptr-1 labels
weight – Optional pointer to array with nindptr-1 weights
init_score – Optional pointer to array with (nindptr-1)*nclasses initial scores, in column format
query – Optional pointer to array with nindptr-1 query values
tid – The id of the calling thread, from 0…N-1 threads
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetPushRowsWithMetadata(DatasetHandle dataset, const void *data, int data_type, int32_t nrow, int32_t ncol, int32_t start_row, const float *label, const float *weight, const double *init_score, const int32_t *query, int32_t tid)
Push data to existing dataset. The general flow for a streaming scenario is:
1. create Dataset “schema” (e.g. LGBM_DatasetCreateFromSampledColumn)
2. init them for thread-safe streaming (LGBM_DatasetInitStreaming)
3. push data (LGBM_DatasetPushRowsWithMetadata or LGBM_DatasetPushRowsByCSRWithMetadata)
4. call LGBM_DatasetMarkFinished
- Parameters:
dataset – Handle of dataset
data – Pointer to the data space
data_type – Type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
nrow – Number of rows
ncol – Number of feature columns
start_row – Row start index, i.e., the index at which to start inserting data
label – Pointer to array with nrow labels
weight – Optional pointer to array with nrow weights
init_score – Optional pointer to array with nrow*nclasses initial scores, in column format
query – Optional pointer to array with nrow query values
tid – The id of the calling thread, from 0…N-1 threads
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetSaveBinary(DatasetHandle handle, const char *filename)
Save dataset to binary file.
- Parameters:
handle – Handle of dataset
filename – The name of the file
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetSerializeReferenceToBinary(DatasetHandle handle, ByteBufferHandle *out, int32_t *out_len)
Create a dataset schema representation as a binary byte array (excluding data).
- Parameters:
handle – Handle of dataset
out – [out] The output byte array
out_len – [out] The length of the output byte array (returned for convenience)
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetSetFeatureNames(DatasetHandle handle, const char **feature_names, int num_feature_names)
Save feature names to dataset.
- Parameters:
handle – Handle of dataset
feature_names – Feature names
num_feature_names – Number of feature names
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetSetField(DatasetHandle handle, const char *field_name, const void *field_data, int num_element, int type)
Set vector to a content in info.
Note
group only works for C_API_DTYPE_INT32;
label and weight only work for C_API_DTYPE_FLOAT32;
init_score only works for C_API_DTYPE_FLOAT64.
- Parameters:
handle – Handle of dataset
field_name – Field name, can be label, weight, init_score, group
field_data – Pointer to data vector
num_element – Number of elements in field_data
type – Type of field_data pointer, can be C_API_DTYPE_INT32, C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetSetFieldFromArrow(DatasetHandle handle, const char *field_name, int64_t n_chunks, const ArrowArray *chunks, const ArrowSchema *schema)
Set vector to a content in info.
Note
group converts input datatype into int32;
label and weight convert input datatype into float32;
init_score converts input datatype into float64.
- Parameters:
handle – Handle of dataset
field_name – Field name, can be label, weight, init_score, group
n_chunks – The number of Arrow arrays passed to this function
chunks – Pointer to the list of Arrow arrays
schema – Pointer to the schema of all Arrow arrays
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetSetWaitForManualFinish(DatasetHandle dataset, int wait)
Set whether or not the Dataset waits for a manual MarkFinished call or calls FinishLoad on itself automatically. Set to 1 for streaming scenario, and use LGBM_DatasetMarkFinished to manually finish the Dataset.
- Parameters:
dataset – Handle of dataset
wait – Whether to wait or not (1 or 0)
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DatasetUpdateParamChecking(const char *old_parameters, const char *new_parameters)
Raise errors for attempts to update dataset parameters.
- Parameters:
old_parameters – Current dataset parameters
new_parameters – New dataset parameters
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_DumpParamAliases(int64_t buffer_len, int64_t *out_len, char *out_str)
Dump all parameter names with their aliases to JSON.
- Parameters:
buffer_len – String buffer length, if buffer_len < out_len, you should re-allocate buffer
out_len – [out] Actual output length
out_str – [out] JSON format string of parameters, should pre-allocate memory
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_FastConfigFree(FastConfigHandle fastConfig)
Release FastConfig object.
- Parameters:
fastConfig – Handle to the FastConfig object acquired with a *FastInit() method.
- Returns:
0 when it succeeds, -1 when failure happens
-
LIGHTGBM_C_EXPORT const char *LGBM_GetLastError()
Get string message of the last error.
- Returns:
Error information
-
LIGHTGBM_C_EXPORT int LGBM_GetMaxThreads(int *out)
Get current maximum number of threads used by LightGBM routines in this process.
- Parameters:
out – [out] current maximum number of threads used by LightGBM. -1 means defaulting to omp_get_num_threads().
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_GetSampleCount(int32_t num_total_row, const char *parameters, int *out)
Get number of samples based on parameters and total number of rows of data.
- Parameters:
num_total_row – Number of total rows
parameters – Additional parameters, namely, bin_construct_sample_cnt is used to calculate returned value
out – [out] Number of samples. This value is used to pre-allocate memory to hold sample indices when calling LGBM_SampleIndices
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_NetworkFree()
Finalize the network.
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_NetworkInit(const char *machines, int local_listen_port, int listen_time_out, int num_machines)
Initialize the network.
- Parameters:
machines – List of machines in format ‘ip1:port1,ip2:port2’
local_listen_port – TCP listen port for local machines
listen_time_out – Socket time-out in minutes
num_machines – Total number of machines
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_NetworkInitWithFunctions(int num_machines, int rank, void *reduce_scatter_ext_fun, void *allgather_ext_fun)
Initialize the network with external collective functions.
- Parameters:
num_machines – Total number of machines
rank – Rank of local machine
reduce_scatter_ext_fun – The external reduce-scatter function
allgather_ext_fun – The external allgather function
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_RegisterLogCallback(void (*callback)(const char*))
Register a callback function for log redirecting.
- Parameters:
callback – The callback function to register
- Returns:
0 when succeed, -1 when failure happens
-
LIGHTGBM_C_EXPORT int LGBM_SampleIndices(int32_t num_total_row, const char *parameters, void *out, int32_t *out_len)
Create sample indices for total number of rows.
Note
You should pre-allocate memory for out, you can get its length by LGBM_GetSampleCount.
- Parameters:
num_total_row – Number of total rows
parameters – Additional parameters, namely, bin_construct_sample_cnt and data_random_seed are used to produce the output
out – [out] Created indices, type is int32_t
out_len – [out] Number of indices
- Returns:
0 when succeed, -1 when failure happens
-
inline void LGBM_SetLastError(const char *msg)
Set string message of the last error.
Note
This will call unsafe sprintf when compiled using C standards before C99.
- Parameters:
msg – Error message
-
LIGHTGBM_C_EXPORT int LGBM_SetMaxThreads(int num_threads)
Set maximum number of threads used by LightGBM routines in this process.
- Parameters:
num_threads – maximum number of threads used by LightGBM. -1 means defaulting to omp_get_num_threads().
- Returns:
0 when succeed, -1 when failure happens