Commit c454d419 authored by lisj

Remove the submodule's .gitignore

parent 3359c1f1
/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved. *
* This file is part of the LIBXSMM library. *
* *
* For information on the license, see the LICENSE file. *
* Further information: https://github.com/hfp/libxsmm/ *
* SPDX-License-Identifier: BSD-3-Clause *
******************************************************************************/
/* Alexander Heinecke, Sasikanth Avancha (Intel Corp.)
******************************************************************************/
#ifndef LIBXSMM_DNN_FUSEDGROUPNORM_H
#define LIBXSMM_DNN_FUSEDGROUPNORM_H
#include "libxsmm_dnn.h"
#include "libxsmm_dnn_tensor.h"
/** Opaque handle which represents a LIBXSMM fusedgroupnorm layer */
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_dnn_fusedgroupnorm libxsmm_dnn_fusedgroupnorm;
LIBXSMM_API libxsmm_dnn_fusedgroupnorm* libxsmm_dnn_create_fusedgroupnorm(libxsmm_dnn_fusedgroupnorm_desc fusedgroupnorm_desc, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_destroy_fusedgroupnorm(const libxsmm_dnn_fusedgroupnorm* handle);
LIBXSMM_API libxsmm_dnn_tensor_datalayout* libxsmm_dnn_fusedgroupnorm_create_tensor_datalayout(const libxsmm_dnn_fusedgroupnorm* handle, const libxsmm_dnn_tensor_type type, libxsmm_dnn_err_t* status);
LIBXSMM_API size_t libxsmm_dnn_fusedgroupnorm_get_scratch_size(const libxsmm_dnn_fusedgroupnorm* handle, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_fusedgroupnorm_bind_scratch(libxsmm_dnn_fusedgroupnorm* handle, const void* scratch);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_fusedgroupnorm_release_scratch(libxsmm_dnn_fusedgroupnorm* handle);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_fusedgroupnorm_bind_tensor(libxsmm_dnn_fusedgroupnorm* handle, const libxsmm_dnn_tensor* tensor, const libxsmm_dnn_tensor_type type);
LIBXSMM_API libxsmm_dnn_tensor* libxsmm_dnn_fusedgroupnorm_get_tensor(libxsmm_dnn_fusedgroupnorm* handle, const libxsmm_dnn_tensor_type type, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_fusedgroupnorm_release_tensor(libxsmm_dnn_fusedgroupnorm* handle, const libxsmm_dnn_tensor_type type);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_fusedgroupnorm_execute_st(libxsmm_dnn_fusedgroupnorm* handle, libxsmm_dnn_compute_kind kind,
/*unsigned*/int start_thread, /*unsigned*/int tid);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_fusedgroupnorm_reduce_stats_st(libxsmm_dnn_fusedgroupnorm** handles, int num_handles, libxsmm_dnn_compute_kind kind,
/*unsigned*/int start_thread, /*unsigned*/int tid);
#endif /*LIBXSMM_DNN_FUSEDGROUPNORM_H*/
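A minimal usage sketch of the handle lifecycle declared above. It assumes an already populated libxsmm_dnn_fusedgroupnorm_desc (its definition lives in libxsmm_dnn.h and is not part of this excerpt); the header file name and the LIBXSMM_DNN_COMPUTE_KIND_FWD constant are likewise assumptions taken from libxsmm_dnn.h.
#include <stdlib.h>
#include "libxsmm_dnn_fusedgroupnorm.h" /* file name inferred from the include guard */

void groupnorm_forward(libxsmm_dnn_fusedgroupnorm_desc desc) {
  libxsmm_dnn_err_t status;
  libxsmm_dnn_fusedgroupnorm* handle = libxsmm_dnn_create_fusedgroupnorm(desc, &status);
  /* query, allocate and bind the scratch memory (alignment requirements may apply) */
  void* scratch = malloc(libxsmm_dnn_fusedgroupnorm_get_scratch_size(handle, &status));
  libxsmm_dnn_fusedgroupnorm_bind_scratch(handle, scratch);
  /* ... bind input/output/gamma/beta tensors via libxsmm_dnn_fusedgroupnorm_bind_tensor ... */
  /* single-threaded forward pass (thread 0 of 1) */
  libxsmm_dnn_fusedgroupnorm_execute_st(handle, LIBXSMM_DNN_COMPUTE_KIND_FWD, 0/*start_thread*/, 0/*tid*/);
  libxsmm_dnn_fusedgroupnorm_release_scratch(handle);
  free(scratch);
  libxsmm_dnn_destroy_fusedgroupnorm(handle);
}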
/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved. *
* This file is part of the LIBXSMM library. *
* *
* For information on the license, see the LICENSE file. *
* Further information: https://github.com/hfp/libxsmm/ *
* SPDX-License-Identifier: BSD-3-Clause *
******************************************************************************/
/* Alexander Heinecke, Sasikanth Avancha (Intel Corp.)
******************************************************************************/
#ifndef LIBXSMM_DNN_SGD_H
#define LIBXSMM_DNN_SGD_H
#include "libxsmm_dnn.h"
#include "libxsmm_dnn_tensor.h"
/** Opaque handle which represents a LIBXSMM optimizer */
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_dnn_optimizer libxsmm_dnn_optimizer;
typedef enum libxsmm_dnn_optimizer_type {
LIBXSMM_DNN_OPTIMIZER_SGD = 1
} libxsmm_dnn_optimizer_type;
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_dnn_optimizer_desc {
int C; /* number of input feature maps */
int K; /* number of output feature maps */
int bc; /* blocking factor for C */
int bk; /* blocking factor for K */
float learning_rate; /* learning rate */
int threads; /* number of threads used */
libxsmm_dnn_optimizer_type opt_type;
libxsmm_dnn_datatype datatype_master; /* datatype used for the master copy of the weights */
libxsmm_dnn_datatype datatype; /* datatype used for the regular weight and gradient buffers */
libxsmm_dnn_tensor_format filter_format; /* format used for filter buffers */
} libxsmm_dnn_optimizer_desc;
LIBXSMM_API libxsmm_dnn_optimizer* libxsmm_dnn_create_optimizer(libxsmm_dnn_optimizer_desc optimizer_desc, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_destroy_optimizer(const libxsmm_dnn_optimizer* handle);
LIBXSMM_API libxsmm_dnn_tensor_datalayout* libxsmm_dnn_optimizer_create_tensor_datalayout(const libxsmm_dnn_optimizer* handle, const libxsmm_dnn_tensor_type type, libxsmm_dnn_err_t* status);
LIBXSMM_API void* libxsmm_dnn_optimizer_get_scratch_ptr (const libxsmm_dnn_optimizer* handle, libxsmm_dnn_err_t* status);
LIBXSMM_API size_t libxsmm_dnn_optimizer_get_scratch_size(const libxsmm_dnn_optimizer* handle, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_optimizer_bind_scratch(libxsmm_dnn_optimizer* handle, const void* scratch);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_optimizer_release_scratch(libxsmm_dnn_optimizer* handle);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_optimizer_bind_tensor(libxsmm_dnn_optimizer* handle, const libxsmm_dnn_tensor* tensor, const libxsmm_dnn_tensor_type type);
LIBXSMM_API libxsmm_dnn_tensor* libxsmm_dnn_optimizer_get_tensor(libxsmm_dnn_optimizer* handle, const libxsmm_dnn_tensor_type type, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_optimizer_release_tensor(libxsmm_dnn_optimizer* handle, const libxsmm_dnn_tensor_type type);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_optimizer_execute_st(libxsmm_dnn_optimizer* handle, /*unsigned*/int start_thread, /*unsigned*/int tid);
#endif /*LIBXSMM_DNN_SGD_H*/
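A hedged sketch of one SGD step using the descriptor defined above; the sizes are examples, the header file name is inferred from the include guard, and the datatype/format enum values (LIBXSMM_DNN_DATATYPE_F32, LIBXSMM_DNN_TENSOR_FORMAT_LIBXSMM) are assumed to come from libxsmm_dnn.h.
#include "libxsmm_dnn_optimizer.h" /* file name assumed; the include guard above says LIBXSMM_DNN_SGD_H */

void sgd_step_example(int threads) {
  libxsmm_dnn_err_t status;
  libxsmm_dnn_optimizer_desc desc;
  libxsmm_dnn_optimizer* handle;
  desc.C = 64; desc.K = 64;   /* example sizes */
  desc.bc = 64; desc.bk = 64;
  desc.learning_rate = 0.1f;
  desc.threads = threads;
  desc.opt_type = LIBXSMM_DNN_OPTIMIZER_SGD;
  desc.datatype_master = LIBXSMM_DNN_DATATYPE_F32;
  desc.datatype = LIBXSMM_DNN_DATATYPE_F32;
  desc.filter_format = LIBXSMM_DNN_TENSOR_FORMAT_LIBXSMM;
  handle = libxsmm_dnn_create_optimizer(desc, &status);
  /* ... bind LIBXSMM_DNN_REGULAR_FILTER and LIBXSMM_DNN_GRADIENT_FILTER (and, for
     mixed precision, LIBXSMM_DNN_MASTER_FILTER) via libxsmm_dnn_optimizer_bind_tensor ... */
  libxsmm_dnn_optimizer_execute_st(handle, 0/*start_thread*/, 0/*tid*/);
  libxsmm_dnn_destroy_optimizer(handle);
}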
/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved. *
* This file is part of the LIBXSMM library. *
* *
* For information on the license, see the LICENSE file. *
* Further information: https://github.com/hfp/libxsmm/ *
* SPDX-License-Identifier: BSD-3-Clause *
******************************************************************************/
/* Alexander Heinecke, Sasikanth Avancha (Intel Corp.)
******************************************************************************/
#ifndef LIBXSMM_DNN_POOLING_H
#define LIBXSMM_DNN_POOLING_H
#include "libxsmm_dnn.h"
#include "libxsmm_dnn_tensor.h"
/** Opaque handle which represents a LIBXSMM pooling layer */
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_dnn_pooling libxsmm_dnn_pooling;
typedef enum libxsmm_dnn_pooling_type {
LIBXSMM_DNN_POOLING_MAX = 1,
LIBXSMM_DNN_POOLING_AVG = 2
} libxsmm_dnn_pooling_type;
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_dnn_pooling_desc {
int N; /* number of images in mini-batch */
int C; /* number of input feature maps */
int H; /* height of input image */
int W; /* width of input image */
int R; /* kernel height */
int S; /* kernel width */
int u; /* vertical stride */
int v; /* horizontal stride */
int pad_h; /* height of logical padding of input buffer */
int pad_w; /* width of logical padding of input buffer */
int pad_h_in; /* height of physical zero-padding in input buffer */
int pad_w_in; /* width of physical zero-padding in input buffer */
int pad_h_out; /* height of physical zero-padding in output buffer */
int pad_w_out; /* width of physical zero-padding in output buffer */
int threads; /* number of threads used */
libxsmm_dnn_datatype datatype_in; /* datatype used for all input-related buffers */
libxsmm_dnn_datatype datatype_out; /* datatype used for all output-related buffers */
libxsmm_dnn_datatype datatype_mask; /* datatype used for the pooling masks */
libxsmm_dnn_tensor_format buffer_format; /* format used for activation buffers */
libxsmm_dnn_pooling_type pooling_type; /* type of pooling operation */
} libxsmm_dnn_pooling_desc;
LIBXSMM_API libxsmm_dnn_pooling* libxsmm_dnn_create_pooling(libxsmm_dnn_pooling_desc pooling_desc, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_destroy_pooling(const libxsmm_dnn_pooling* handle);
LIBXSMM_API libxsmm_dnn_tensor_datalayout* libxsmm_dnn_pooling_create_tensor_datalayout(const libxsmm_dnn_pooling* handle, const libxsmm_dnn_tensor_type type, libxsmm_dnn_err_t* status);
LIBXSMM_API size_t libxsmm_dnn_pooling_get_scratch_size(const libxsmm_dnn_pooling* handle, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_pooling_bind_scratch(libxsmm_dnn_pooling* handle, const void* scratch);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_pooling_release_scratch(libxsmm_dnn_pooling* handle);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_pooling_bind_tensor(libxsmm_dnn_pooling* handle, const libxsmm_dnn_tensor* tensor, const libxsmm_dnn_tensor_type type);
LIBXSMM_API libxsmm_dnn_tensor* libxsmm_dnn_pooling_get_tensor(libxsmm_dnn_pooling* handle, const libxsmm_dnn_tensor_type type, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_pooling_release_tensor(libxsmm_dnn_pooling* handle, const libxsmm_dnn_tensor_type type);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_pooling_execute_st(libxsmm_dnn_pooling* handle, libxsmm_dnn_compute_kind kind,
/*unsigned*/int start_thread, /*unsigned*/int tid);
#endif /*LIBXSMM_DNN_POOLING_H*/
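A short sketch of filling the descriptor above for a 2x2 max-pooling layer with stride 2; the sizes are illustrative and the datatype/format enum values (LIBXSMM_DNN_DATATYPE_F32, LIBXSMM_DNN_DATATYPE_I32, LIBXSMM_DNN_TENSOR_FORMAT_LIBXSMM) are assumed to come from libxsmm_dnn.h.
#include "libxsmm_dnn_pooling.h" /* file name inferred from the include guard */

libxsmm_dnn_pooling* create_2x2_maxpool(int threads, libxsmm_dnn_err_t* status) {
  libxsmm_dnn_pooling_desc desc;
  desc.N = 32; desc.C = 64; desc.H = 28; desc.W = 28; /* example sizes */
  desc.R = 2; desc.S = 2;   /* 2x2 pooling window */
  desc.u = 2; desc.v = 2;   /* stride 2 in both directions */
  desc.pad_h = 0; desc.pad_w = 0;
  desc.pad_h_in = 0; desc.pad_w_in = 0;
  desc.pad_h_out = 0; desc.pad_w_out = 0;
  desc.threads = threads;
  desc.datatype_in = LIBXSMM_DNN_DATATYPE_F32;
  desc.datatype_out = LIBXSMM_DNN_DATATYPE_F32;
  desc.datatype_mask = LIBXSMM_DNN_DATATYPE_I32;
  desc.buffer_format = LIBXSMM_DNN_TENSOR_FORMAT_LIBXSMM;
  desc.pooling_type = LIBXSMM_DNN_POOLING_MAX;
  return libxsmm_dnn_create_pooling(desc, status);
}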
/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved. *
* This file is part of the LIBXSMM library. *
* *
* For information on the license, see the LICENSE file. *
* Further information: https://github.com/hfp/libxsmm/ *
* SPDX-License-Identifier: BSD-3-Clause *
******************************************************************************/
/* Alexander Heinecke, Kunal Banerjee (Intel Corp.)
******************************************************************************/
#ifndef LIBXSMM_DNN_RNNCELL_H
#define LIBXSMM_DNN_RNNCELL_H
#include "libxsmm_dnn.h"
#include "libxsmm_dnn_tensor.h"
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_dnn_rnncell libxsmm_dnn_rnncell;
/** Type of RNN cell. */
typedef enum libxsmm_dnn_rnncell_type {
/** simple RNN cell with ReLU as activation function */
LIBXSMM_DNN_RNNCELL_RNN_RELU,
/** simple RNN cell with sigmoid as activation function */
LIBXSMM_DNN_RNNCELL_RNN_SIGMOID,
/** simple RNN cell with tanh as activation function */
LIBXSMM_DNN_RNNCELL_RNN_TANH,
/** LSTM cell */
LIBXSMM_DNN_RNNCELL_LSTM,
/** GRU cell */
LIBXSMM_DNN_RNNCELL_GRU
} libxsmm_dnn_rnncell_type;
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_dnn_rnncell_desc {
int threads;
libxsmm_blasint K; /* number of outputs */
libxsmm_blasint N; /* size of the minibatch */
libxsmm_blasint C; /* number of inputs */
libxsmm_blasint max_T; /* number of time steps */
libxsmm_blasint bk; /* blocking factor for K */
libxsmm_blasint bn; /* blocking factor for N */
libxsmm_blasint bc; /* blocking factor for C */
int use_fwd_fused_impl;
int fwd_block;
int bwdupd_block;
libxsmm_dnn_rnncell_type cell_type; /* cell type: RNN ReLU, RNN sigmoid, RNN tanh, LSTM, or GRU */
libxsmm_dnn_datatype datatype_in; /* datatype used for all input-related buffers */
libxsmm_dnn_datatype datatype_out; /* datatype used for all output-related buffers */
libxsmm_dnn_tensor_format buffer_format; /* format used for activation buffers */
libxsmm_dnn_tensor_format filter_format; /* format used for filter buffers */
} libxsmm_dnn_rnncell_desc;
LIBXSMM_API libxsmm_dnn_rnncell* libxsmm_dnn_create_rnncell(libxsmm_dnn_rnncell_desc rnncell_desc, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_destroy_rnncell(const libxsmm_dnn_rnncell* handle);
LIBXSMM_API libxsmm_dnn_tensor_datalayout* libxsmm_dnn_rnncell_create_tensor_datalayout(const libxsmm_dnn_rnncell* handle, const libxsmm_dnn_tensor_type type, libxsmm_dnn_err_t* status);
LIBXSMM_API size_t libxsmm_dnn_rnncell_get_scratch_size(const libxsmm_dnn_rnncell* handle, const libxsmm_dnn_compute_kind kind, libxsmm_dnn_err_t* status);
LIBXSMM_API void* libxsmm_dnn_rnncell_get_scratch_ptr (const libxsmm_dnn_rnncell* handle, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_rnncell_bind_scratch(libxsmm_dnn_rnncell* handle, const libxsmm_dnn_compute_kind kind, const void* scratch);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_rnncell_release_scratch(libxsmm_dnn_rnncell* handle, const libxsmm_dnn_compute_kind kind);
LIBXSMM_API size_t libxsmm_dnn_rnncell_get_internalstate_size(const libxsmm_dnn_rnncell* handle, const libxsmm_dnn_compute_kind kind, libxsmm_dnn_err_t* status);
LIBXSMM_API void* libxsmm_dnn_rnncell_get_internalstate_ptr (const libxsmm_dnn_rnncell* handle, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_rnncell_bind_internalstate(libxsmm_dnn_rnncell* handle, const libxsmm_dnn_compute_kind kind, const void* internalstate);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_rnncell_release_internalstate(libxsmm_dnn_rnncell* handle, const libxsmm_dnn_compute_kind kind);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_rnncell_allocate_forget_bias(libxsmm_dnn_rnncell* handle, const float forget_bias);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_rnncell_bind_tensor(libxsmm_dnn_rnncell* handle, const libxsmm_dnn_tensor* tensor, const libxsmm_dnn_tensor_type type);
LIBXSMM_API libxsmm_dnn_tensor* libxsmm_dnn_rnncell_get_tensor(libxsmm_dnn_rnncell* handle, const libxsmm_dnn_tensor_type type, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_rnncell_release_tensor(libxsmm_dnn_rnncell* handle, const libxsmm_dnn_tensor_type type);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_rnncell_set_sequence_length( libxsmm_dnn_rnncell* handle, const libxsmm_blasint T );
LIBXSMM_API libxsmm_blasint libxsmm_dnn_rnncell_get_sequence_length( libxsmm_dnn_rnncell* handle, libxsmm_dnn_err_t* status );
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_rnncell_execute_st(libxsmm_dnn_rnncell* handle, libxsmm_dnn_compute_kind kind,
/*unsigned*/int start_thread, /*unsigned*/int tid);
#endif /*LIBXSMM_DNN_RNNCELL_H*/
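A minimal sketch of the LSTM forward path using only the functions declared above; sizes and blocking factors are illustrative, and the enum values for datatypes, tensor formats and LIBXSMM_DNN_COMPUTE_KIND_FWD are assumptions taken from libxsmm_dnn.h.
#include <stdlib.h>
#include "libxsmm_dnn_rnncell.h" /* file name inferred from the include guard */

void lstm_forward(int threads) {
  libxsmm_dnn_err_t status;
  libxsmm_dnn_rnncell_desc desc;
  desc.threads = threads;
  desc.K = 256; desc.N = 64; desc.C = 128; desc.max_T = 50; /* example sizes */
  desc.bk = 64; desc.bn = 64; desc.bc = 64;
  desc.use_fwd_fused_impl = 0; desc.fwd_block = 1; desc.bwdupd_block = 1; /* illustrative */
  desc.cell_type = LIBXSMM_DNN_RNNCELL_LSTM;
  desc.datatype_in = LIBXSMM_DNN_DATATYPE_F32;
  desc.datatype_out = LIBXSMM_DNN_DATATYPE_F32;
  desc.buffer_format = LIBXSMM_DNN_TENSOR_FORMAT_NC; /* assumed format enums */
  desc.filter_format = LIBXSMM_DNN_TENSOR_FORMAT_CK;
  {
    libxsmm_dnn_rnncell* handle = libxsmm_dnn_create_rnncell(desc, &status);
    void* scratch = malloc(libxsmm_dnn_rnncell_get_scratch_size(handle, LIBXSMM_DNN_COMPUTE_KIND_FWD, &status));
    libxsmm_dnn_rnncell_bind_scratch(handle, LIBXSMM_DNN_COMPUTE_KIND_FWD, scratch);
    libxsmm_dnn_rnncell_set_sequence_length(handle, 50);
    /* ... bind weights, recurrent weights, bias and state tensors via libxsmm_dnn_rnncell_bind_tensor ... */
    libxsmm_dnn_rnncell_execute_st(handle, LIBXSMM_DNN_COMPUTE_KIND_FWD, 0/*start_thread*/, 0/*tid*/);
    libxsmm_dnn_rnncell_release_scratch(handle, LIBXSMM_DNN_COMPUTE_KIND_FWD);
    free(scratch);
    libxsmm_dnn_destroy_rnncell(handle);
  }
}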
/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved. *
* This file is part of the LIBXSMM library. *
* *
* For information on the license, see the LICENSE file. *
* Further information: https://github.com/hfp/libxsmm/ *
* SPDX-License-Identifier: BSD-3-Clause *
******************************************************************************/
/* Alexander Heinecke, Sasikanth Avancha (Intel Corp.)
******************************************************************************/
#ifndef LIBXSMM_DNN_SOFTMAXLOSS_H
#define LIBXSMM_DNN_SOFTMAXLOSS_H
#include "libxsmm_dnn.h"
#include "libxsmm_dnn_tensor.h"
/** Opaque handle which represents a LIBXSMM softmaxloss layer */
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_dnn_softmaxloss libxsmm_dnn_softmaxloss;
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_dnn_softmaxloss_desc {
int N; /* number of images in mini-batch */
int C; /* number of input feature maps */
int bn; /* requested N blocking for NCNC format */
int bc; /* requested C blocking for NCNC format */
float loss_weight; /* loss weight */
int threads; /* number of threads used */
libxsmm_dnn_datatype datatype; /* datatype used for all buffers */
libxsmm_dnn_tensor_format buffer_format; /* format used for activation buffers */
} libxsmm_dnn_softmaxloss_desc;
LIBXSMM_API libxsmm_dnn_softmaxloss* libxsmm_dnn_create_softmaxloss(libxsmm_dnn_softmaxloss_desc softmaxloss_desc, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_destroy_softmaxloss(const libxsmm_dnn_softmaxloss* handle);
LIBXSMM_API libxsmm_dnn_tensor_datalayout* libxsmm_dnn_softmaxloss_create_tensor_datalayout(const libxsmm_dnn_softmaxloss* handle, const libxsmm_dnn_tensor_type type, libxsmm_dnn_err_t* status);
LIBXSMM_API void* libxsmm_dnn_softmaxloss_get_scratch_ptr (const libxsmm_dnn_softmaxloss* handle, libxsmm_dnn_err_t* status);
LIBXSMM_API size_t libxsmm_dnn_softmaxloss_get_scratch_size(const libxsmm_dnn_softmaxloss* handle, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_softmaxloss_bind_scratch(libxsmm_dnn_softmaxloss* handle, const void* scratch);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_softmaxloss_release_scratch(libxsmm_dnn_softmaxloss* handle);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_softmaxloss_bind_tensor(libxsmm_dnn_softmaxloss* handle, const libxsmm_dnn_tensor* tensor, const libxsmm_dnn_tensor_type type);
LIBXSMM_API libxsmm_dnn_tensor* libxsmm_dnn_softmaxloss_get_tensor(libxsmm_dnn_softmaxloss* handle, const libxsmm_dnn_tensor_type type, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_softmaxloss_release_tensor(libxsmm_dnn_softmaxloss* handle, const libxsmm_dnn_tensor_type type);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_softmaxloss_execute_st(libxsmm_dnn_softmaxloss* handle, libxsmm_dnn_compute_kind kind,
/*unsigned*/int start_thread, /*unsigned*/int tid);
LIBXSMM_API float libxsmm_dnn_softmaxloss_get_loss(const libxsmm_dnn_softmaxloss* handle, libxsmm_dnn_err_t* status);
#endif /*LIBXSMM_DNN_SOFTMAXLOSS_H*/
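A brief sketch of creating a softmaxloss handle from the descriptor above and reading the loss after a forward pass; the sizes are examples and the enum values (LIBXSMM_DNN_DATATYPE_F32, LIBXSMM_DNN_TENSOR_FORMAT_LIBXSMM, LIBXSMM_DNN_COMPUTE_KIND_FWD) are assumed to come from libxsmm_dnn.h.
#include "libxsmm_dnn_softmaxloss.h" /* file name inferred from the include guard */

float softmaxloss_forward(int threads) {
  libxsmm_dnn_err_t status;
  libxsmm_dnn_softmaxloss_desc desc;
  libxsmm_dnn_softmaxloss* handle;
  float loss;
  desc.N = 64; desc.C = 1000; /* example sizes */
  desc.bn = 64; desc.bc = 64;
  desc.loss_weight = 1.0f;
  desc.threads = threads;
  desc.datatype = LIBXSMM_DNN_DATATYPE_F32;
  desc.buffer_format = LIBXSMM_DNN_TENSOR_FORMAT_LIBXSMM;
  handle = libxsmm_dnn_create_softmaxloss(desc, &status);
  /* ... bind input, label and output tensors via libxsmm_dnn_softmaxloss_bind_tensor ... */
  libxsmm_dnn_softmaxloss_execute_st(handle, LIBXSMM_DNN_COMPUTE_KIND_FWD, 0/*start_thread*/, 0/*tid*/);
  loss = libxsmm_dnn_softmaxloss_get_loss(handle, &status);
  libxsmm_dnn_destroy_softmaxloss(handle);
  return loss;
}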
/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved. *
* This file is part of the LIBXSMM library. *
* *
* For information on the license, see the LICENSE file. *
* Further information: https://github.com/hfp/libxsmm/ *
* SPDX-License-Identifier: BSD-3-Clause *
******************************************************************************/
/* Alexander Heinecke (Intel Corp.)
******************************************************************************/
#ifndef LIBXSMM_DNN_TENSOR_H
#define LIBXSMM_DNN_TENSOR_H
#include "libxsmm_typedefs.h"
#include "libxsmm_dnn.h"
/** Opaque handle which represents a LIBXSMM tensor */
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_dnn_tensor libxsmm_dnn_tensor;
typedef enum libxsmm_dnn_tensor_dimtype {
/** Mini-batch */
LIBXSMM_DNN_TENSOR_DIMTYPE_N,
/** Image Height */
LIBXSMM_DNN_TENSOR_DIMTYPE_H,
/** Image Width */
LIBXSMM_DNN_TENSOR_DIMTYPE_W,
/** channels or input channels */
LIBXSMM_DNN_TENSOR_DIMTYPE_C,
/** output channels */
LIBXSMM_DNN_TENSOR_DIMTYPE_K,
/** kernel height */
LIBXSMM_DNN_TENSOR_DIMTYPE_R,
/** kernel width */
LIBXSMM_DNN_TENSOR_DIMTYPE_S,
/** sequence length counter */
LIBXSMM_DNN_TENSOR_DIMTYPE_T,
/** channel group counter */
LIBXSMM_DNN_TENSOR_DIMTYPE_G,
/** general counter */
LIBXSMM_DNN_TENSOR_DIMTYPE_X
} libxsmm_dnn_tensor_dimtype;
/** types of different buffers */
typedef enum libxsmm_dnn_tensor_type {
/** regular input buffer */
LIBXSMM_DNN_REGULAR_INPUT,
/** regular input buffer */
LIBXSMM_DNN_REGULAR_INPUT_ADD,
/** regular input buffer, transpose */
LIBXSMM_DNN_REGULAR_INPUT_TRANS,
/** gradient input buffer */
LIBXSMM_DNN_GRADIENT_INPUT,
/** gradient input buffer */
LIBXSMM_DNN_GRADIENT_INPUT_ADD,
/** regular output buffer */
LIBXSMM_DNN_REGULAR_OUTPUT,
/** gradient output buffer */
LIBXSMM_DNN_GRADIENT_OUTPUT,
/** general input type */
LIBXSMM_DNN_INPUT,
/** general output type */
LIBXSMM_DNN_OUTPUT,
/** general activation type */
LIBXSMM_DNN_ACTIVATION,
/* regular filter */
LIBXSMM_DNN_REGULAR_FILTER,
/* regular filter, transpose */
LIBXSMM_DNN_REGULAR_FILTER_TRANS,
/* gradient filter */
LIBXSMM_DNN_GRADIENT_FILTER,
/* master filter */
LIBXSMM_DNN_MASTER_FILTER,
/** general filter type */
LIBXSMM_DNN_FILTER,
/* regular bias */
LIBXSMM_DNN_REGULAR_CHANNEL_BIAS,
/* gradient bias */
LIBXSMM_DNN_GRADIENT_CHANNEL_BIAS,
/* bias */
LIBXSMM_DNN_CHANNEL_BIAS,
/* regular beta */
LIBXSMM_DNN_REGULAR_CHANNEL_BETA,
/* gradient beta */
LIBXSMM_DNN_GRADIENT_CHANNEL_BETA,
/* beta */
LIBXSMM_DNN_CHANNEL_BETA,
/* regular gamma */
LIBXSMM_DNN_REGULAR_CHANNEL_GAMMA,
/* gradient gamma */
LIBXSMM_DNN_GRADIENT_CHANNEL_GAMMA,
/* Gamma */
LIBXSMM_DNN_CHANNEL_GAMMA,
/* expected value (mean) */
LIBXSMM_DNN_CHANNEL_EXPECTVAL,
/* reciprocal standard deviation */
LIBXSMM_DNN_CHANNEL_RCPSTDDEV,
/* variance */
LIBXSMM_DNN_CHANNEL_VARIANCE,
/** general bias type */
LIBXSMM_DNN_CHANNEL_SCALAR,
/** Labels */
LIBXSMM_DNN_LABEL,
/** batch stats */
LIBXSMM_DNN_BATCH_STATS,
LIBXSMM_DNN_MAX_STATS_FWD,
LIBXSMM_DNN_MAX_STATS_BWD,
LIBXSMM_DNN_MAX_STATS_UPD,
/** pooling mask */
LIBXSMM_DNN_POOLING_MASK,
/** ReLU mask */
LIBXSMM_DNN_RELU_MASK,
/** general type, if needed might cause API issues in copy in/out API */
LIBXSMM_DNN_TENSOR,
/** regular input buffer */
LIBXSMM_DNN_RNN_REGULAR_INPUT,
/** regular previous cell state buffer */
LIBXSMM_DNN_RNN_REGULAR_CS_PREV,
/** regular previous hidden state buffer */
LIBXSMM_DNN_RNN_REGULAR_HIDDEN_STATE_PREV,
/** regular weight (LSTM: wi, wc, wf, wo) */
LIBXSMM_DNN_RNN_REGULAR_WEIGHT,
/** regular recurrent weight (LSTM: ri, rc, rf, ro) */
LIBXSMM_DNN_RNN_REGULAR_RECUR_WEIGHT,
/** regular weight, transpose (LSTM: wi, wc, wf, wo) */
LIBXSMM_DNN_RNN_REGULAR_WEIGHT_TRANS,
/** regular recurrent weight, transpose (LSTM: ri, rc, rf, ro) */
LIBXSMM_DNN_RNN_REGULAR_RECUR_WEIGHT_TRANS,
/** regular bias (LSTM: bi, bc, bf, bo) */
LIBXSMM_DNN_RNN_REGULAR_BIAS,
/** regular output cell state buffer */
LIBXSMM_DNN_RNN_REGULAR_CS,
/** regular hidden state buffer */
LIBXSMM_DNN_RNN_REGULAR_HIDDEN_STATE,
/** gradient input buffer */
LIBXSMM_DNN_RNN_GRADIENT_INPUT,
/** gradient previous cell state buffer */
LIBXSMM_DNN_RNN_GRADIENT_CS_PREV,
/** gradient previous hidden state buffer */
LIBXSMM_DNN_RNN_GRADIENT_HIDDEN_STATE_PREV,
/** gradient weight */
LIBXSMM_DNN_RNN_GRADIENT_WEIGHT,
/** gradient recurrent weight */
LIBXSMM_DNN_RNN_GRADIENT_RECUR_WEIGHT,
/** gradient bias */
LIBXSMM_DNN_RNN_GRADIENT_BIAS,
/** gradient output cell state buffer */
LIBXSMM_DNN_RNN_GRADIENT_CS,
/** gradient hidden state buffer */
LIBXSMM_DNN_RNN_GRADIENT_HIDDEN_STATE,
/** internal i buffer */
LIBXSMM_DNN_RNN_INTERNAL_I,
/** internal f buffer */
LIBXSMM_DNN_RNN_INTERNAL_F,
/** internal o buffer */
LIBXSMM_DNN_RNN_INTERNAL_O,
/** internal ci buffer */
LIBXSMM_DNN_RNN_INTERNAL_CI,
/** internal co buffer */
LIBXSMM_DNN_RNN_INTERNAL_CO
} libxsmm_dnn_tensor_type;
/** layout descriptor to allow external data handling
outside of LIBXSMM */
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_dnn_tensor_datalayout {
libxsmm_dnn_tensor_dimtype* dim_type;
unsigned int* dim_size;
unsigned int num_dims;
libxsmm_dnn_tensor_format format; /* format of activation buffer */
libxsmm_dnn_datatype datatype; /* data type */
libxsmm_dnn_tensor_type tensor_type; /* tensor type */
} libxsmm_dnn_tensor_datalayout;
/** tensorlayout handling */
LIBXSMM_API libxsmm_dnn_tensor_datalayout* libxsmm_dnn_duplicate_tensor_datalayout(const libxsmm_dnn_tensor_datalayout* layout, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_destroy_tensor_datalayout(libxsmm_dnn_tensor_datalayout* layout);
LIBXSMM_API unsigned int libxsmm_dnn_compare_tensor_datalayout(const libxsmm_dnn_tensor_datalayout* layout_a, const libxsmm_dnn_tensor_datalayout* layout_b, libxsmm_dnn_err_t* status);
LIBXSMM_API unsigned int libxsmm_dnn_get_tensor_size(const libxsmm_dnn_tensor_datalayout* layout, libxsmm_dnn_err_t* status);
LIBXSMM_API unsigned int libxsmm_dnn_get_tensor_elements(const libxsmm_dnn_tensor_datalayout* layout, libxsmm_dnn_err_t* status);
/** Create and manage buffers, filters and bias (non-NULL if successful) */
LIBXSMM_API libxsmm_dnn_tensor* libxsmm_dnn_link_tensor(const libxsmm_dnn_tensor_datalayout* layout, const void* data, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_tensor* libxsmm_dnn_link_qtensor(const libxsmm_dnn_tensor_datalayout* layout, const void* data, const unsigned char exp, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_set_tensor_data_ptr(libxsmm_dnn_tensor* tensor, const void* data);
LIBXSMM_API void* libxsmm_dnn_get_tensor_data_ptr(const libxsmm_dnn_tensor* tensor, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_tensor_datalayout* libxsmm_dnn_get_tensor_datalayout(const libxsmm_dnn_tensor* tensor, libxsmm_dnn_err_t* status);
LIBXSMM_API unsigned char libxsmm_dnn_get_qtensor_scf(const libxsmm_dnn_tensor* tensor, libxsmm_dnn_err_t* status);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_set_qtensor_scf(libxsmm_dnn_tensor* tensor, const unsigned char scf);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_destroy_tensor(const libxsmm_dnn_tensor* tensor);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_zero_tensor(const libxsmm_dnn_tensor* tensor);
/**
* Copy-in/out from a plain format such as [N][C][H][W] or [N][H][W][C].
*/
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_copyin_tensor(const libxsmm_dnn_tensor* tensor, const void* data, const libxsmm_dnn_tensor_format in_format);
LIBXSMM_API libxsmm_dnn_err_t libxsmm_dnn_copyout_tensor(const libxsmm_dnn_tensor* tensor, void* data, const libxsmm_dnn_tensor_format out_format);
#endif /*LIBXSMM_DNN_TENSOR_H*/
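A short sketch of linking external memory to a LIBXSMM tensor and converting a plain NCHW buffer into the internal layout; the layout is assumed to come from one of the handle-specific *_create_tensor_datalayout() calls above, and LIBXSMM_DNN_TENSOR_FORMAT_NCHW is an assumed enum value from libxsmm_dnn.h.
#include "libxsmm_dnn_tensor.h" /* file name inferred from the include guard */

libxsmm_dnn_tensor* import_nchw(const libxsmm_dnn_tensor_datalayout* layout,
                                void* internal_buf, const float* nchw_data) {
  libxsmm_dnn_err_t status;
  /* internal_buf must provide at least libxsmm_dnn_get_tensor_size(layout, &status) bytes */
  libxsmm_dnn_tensor* t = libxsmm_dnn_link_tensor(layout, internal_buf, &status);
  /* reorder the plain NCHW data into LIBXSMM's internal (blocked) layout */
  libxsmm_dnn_copyin_tensor(t, nchw_data, LIBXSMM_DNN_TENSOR_FORMAT_NCHW);
  return t;
}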
/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved. *
* This file is part of the LIBXSMM library. *
* *
* For information on the license, see the LICENSE file. *
* Further information: https://github.com/hfp/libxsmm/ *
* SPDX-License-Identifier: BSD-3-Clause *
******************************************************************************/
/* Alexander Heinecke (Intel Corp.)
******************************************************************************/
#ifndef LIBXSMM_FSSPMDM_H
#define LIBXSMM_FSSPMDM_H
#include "libxsmm_typedefs.h"
/** Opaque types for fsspmdm */
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_dfsspmdm libxsmm_dfsspmdm;
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_sfsspmdm libxsmm_sfsspmdm;
LIBXSMM_API libxsmm_dfsspmdm* libxsmm_dfsspmdm_create( libxsmm_blasint M, libxsmm_blasint N, libxsmm_blasint K,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc,
const double alpha, const double beta, libxsmm_blasint c_is_nt,
const double* a_dense );
LIBXSMM_API void libxsmm_dfsspmdm_execute( const libxsmm_dfsspmdm* handle, const double* B, double* C );
LIBXSMM_API void libxsmm_dfsspmdm_destroy( libxsmm_dfsspmdm* handle );
LIBXSMM_API libxsmm_sfsspmdm* libxsmm_sfsspmdm_create( libxsmm_blasint M, libxsmm_blasint N, libxsmm_blasint K,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc,
const float alpha, const float beta, libxsmm_blasint c_is_nt,
const float* a_dense );
LIBXSMM_API void libxsmm_sfsspmdm_execute( const libxsmm_sfsspmdm* handle, const float* B, float* C );
LIBXSMM_API void libxsmm_sfsspmdm_destroy( libxsmm_sfsspmdm* handle );
#endif /*LIBXSMM_FSSPMDM_H*/
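A minimal sketch of the double-precision fsspmdm path: C(M,N) += A(M,K) * B(K,N), where A is a constant, mostly-zero matrix passed in dense storage at creation time. The sizes and the tightly packed leading dimensions below are assumptions; consult the library documentation for the exact storage convention.
#include <stdlib.h>
#include "libxsmm_fsspmdm.h" /* file name inferred from the include guard */

void sparse_times_dense(const double* a_dense, const double* B, double* C) {
  const libxsmm_blasint M = 9, N = 16, K = 9; /* example sizes */
  libxsmm_dfsspmdm* handle = libxsmm_dfsspmdm_create(
    M, N, K, /*lda*/K, /*ldb*/N, /*ldc*/N,
    /*alpha*/1.0, /*beta*/1.0, /*c_is_nt*/0, a_dense);
  if (NULL != handle) {
    libxsmm_dfsspmdm_execute(handle, B, C);
    libxsmm_dfsspmdm_destroy(handle);
  }
}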
/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved. *
* This file is part of the LIBXSMM library. *
* *
* For information on the license, see the LICENSE file. *
* Further information: https://github.com/hfp/libxsmm/ *
* SPDX-License-Identifier: BSD-3-Clause *
******************************************************************************/
/* Alexander Heinecke, Hans Pabst (Intel Corp.)
******************************************************************************/
#ifndef LIBXSMM_GENERATOR_H
#define LIBXSMM_GENERATOR_H
#include "libxsmm_typedefs.h"
#define LIBXSMM_GEMM_NO_BYPASS(FLAGS, ALPHA, BETA) ( \
0 == ((FLAGS) & (LIBXSMM_GEMM_FLAG_TRANS_A)) && \
(LIBXSMM_FEQ(1, ALPHA) /*|| LIBXSMM_FEQ(-1, ALPHA)*/) && \
(LIBXSMM_FEQ(1, BETA) || LIBXSMM_FEQ(0, BETA)))
/** Initialize GEMM descriptor as used by low-level routines (type-specific). */
LIBXSMM_API libxsmm_gemm_descriptor* libxsmm_dgemm_descriptor_init(libxsmm_descriptor_blob* blob,
libxsmm_blasint m, libxsmm_blasint n, libxsmm_blasint k,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc,
double alpha, double beta, int flags, int prefetch);
LIBXSMM_API libxsmm_gemm_descriptor* libxsmm_sgemm_descriptor_init(libxsmm_descriptor_blob* blob,
libxsmm_blasint m, libxsmm_blasint n, libxsmm_blasint k,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc,
float alpha, float beta, int flags, int prefetch);
LIBXSMM_API libxsmm_gemm_descriptor* libxsmm_wigemm_descriptor_init(libxsmm_descriptor_blob* blob,
libxsmm_blasint m, libxsmm_blasint n, libxsmm_blasint k,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc,
int alpha, int beta, int flags, int prefetch);
LIBXSMM_API libxsmm_gemm_descriptor* libxsmm_bigemm_descriptor_init(libxsmm_descriptor_blob* blob,
libxsmm_blasint m, libxsmm_blasint n, libxsmm_blasint k,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc,
int alpha, int beta, int flags, int prefetch);
LIBXSMM_API libxsmm_gemm_descriptor* libxsmm_bbgemm_descriptor_init(libxsmm_descriptor_blob* blob,
libxsmm_blasint m, libxsmm_blasint n, libxsmm_blasint k,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc,
int alpha, int beta, int flags, int prefetch);
LIBXSMM_API libxsmm_gemm_descriptor* libxsmm_bsgemm_descriptor_init(libxsmm_descriptor_blob* blob,
libxsmm_blasint m, libxsmm_blasint n, libxsmm_blasint k,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc,
float alpha, float beta, int flags, int prefetch);
LIBXSMM_API libxsmm_gemm_descriptor* libxsmm_bgemm_descriptor_init(libxsmm_descriptor_blob* blob,
libxsmm_blasint m, libxsmm_blasint n, libxsmm_blasint k,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc,
float alpha, float beta, int flags, int prefetch);
/** Initialize GEMM descriptor (generic: double-precision alpha/beta). */
LIBXSMM_API libxsmm_gemm_descriptor* libxsmm_gemm_descriptor_dinit(libxsmm_descriptor_blob* blob,
libxsmm_gemm_precision precision, libxsmm_blasint m, libxsmm_blasint n, libxsmm_blasint k,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc, double alpha, double beta,
int flags, int prefetch);
LIBXSMM_API libxsmm_gemm_descriptor* libxsmm_gemm_descriptor_dinit2(libxsmm_descriptor_blob* blob,
libxsmm_gemm_precision iprec, libxsmm_gemm_precision oprec, libxsmm_blasint m, libxsmm_blasint n, libxsmm_blasint k,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc,
double alpha, double beta, int flags, int prefetch);
/** Initialize GEMM descriptor as used by low-level routines (generic). */
LIBXSMM_API libxsmm_gemm_descriptor* libxsmm_gemm_descriptor_init(libxsmm_descriptor_blob* blob,
libxsmm_gemm_precision precision, libxsmm_blasint m, libxsmm_blasint n, libxsmm_blasint k,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc, const void* alpha, const void* beta,
int flags, int prefetch);
LIBXSMM_API libxsmm_gemm_descriptor* libxsmm_gemm_descriptor_init2(libxsmm_descriptor_blob* blob,
libxsmm_gemm_precision iprec, libxsmm_gemm_precision oprec, libxsmm_blasint m, libxsmm_blasint n, libxsmm_blasint k,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc, const void* alpha, const void* beta,
int flags, int prefetch);
/** Similar to libxsmm_gemm_descriptor_init2 with optional type-converted alpha/beta (dalpha/dbeta). */
LIBXSMM_API libxsmm_gemm_descriptor* libxsmm_gemm_descriptor_init3(libxsmm_descriptor_blob* blob,
libxsmm_gemm_precision iprec, libxsmm_gemm_precision oprec, libxsmm_blasint m, libxsmm_blasint n, libxsmm_blasint k,
libxsmm_blasint lda, libxsmm_blasint ldb, libxsmm_blasint ldc, const void* alpha, const void* beta,
int flags, int prefetch, double* dalpha, double* dbeta);
/** Initialize matrix element-wise (meltw) descriptor as used by low-level routines. */
LIBXSMM_API libxsmm_meltw_descriptor* libxsmm_meltw_descriptor_init(libxsmm_descriptor_blob* blob,
libxsmm_datatype in_type, libxsmm_datatype out_type,
libxsmm_blasint m, libxsmm_blasint n,
libxsmm_blasint ldo, libxsmm_blasint ldi,
unsigned short flags, unsigned char param, unsigned char operation);
LIBXSMM_API libxsmm_meltw_descriptor* libxsmm_meltw_descriptor_init2(libxsmm_descriptor_blob* blob,
libxsmm_datatype in_type, libxsmm_datatype in2_type, libxsmm_datatype out_type, libxsmm_datatype out2_type,
libxsmm_blasint m, libxsmm_blasint n,
libxsmm_blasint ldo, libxsmm_blasint ldi, libxsmm_blasint ldi2, libxsmm_blasint ldi3,
unsigned short flags, unsigned char param, unsigned char operation);
/** Initialize matrix-equation descriptor as used by low-level routines. */
LIBXSMM_API libxsmm_meqn_descriptor* libxsmm_meqn_descriptor_init(libxsmm_descriptor_blob* blob,
libxsmm_datatype type, libxsmm_blasint m, libxsmm_blasint n,
libxsmm_blasint ldo, unsigned int eqn_idx);
/** Structure referring to the generated code with some attached information. */
LIBXSMM_EXTERN_C typedef struct libxsmm_generated_code {
void* generated_code; /** pointer to memory which can contain strings or binary code */
unsigned int buffer_size; /** total size of the buffer generated_code */
unsigned int code_size; /** number of bytes used in generated_code */
unsigned int code_type; /**
* 0: generated code contains inline assembly in a C function
* which can be dumped into a *.c/cc/cpp file
* 1: generated code contains assembly which can be
* dumped into an *.s file
* >1: generated code contains a function in binary code which can be
* called, when the code is copied into executable memory
*/
unsigned int last_error; /**
* 0: no error occurred
* >0: error code
*/
unsigned int arch; /* target arch for the current code generation task */
unsigned int sf_size; /* offset of RSP to the beginning of the stack frame
* we track this value to have RBP available for general compute
*/
} libxsmm_generated_code;
/** function to translate LIBXSMM Generator error codes to error messages */
LIBXSMM_API
const char* libxsmm_strerror(unsigned int i_error_code);
/* @TODO change int based architecture value */
LIBXSMM_API
void libxsmm_generator_gemm_inlineasm(const char* i_file_out,
const char* i_routine_name,
const libxsmm_gemm_descriptor* i_xgemm_desc,
const char* i_arch );
/* @TODO change int based architecture value */
LIBXSMM_API
void libxsmm_generator_gemm_directasm(const char* i_file_out,
const char* i_routine_name,
const libxsmm_gemm_descriptor* i_xgemm_desc,
const char* i_arch );
LIBXSMM_API
void libxsmm_generator_gemm_kernel(libxsmm_generated_code* io_generated_code,
const libxsmm_gemm_descriptor* i_xgemm_desc );
/* @TODO change int based architecture value */
LIBXSMM_API
void libxsmm_generator_spgemm(const char* i_file_out,
const char* i_routine_name,
const libxsmm_gemm_descriptor* i_xgemm_desc,
const char* i_arch,
const char* i_file_in,
const int i_is_csr);
/* @TODO change int based architecture value */
LIBXSMM_API
void libxsmm_generator_spgemm_csc_kernel(libxsmm_generated_code* io_generated_code,
const libxsmm_gemm_descriptor* i_xgemm_desc,
const char* i_arch,
const unsigned int* i_row_idx,
const unsigned int* i_column_idx,
const double* i_values);
/* @TODO change int based architecture value */
LIBXSMM_API
void libxsmm_generator_spgemm_csr_kernel(libxsmm_generated_code* io_generated_code,
const libxsmm_gemm_descriptor* i_xgemm_desc,
const char* i_arch,
const unsigned int* i_row_idx,
const unsigned int* i_column_idx,
const double* i_values);
/* @TODO change int based architecture value */
LIBXSMM_API
void libxsmm_generator_spgemm_csr_reg_kernel(libxsmm_generated_code* io_generated_code,
const libxsmm_gemm_descriptor* i_xgemm_desc,
const char* i_arch,
const unsigned int* i_row_idx,
const unsigned int* i_column_idx,
const double* i_values);
LIBXSMM_API
void libxsmm_generator_packed_spgemm_csr_kernel( libxsmm_generated_code* io_generated_code,
const libxsmm_gemm_descriptor* i_xgemm_desc,
const unsigned int* i_row_idx,
const unsigned int* i_column_idx,
const void* i_values,
const unsigned int i_packed_width );
LIBXSMM_API
void libxsmm_generator_packed_spgemm_csc_kernel( libxsmm_generated_code* io_generated_code,
const libxsmm_gemm_descriptor* i_xgemm_desc,
const unsigned int* i_row_idx,
const unsigned int* i_column_idx,
const void* i_values,
const unsigned int i_packed_width );
LIBXSMM_API
void libxsmm_generator_packed_gemm_ac_rm( libxsmm_generated_code* io_generated_code,
const libxsmm_gemm_descriptor* i_xgemm_desc,
const unsigned int i_packed_width );
LIBXSMM_API
void libxsmm_generator_packed_gemm_bc_rm( libxsmm_generated_code* io_generated_code,
const libxsmm_gemm_descriptor* i_xgemm_desc,
const unsigned int i_packed_width );
LIBXSMM_API
void libxsmm_generator_mateltwise_kernel( libxsmm_generated_code* io_generated_code,
const libxsmm_meltw_descriptor* i_mateltw_desc );
LIBXSMM_API
void libxsmm_generator_matequation_kernel( libxsmm_generated_code* io_generated_code,
const libxsmm_meqn_descriptor* i_mateqn_desc );
/** Initialization counter that can be used to check whether the library is initialized (!=0) or not (==0). */
LIBXSMM_APIVAR_PUBLIC(unsigned int libxsmm_ninit);
/** Target architecture (libxsmm_get_target_archid, libxsmm_set_target_archid). */
LIBXSMM_APIVAR_PUBLIC(int libxsmm_target_archid);
/** Verbosity level (0: quiet, 1: errors, 2: warnings, 3: info, neg.: all/dump). */
LIBXSMM_APIVAR_PUBLIC(int libxsmm_verbosity);
/** Security-enhanced environment. */
LIBXSMM_APIVAR_PUBLIC(int libxsmm_se);
#endif /*LIBXSMM_GENERATOR_H*/
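A hedged sketch of the low-level generator path declared above: build a small double-precision GEMM descriptor and JIT it into a user-provided buffer. In practice the higher-level dispatch API is usually preferred; the buffer size, the use of 0 for flags/prefetch, and the assumption that the library has been initialized (so that libxsmm_target_archid is valid) are illustrative choices, and the emitted bytes would still have to be copied into executable memory before they could be called.
#include <stdio.h>
#include <stdlib.h>
#include "libxsmm_generator.h" /* file name inferred from the include guard */

int main(void) {
  libxsmm_descriptor_blob blob;
  /* 8x8x8 double-precision GEMM, alpha=1, beta=1, no flags/prefetch (0 used for simplicity) */
  const libxsmm_gemm_descriptor* desc = libxsmm_dgemm_descriptor_init(
    &blob, 8, 8, 8, 8, 8, 8, 1.0, 1.0, 0/*flags*/, 0/*prefetch*/);
  libxsmm_generated_code code;
  code.generated_code = malloc(131072);
  code.buffer_size = 131072;
  code.code_size = 0;
  code.code_type = 2;  /* >1: emit binary code into the buffer */
  code.last_error = 0;
  code.arch = (unsigned int)libxsmm_target_archid; /* current target id (library initialized) */
  code.sf_size = 0;
  libxsmm_generator_gemm_kernel(&code, desc);
  if (0 != code.last_error) {
    fprintf(stderr, "generator error: %s\n", libxsmm_strerror(code.last_error));
  }
  else {
    printf("generated %u bytes of kernel code\n", code.code_size);
  }
  free(code.generated_code);
  return 0;
}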
/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved. *
* This file is part of the LIBXSMM library. *
* *
* For information on the license, see the LICENSE file. *
* Further information: https://github.com/hfp/libxsmm/ *
* SPDX-License-Identifier: BSD-3-Clause *
******************************************************************************/
/* Hans Pabst (Intel Corp.)
******************************************************************************/
#ifndef LIBXSMM_MATH_H
#define LIBXSMM_MATH_H
#include "libxsmm_typedefs.h"
/**
* Structure of differences with matrix norms according
* to http://www.netlib.org/lapack/lug/node75.html).
*/
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_matdiff_info {
/** One-norm */ double norm1_abs, norm1_rel;
/** Infinity-norm */ double normi_abs, normi_rel;
/** Frobenius-norm */ double normf_rel;
/** Maximum difference, L2-norm (absolute and relative), and R-squared. */
double linf_abs, linf_rel, l2_abs, l2_rel, rsq;
/** Statistics: sum/l1, min., max., arith. avg., and variance. */
double l1_ref, min_ref, max_ref, avg_ref, var_ref;
/** Statistics: sum/l1, min., max., arith. avg., and variance. */
double l1_tst, min_tst, max_tst, avg_tst, var_tst;
/** Values (v_ref, v_tst) and location (m, n) of largest linf_abs. */
double v_ref, v_tst;
libxsmm_blasint m, n;
} libxsmm_matdiff_info;
/**
* Utility function to calculate a collection of scalar differences between two matrices (libxsmm_matdiff_info).
* The location (m, n) of the largest difference (linf_abs) is recorded (also in case of NaN). In case of NaN,
* differences are set to infinity. If no difference is discovered, the location (m, n) is negative (OOB).
*/
LIBXSMM_API int libxsmm_matdiff(libxsmm_matdiff_info* info,
libxsmm_datatype datatype, libxsmm_blasint m, libxsmm_blasint n, const void* ref, const void* tst,
const libxsmm_blasint* ldref, const libxsmm_blasint* ldtst);
/**
* Reduces input into output such that the difference is maintained or increased (max function).
* The very first (initial) output should be zeroed (libxsmm_matdiff_clear).
*/
LIBXSMM_API void libxsmm_matdiff_reduce(libxsmm_matdiff_info* output, const libxsmm_matdiff_info* input);
/** Clears the given info-structure, e.g., for the initial reduction-value (libxsmm_matdiff_reduce). */
LIBXSMM_API void libxsmm_matdiff_clear(libxsmm_matdiff_info* info);
/** Greatest common divisor (corner case: the GCD of 0 and 0 is 1). */
LIBXSMM_API size_t libxsmm_gcd(size_t a, size_t b);
/** Least common multiple. */
LIBXSMM_API size_t libxsmm_lcm(size_t a, size_t b);
/**
* This function finds prime-factors (up to 32) of an unsigned integer in ascending order, and
* returns the number of factors found (zero if the given number is prime and unequal to two).
*/
LIBXSMM_API int libxsmm_primes_u32(unsigned int num, unsigned int num_factors_n32[]);
/** Calculate co-prime number <= n/2 (except: libxsmm_shuffle(0|1) == 0). */
LIBXSMM_API size_t libxsmm_shuffle(unsigned int n);
/**
* Divides the product into prime factors and selects factors such that the new product is within
* the given limit (0/1-Knapsack problem), e.g., product=12=2*2*3 and limit=6 then result=2*3=6.
* The limit is at least reached or exceeded with the minimal possible product (is_lower=true).
*/
LIBXSMM_API unsigned int libxsmm_product_limit(unsigned int product, unsigned int limit, int is_lower);
/** Kahan summation: returns accumulator += value and updates the compensation term. */
LIBXSMM_API double libxsmm_kahan_sum(double value, double* accumulator, double* compensation);
/** SQRT with Newton's method using integer arithmetic. */
LIBXSMM_API unsigned int libxsmm_isqrt_u64(unsigned long long x);
/** SQRT with Newton's method using integer arithmetic. */
LIBXSMM_API unsigned int libxsmm_isqrt_u32(unsigned int x);
/** Based on libxsmm_isqrt_u32, but returns an actual factor of x. */
LIBXSMM_API unsigned int libxsmm_isqrt2_u32(unsigned int x);
/** SQRT with Newton's method using double-precision. */
LIBXSMM_API double libxsmm_dsqrt(double x);
/** SQRT with Newton's method using single-precision. */
LIBXSMM_API float libxsmm_ssqrt(float x);
/** CBRT with Newton's method using integer arithmetic. */
LIBXSMM_API unsigned int libxsmm_icbrt_u64(unsigned long long x);
/** CBRT with Newton's method using integer arithmetic. */
LIBXSMM_API unsigned int libxsmm_icbrt_u32(unsigned int x);
/** Single-precision approximation of exponential function (base 2). */
LIBXSMM_API float libxsmm_sexp2(float x);
/**
* Exponential function (base 2), which is limited to unsigned 8-bit input values.
* This function reproduces bit-accurate results (single-precision).
*/
LIBXSMM_API float libxsmm_sexp2_u8(unsigned char x);
/**
* Exponential function (base 2), which is limited to signed 8-bit input values.
* This function reproduces bit-accurate results (single-precision).
*/
LIBXSMM_API float libxsmm_sexp2_i8(signed char x);
/** Similar to libxsmm_sexp2_i8, but takes an integer as signed 8-bit value (check). */
LIBXSMM_API float libxsmm_sexp2_i8i(int x);
/** Inlineable fast tanh (Pade 7/8 rational approximation), such that the compiler can potentially vectorize it. */
LIBXSMM_API_INLINE float libxsmm_stanh_pade78(float i_x) {
const float l_c0 = 2027025.0f;
const float l_c1 = 270270.0f;
const float l_c2 = 6930.0f;
const float l_c3 = 36.0f;
const float l_c1_d = 945945.0f;
const float l_c2_d = 51975.0f;
const float l_c3_d = 630.0f;
const float l_hi_bound = 4.97f;
const float l_lo_bound = -4.97f;
const float l_ones = 1.0f;
const float l_neg_ones = -1.0f;
const float x2 = i_x * i_x;
const float t1_nom = (l_c3 * x2) + l_c2;
const float t2_nom = (t1_nom * x2) + l_c1;
const float t3_nom = (t2_nom * x2) + l_c0;
const float nom = t3_nom * i_x;
const float t1_denom = x2 + l_c3_d;
const float t2_denom = (t1_denom * x2) + l_c2_d;
const float t3_denom = (t2_denom * x2) + l_c1_d;
const float denom = (t3_denom * x2) + l_c0;
float result = nom / denom;
result = (result > l_hi_bound) ? l_ones : result;
result = (result < l_lo_bound) ? l_neg_ones : result;
return result;
}
#endif /*LIBXSMM_MATH_H*/
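A small self-contained check of the inlineable tanh approximation above against the libm reference; the sampling range and step are arbitrary choices.
#include <math.h>
#include <stdio.h>
#include "libxsmm_math.h" /* file name inferred from the include guard */

int main(void) {
  float x, maxdiff = 0.f;
  for (x = -5.f; x <= 5.f; x += 0.01f) {
    /* compare the Pade 7/8 approximation against tanh from math.h */
    const float d = (float)fabs((double)libxsmm_stanh_pade78(x) - tanh((double)x));
    if (d > maxdiff) maxdiff = d;
  }
  printf("max |pade78 - tanh| on [-5,5]: %g\n", maxdiff);
  return 0;
}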
/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved. *
* This file is part of the LIBXSMM library. *
* *
* For information on the license, see the LICENSE file. *
* Further information: https://github.com/hfp/libxsmm/ *
* SPDX-License-Identifier: BSD-3-Clause *
******************************************************************************/
/* Hans Pabst (Intel Corp.)
******************************************************************************/
#ifndef LIBXSMM_MEMORY_H
#define LIBXSMM_MEMORY_H
#include "libxsmm_macros.h"
#if defined(__clang_analyzer__)
# define LIBXSMM_MEMSET127(PTRDST, VALUE, SIZE) memset((void*)(PTRDST), VALUE, SIZE)
#else
# define LIBXSMM_MEMSET127(PTRDST, VALUE, SIZE) { \
char *const libxsmm_memset127_dst_ = (char*)(PTRDST); \
union { size_t size; signed char size1; } libxsmm_memset127_; \
signed char libxsmm_memset127_i_; LIBXSMM_ASSERT((SIZE) <= 127); \
libxsmm_memset127_.size = (SIZE); \
LIBXSMM_PRAGMA_UNROLL \
for (libxsmm_memset127_i_ = 0; libxsmm_memset127_i_ < libxsmm_memset127_.size1; \
++libxsmm_memset127_i_) \
{ \
libxsmm_memset127_dst_[libxsmm_memset127_i_] = (char)(VALUE); \
} \
}
#endif
#define LIBXSMM_MEMZERO127(PTRDST) LIBXSMM_MEMSET127(PTRDST, '\0', sizeof(*(PTRDST)))
#define LIBXSMM_MEMCPY127_LOOP(PTRDST, PTRSRC, SIZE, NTS) { \
const unsigned char *const libxsmm_memcpy127_loop_src_ = (const unsigned char*)(PTRSRC); \
unsigned char *const libxsmm_memcpy127_loop_dst_ = (unsigned char*)(PTRDST); \
signed char libxsmm_memcpy127_loop_i_; LIBXSMM_ASSERT((SIZE) <= 127); \
NTS(libxsmm_memcpy127_loop_dst_) LIBXSMM_PRAGMA_UNROLL \
for (libxsmm_memcpy127_loop_i_ = 0; libxsmm_memcpy127_loop_i_ < (signed char)(SIZE); \
++libxsmm_memcpy127_loop_i_) \
{ \
libxsmm_memcpy127_loop_dst_[libxsmm_memcpy127_loop_i_] = \
libxsmm_memcpy127_loop_src_[libxsmm_memcpy127_loop_i_]; \
} \
}
#define LIBXSMM_MEMCPY127_NTS(...)
#define LIBXSMM_MEMCPY127(PTRDST, PTRSRC, SIZE) \
LIBXSMM_MEMCPY127_LOOP(PTRDST, PTRSRC, SIZE, LIBXSMM_MEMCPY127_NTS)
#define LIBXSMM_ASSIGN127(PTRDST, PTRSRC) LIBXSMM_ASSERT(sizeof(*(PTRSRC)) <= sizeof(*(PTRDST))); \
LIBXSMM_MEMCPY127(PTRDST, PTRSRC, sizeof(*(PTRSRC)))
/**
* Calculates if there is a difference between two (short) buffers.
* Returns zero if there is no difference; otherwise non-zero.
*/
LIBXSMM_API unsigned char libxsmm_diff(const void* a, const void* b, unsigned char size);
/**
* Calculates if there is a difference between "a" and "n x b".
* Returns the index of the first match (or "n" in case of no match).
*/
LIBXSMM_API unsigned int libxsmm_diff_n(const void* a, const void* bn, unsigned char size,
unsigned char stride, unsigned int hint, unsigned int n);
/** Similar to memcmp (C standard library), but the result is conceptually only a boolean. */
LIBXSMM_API int libxsmm_memcmp(const void* a, const void* b, size_t size);
/** Calculate a hash value for the given buffer and seed; accepts NULL-buffer. */
LIBXSMM_API unsigned int libxsmm_hash(const void* data, unsigned int size, unsigned int seed);
/** Calculate a 64-bit hash for the given character string; accepts NULL-string. */
LIBXSMM_API unsigned long long libxsmm_hash_string(const char* string);
/** Return the pointer to the 1st match of "b" in "a", or NULL (no match). */
LIBXSMM_API const char* libxsmm_stristr(const char* a, const char* b);
/**
* Check if pointer is SIMD-aligned and optionally consider the next access (increment in Bytes).
* Optionally calculates the alignment of the given pointer in Bytes.
*/
LIBXSMM_API int libxsmm_aligned(const void* ptr, const size_t* inc, int* alignment);
#endif /*LIBXSMM_MEMORY_H*/
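A brief sketch exercising the small-buffer helpers above; the my_desc_t struct is purely hypothetical and only serves to stay below the 127-byte limit of the macros.
#include <stdio.h>
#include "libxsmm_memory.h" /* file name inferred from the include guard */

typedef struct { int m, n, k; double alpha; } my_desc_t; /* hypothetical small struct */

int main(void) {
  my_desc_t a, b;
  LIBXSMM_MEMZERO127(&a); /* struct is well below the 127-byte limit */
  LIBXSMM_MEMZERO127(&b);
  a.m = a.n = a.k = 8; a.alpha = 1.0;
  LIBXSMM_ASSIGN127(&b, &a); /* small struct copy without calling memcpy */
  printf("structs %s\n", 0 == libxsmm_memcmp(&a, &b, sizeof(a)) ? "match" : "differ");
  printf("hash(\"libxsmm\") = %llu\n", libxsmm_hash_string("libxsmm"));
  return 0;
}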