删除子模块的gitignore

c454d419 · lisj · 3359c1f1 · c454d419 · c454d419 · c454d419
Commit c454d419 authored May 12, 2023 by lisj
20 changed files
--- a/third_party/libxsmm/obj/intel64/libxsmm_main.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_main.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_malloc.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_malloc.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_math.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_math.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_matrixeqn.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_matrixeqn.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_memory.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_memory.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_mhd.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_mhd.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_noblas.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_noblas.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_perf.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_perf.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_python.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_python.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_rng.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_rng.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_spmdm.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_spmdm.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_sync.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_sync.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_timer.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_timer.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_trace.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_trace.o
--- a/third_party/libxsmm/obj/intel64/libxsmm_xcopy.o
+++ b/third_party/libxsmm/obj/intel64/libxsmm_xcopy.o
--- a/third_party/libxsmm/obj/libxsmm_dispatch.h
+++ b/third_party/libxsmm/obj/libxsmm_dispatch.h
+/* Records a build-state string; the whole block is compiled out when _WIN32 is defined. */
+#if !defined(_WIN32)
+{ static const char *const build_state =
+/* The initializer expression is supplied textually by the included ../.state file
+ * (presumably a string literal written by the build system -- TODO confirm). */
+#   include "../.state"
+  ;
+  /* Publish the pointer via internal_build_state, which is declared outside this fragment. */
+  internal_build_state = build_state;
+}
+#endif
--- a/third_party/libxsmm/samples/cp2k/.make
+++ b/third_party/libxsmm/samples/cp2k/.make
--- a/third_party/libxsmm/samples/deeplearning/tvm_cnnlayer/libxsmm_wrapper/Makefile
+++ b/third_party/libxsmm/samples/deeplearning/tvm_cnnlayer/libxsmm_wrapper/Makefile
+# Builds libxsmm_wrapper.so from batch_reduce_plus_init.cc, linking the LIBXSMM
+# static libraries found under $(LIBXSMMDIR)/lib.
+CC= icpc
+# -fPIC is required for the shared object; it is listed once here instead of
+# being repeated in the build recipe.
+CFLAGS= -O3 -fPIC -std=c++11 -fopenmp
+LDFLAGS= -shared
+SOURCES = batch_reduce_plus_init.cc
+LIBXSMMDIR=./../../../../
+
+INC=-I$(LIBXSMMDIR)/include
+LIBS = $(LIBXSMMDIR)/lib/libxsmm.a $(LIBXSMMDIR)/lib/libxsmmext.a \
+       $(LIBXSMMDIR)/lib/libxsmmnoblas.a $(LIBXSMMDIR)/lib/libxsmmgen.a \
+       $(LIBXSMMDIR)/lib/libxsmmf.a
+
+TARGET= libxsmm_wrapper.so
+
+# all/clean do not name files; declaring them phony keeps them working even if
+# a file called "all" or "clean" exists in this directory.
+.PHONY: all clean
+
+all: $(TARGET)
+
+# Relink only when the source or one of the LIBXSMM archives changed.
+$(TARGET): $(SOURCES) $(LIBS)
+	$(CC) $(INC) $(CFLAGS) $(SOURCES) $(LIBS)  -o $(TARGET) $(LDFLAGS)
+
+clean:
+	rm -f $(TARGET)
+
--- a/third_party/libxsmm/samples/deeplearning/tvm_cnnlayer/libxsmm_wrapper/batch_reduce_plus_init.cc
+++ b/third_party/libxsmm/samples/deeplearning/tvm_cnnlayer/libxsmm_wrapper/batch_reduce_plus_init.cc
+/******************************************************************************
+* Copyright (c) Intel Corporation - All rights reserved.                      *
+* This file is part of the LIBXSMM library.                                   *
+*                                                                             *
+* For information on the license, see the LICENSE file.                       *
+* Further information: https://github.com/hfp/libxsmm/                        *
+* SPDX-License-Identifier: BSD-3-Clause                                       *
+******************************************************************************/
+/* Anand Venkat (Intel Corp.)
+******************************************************************************/
+
+#include <libxsmm.h>
+#include <libxsmm_macros.h>
+
+/**
+ * Runs a LIBXSMM batch-reduce SGEMM kernel over `blocks` (weight, input) block
+ * pairs and writes the result through `output`.
+ *
+ * The kernel is dispatched for M = ofmblock, N = ofw, K = ifmblock with
+ * ldb = stride_w * ifmblock; every other dispatch argument is NULL, i.e. left
+ * to LIBXSMM's defaults (presumably beta = 1, so `output` is accumulated into --
+ * TODO confirm against the library configuration).
+ *
+ * Block idx (0 <= idx < blocks) is addressed with k = idx / (r*s),
+ * i = (idx % (r*s)) / s and j = idx % s as:
+ *   A[idx] = weight + idx * ofmblock * ifmblock
+ *   B[idx] = input  + k * ifw * ifh * ifmblock + i * ifw * ifmblock + j * ifmblock
+ * (r and s are presumably the filter height and width -- TODO confirm). For
+ * r == s == 1 this reduces to the plain per-block strides.
+ *
+ * Returns 0 on success and -1 if `blocks` is negative, `r` or `s` is not
+ * positive, or LIBXSMM could not provide a kernel.
+ */
+extern "C" int  batch_reduce_kernel_update(const float *weight, const float *input, float *output, int blocks, int ofmblock, int ifmblock, int ofw, int stride_w, int r, int s, int ifh, int ifw){
+    /* A negative count would turn into a huge unsigned array length below, and
+       r*s == 0 would divide by zero in the block addressing. */
+    if (blocks < 0 || r <= 0 || s <= 0) {
+        return -1;
+    }
+
+    int ld_b = stride_w*ifmblock;
+    libxsmm_smmfunction_reducebatch_addr batchreduce_kernela = libxsmm_smmdispatch_reducebatch_addr(ofmblock,ofw, ifmblock,NULL,&ld_b,NULL,NULL,NULL, NULL, NULL);
+    if (NULL == batchreduce_kernela) {
+        /* No kernel available (e.g. code generation failed): report instead of calling NULL. */
+        return -1;
+    }
+
+    const unsigned long long cblocks = blocks;
+    /* Pointer tables stay on the stack as before; at least one slot avoids a zero-length array. */
+    const float * A[cblocks > 0 ? cblocks : 1];
+    const float * B[cblocks > 0 ? cblocks : 1];
+
+    /* Offsets are computed in 64 bits so large tensors do not overflow int. */
+    const long long taps = (long long)r * s;
+    const long long weight_stride = (long long)ofmblock * ifmblock;
+    const long long input_stride = (long long)ifw * ifh * ifmblock;
+    const long long row_stride = (long long)ifw * ifmblock;
+
+    /* One flat pass fills every entry, including a trailing partial group when
+       blocks is not a multiple of r*s (those entries were left uninitialized before). */
+    for (long long idx = 0; idx < blocks; ++idx) {
+        const long long k = idx / taps;
+        const long long tap = idx % taps;
+        A[idx] = weight + idx * weight_stride;
+        B[idx] = input + k * input_stride + (tap / s) * row_stride + (tap % s) * ifmblock;
+    }
+
+    /* Reduce batch gemm call  */
+    batchreduce_kernela(A, B, output, &cblocks);
+
+    return 0;
+}
+
+/**
+ * Runs a LIBXSMM batch-reduce SGEMM kernel over `blocks` (weight, input) block
+ * pairs with beta = 0 and writes the result through `output`.
+ *
+ * The kernel is dispatched for M = ofmblock, N = ofw, K = ifmblock with
+ * lda = ldc = ofmblock, ldb = stride_w * ifmblock, beta = 0 and the 'N','N'
+ * GEMM flags; alpha and prefetch are NULL, i.e. left to LIBXSMM's defaults.
+ * With beta = 0 the previous contents of `output` are presumably ignored
+ * (standard GEMM semantics -- TODO confirm for this kernel type).
+ *
+ * Block idx (0 <= idx < blocks) is addressed with k = idx / (r*s),
+ * i = (idx % (r*s)) / s and j = idx % s as:
+ *   A[idx] = weight + idx * ofmblock * ifmblock
+ *   B[idx] = input  + k * ifw * ifh * ifmblock + i * ifw * ifmblock + j * ifmblock
+ * (r and s are presumably the filter height and width -- TODO confirm). For
+ * r == s == 1 this reduces to the plain per-block strides.
+ *
+ * Returns 0 on success and -1 if `blocks` is negative, `r` or `s` is not
+ * positive, or LIBXSMM could not provide a kernel.
+ */
+extern "C" int  batch_reduce_kernel_init_update(const float *weight, const float *input, float *output, int blocks, int ofmblock, int ifmblock,int r, int s, int ifh, int ifw,int ofw, int stride_w ){
+    /* A negative count would turn into a huge unsigned array length below, and
+       r*s == 0 would divide by zero in the block addressing. */
+    if (blocks < 0 || r <= 0 || s <= 0) {
+        return -1;
+    }
+
+    float beta = 0.0;
+    int lda = ofmblock;
+    int ldx = ofmblock;
+    int ld_b = stride_w*ifmblock;
+    int l_flags = ( LIBXSMM_GEMM_FLAGS('N', 'N') );
+    libxsmm_smmfunction_reducebatch_addr batchreduce_kernela = libxsmm_smmdispatch_reducebatch_addr(ofmblock,ofw, ifmblock,&lda,&ld_b,&ldx,NULL,&beta, &l_flags, NULL);
+    if (NULL == batchreduce_kernela) {
+        /* No kernel available (e.g. code generation failed): report instead of calling NULL. */
+        return -1;
+    }
+
+    const unsigned long long cblocks = blocks;
+    /* Pointer tables stay on the stack as before; at least one slot avoids a zero-length array. */
+    const float * A[cblocks > 0 ? cblocks : 1];
+    const float * B[cblocks > 0 ? cblocks : 1];
+
+    /* Offsets are computed in 64 bits so large tensors do not overflow int. */
+    const long long taps = (long long)r * s;
+    const long long weight_stride = (long long)ofmblock * ifmblock;
+    const long long input_stride = (long long)ifw * ifh * ifmblock;
+    const long long row_stride = (long long)ifw * ifmblock;
+
+    /* One flat pass fills every entry, including a trailing partial group when
+       blocks is not a multiple of r*s (those entries were left uninitialized before). */
+    for (long long idx = 0; idx < blocks; ++idx) {
+        const long long k = idx / taps;
+        const long long tap = idx % taps;
+        A[idx] = weight + idx * weight_stride;
+        B[idx] = input + k * input_stride + (tap / s) * row_stride + (tap % s) * ifmblock;
+    }
+
+    /* Reduce batch gemm call  */
+    batchreduce_kernela(A, B, output, &cblocks);
+
+    return 0;
+}
+
+/**
+ * Zero-fills the first ofw * ofmblock floats of `output`.
+ * Always returns 0.
+ */
+extern "C" int  batch_reduce_kernel_init(float *output, int ofmblock, int ofw){
+    const int count = ofmblock * ofw;
+
+    /* The SIMD pragma macro must directly precede the loop it annotates. */
+    LIBXSMM_PRAGMA_SIMD
+    for (int idx = 0; idx < count; ++idx) {
+        output[idx] = 0.0f;
+    }
+
+    return 0;
+}
+
+
--- a/third_party/libxsmm/samples/nek/.make
+++ b/third_party/libxsmm/samples/nek/.make