tensor.cpp 14.3 KB
Newer Older
PanZezhong's avatar
init  
PanZezhong committed
1
2
#include "../tensor.hpp"
#include "../utils.hpp"
3
#include <algorithm>
PanZezhong's avatar
init  
PanZezhong committed
4
5
#include <fstream>
#include <iostream>
6
#include <mutex>
PanZezhong's avatar
init  
PanZezhong committed
7
#include <numeric>
PanZezhong's avatar
PanZezhong committed
8
#include <sstream>
PanZezhong's avatar
init  
PanZezhong committed
9
10
11
12

// Builds a shared TensorDesc from an explicit dtype, shape and stride list.
// The three arguments are forwarded unchanged to the TensorDesc constructor.
std::shared_ptr<TensorDesc>
TensorDesc::create(infiniDtype_t dtype, const std::vector<size_t> &shape,
                   const std::vector<ptrdiff_t> &strides) {
    // NOTE(review): plain `new` is kept rather than std::make_shared;
    // presumably the constructor is not publicly accessible -- confirm in tensor.hpp.
    auto *raw = new TensorDesc(dtype, shape, strides);
    return std::shared_ptr<TensorDesc>(raw);
}

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
// Builds a descriptor for a densely packed, row-major tensor: the last
// dimension gets stride 1 and every earlier stride is the product of the
// extents that follow it. A rank-0 shape yields an empty stride list.
std::shared_ptr<TensorDesc>
TensorDesc::create(infiniDtype_t dtype, const std::vector<size_t> &shape) {
    const size_t rank = shape.size();
    std::vector<ptrdiff_t> packed(rank);
    if (rank != 0) {
        packed[rank - 1] = 1;
        // Walk from the innermost dimension outwards.
        for (size_t k = rank - 1; k > 0; --k) {
            packed[k - 1] = packed[k] * shape[k];
        }
    }
    return create(dtype, shape, packed);
}

// Builds a densely packed descriptor whose memory layout follows `order`.
//
// `order[d]` is the layout rank of dimension `d`: the dimension whose rank is
// `ndim - 1` is innermost (stride 1), and each lower rank gets the stride of
// the next-higher rank multiplied by that dimension's extent.
//
// `order` must have the same length as `shape` and be a permutation of
// 0..ndim-1; both conditions are asserted.
std::shared_ptr<TensorDesc>
TensorDesc::createWithOrder(infiniDtype_t dtype, const std::vector<size_t> &shape,
                            const std::vector<size_t> &order) {
    ASSERT_EQ(shape.size(), order.size());
    const size_t ndim = shape.size();
    if (ndim == 0) {
        return create(dtype, shape);
    }

    // Invert the permutation once so each rank maps straight to its dimension.
    // `ndim` marks a rank that has not been claimed yet. Rejecting
    // out-of-range and duplicate ranks guarantees every rank is present, so
    // no lookup below can miss.
    std::vector<size_t> dim_of_rank(ndim, ndim);
    for (size_t d = 0; d < ndim; ++d) {
        ASSERT(order[d] < ndim);
        ASSERT(dim_of_rank[order[d]] == ndim);
        dim_of_rank[order[d]] = d;
    }

    std::vector<ptrdiff_t> strides(ndim);
    ptrdiff_t running = 1;
    for (size_t rank = ndim; rank-- > 0;) {
        const size_t d = dim_of_rank[rank];
        strides[d] = running;
        if (rank > 0) {
            running *= static_cast<ptrdiff_t>(shape[d]);
        }
    }
    return create(dtype, shape, strides);
}

PanZezhong's avatar
PanZezhong committed
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
// Returns the infiniop descriptor handle for this tensor description,
// creating it from the stored shape, strides and dtype on first use and
// returning the cached handle afterwards.
infiniopTensorDescriptor_t TensorDesc::desc() const {
    if (_desc != nullptr) {
        return _desc;
    }
    // The cast lets this const method fill in the cached handle.
    RUN_INFINI(infiniopCreateTensorDescriptor(
        (infiniopTensorDescriptor_t *)(&_desc), _shape.size(), _shape.data(),
        _strides.data(), _dtype));
    return _desc;
}

void TensorDesc::resetDesc() {
    if (this->_desc != nullptr) {
        infiniopDestroyTensorDescriptor(this->_desc);
        this->_desc = nullptr;
    }
}

65
66
67
68
69
70
71
72
73
74
void TensorDesc::computeTensorDesHash() {
    _seed = 0;
    for (auto dim : this->shape()) {
        hash_combine(_seed, dim);
    }
    for (auto stride : this->strides()) {
        hash_combine(_seed, static_cast<size_t>(stride));
    }
}

PanZezhong's avatar
PanZezhong committed
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
// Reports whether the stored strides are exactly the densely packed,
// row-major strides for the stored shape (last stride 1, each earlier stride
// the product of the extents after it).
//
// A rank-0 descriptor has no strides to compare and is reported as
// contiguous. The comparison is strict: a size-1 dimension with a
// non-canonical stride makes the result false.
bool TensorDesc::isContigous() const {
    const size_t rank = this->ndim();
    const auto &dims = this->shape();
    ASSERT_EQ(rank, this->_strides.size());

    // Walk from the innermost dimension outwards, tracking the stride a
    // packed layout would have, and stop at the first mismatch.
    ptrdiff_t expected = 1;
    for (size_t i = rank; i-- > 0;) {
        if (this->_strides[i] != expected) {
            return false;
        }
        if (i > 0) {
            expected *= static_cast<ptrdiff_t>(dims[i]);
        }
    }
    return true;
}

std::string TensorDesc::info() const {
    std::stringstream ss;

    ss << "Tensor: "
       << "shape[ ";
    for (auto s : this->shape()) {
        ss << s << " ";
    }
    ss << "] strides[ ";
    for (auto s : this->strides()) {
        ss << s << " ";
    }
    ss << "] dtype=" << this->dtype();

    return ss.str();
}

PanZezhong's avatar
init  
PanZezhong committed
104
// Releases the cached infiniop descriptor, if one was created.
TensorDesc::~TensorDesc() {
    resetDesc();
}

PanZezhong's avatar
PanZezhong committed
108
109
110
111
112
113
const std::vector<size_t> &Tensor::shape() const { return this->_desc->shape(); }
const std::vector<ptrdiff_t> &Tensor::strides() const { return this->_desc->strides(); }
size_t Tensor::ndim() const { return this->_desc->ndim(); }
infiniDtype_t Tensor::dtype() const { return this->_desc->dtype(); }
infiniDevice_t Tensor::deviceType() const { return this->_storage->deviceType(); }
int Tensor::deviceId() const { return this->_storage->deviceId(); }
PanZezhong's avatar
init  
PanZezhong committed
114
115
// Nothing to release explicitly here; members clean up after themselves.
Tensor::~Tensor() = default;

blkmjsian's avatar
blkmjsian committed
116
117
118
119
// Returns the number of elements, i.e. the product of all extents
// (1 for an empty shape).
size_t Tensor::numel() const {
    const auto &dims = this->shape();
    size_t count = 1;
    for (const size_t extent : dims) {
        count *= extent;
    }
    return count;
}

PanZezhong's avatar
PanZezhong committed
120
ptrdiff_t Tensor::dataOffset() const {
PanZezhong's avatar
PanZezhong committed
121
    return _offset;
PanZezhong's avatar
init  
PanZezhong committed
122
123
}

PanZezhong's avatar
PanZezhong committed
124
// Returns the infiniop descriptor handle owned by this tensor's TensorDesc.
infiniopTensorDescriptor_t Tensor::desc() const {
    return this->_desc->desc();
}
PanZezhong's avatar
init  
PanZezhong committed
125
126
127

std::shared_ptr<Tensor> Tensor::buffer(infiniDtype_t dtype,
                                       const std::vector<size_t> &shape,
thatPepe's avatar
thatPepe committed
128
                                       std::shared_ptr<MemoryPool> pool) {
PanZezhong's avatar
init  
PanZezhong committed
129
130
    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    auto ndim = shape.size();
PanZezhong's avatar
PanZezhong committed
131

PanZezhong's avatar
init  
PanZezhong committed
132
133
    size_t size = std::accumulate(shape.begin(), shape.end(), dsize(dtype), std::multiplies<size_t>());
    auto strides = std::vector<ptrdiff_t>(ndim);
PanZezhong's avatar
PanZezhong committed
134
135
136
137
138
    if (ndim > 0) {
        strides[ndim - 1] = 1;
        for (int i = ndim - 2; i >= 0; i--) {
            strides[i] = strides[i + 1] * shape[i + 1];
        }
PanZezhong's avatar
init  
PanZezhong committed
139
    }
thatPepe's avatar
thatPepe committed
140
    tensor->_storage = Storage::createFromPool(size, pool);
PanZezhong's avatar
PanZezhong committed
141
    tensor->_desc = TensorDesc::create(dtype, shape, strides);
PanZezhong's avatar
init  
PanZezhong committed
142
143
144
145
146
147
148
149
150
151
    tensor->_offset = 0;
    return tensor;
}

std::shared_ptr<Tensor> Tensor::weight(void *data, infiniDtype_t dtype,
                                       const std::vector<size_t> &shape) {
    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    auto ndim = shape.size();
    size_t size = std::accumulate(shape.begin(), shape.end(), dsize(dtype), std::multiplies<size_t>());
    auto strides = std::vector<ptrdiff_t>(ndim);
PanZezhong's avatar
PanZezhong committed
152
153
154
155
156
    if (ndim > 0) {
        strides[ndim - 1] = 1;
        for (int i = ndim - 2; i >= 0; i--) {
            strides[i] = strides[i + 1] * shape[i + 1];
        }
PanZezhong's avatar
init  
PanZezhong committed
157
    }
PanZezhong's avatar
PanZezhong committed
158

PanZezhong's avatar
PanZezhong committed
159
    tensor->_storage = Storage::create(size);
PanZezhong's avatar
PanZezhong committed
160
    tensor->_desc = TensorDesc::create(dtype, shape, strides);
blkmjsian's avatar
blkmjsian committed
161
162
163
164
165
166
167
168
169
170
171
172
173
    if (data != nullptr) {
        tensor->load(data);
    }

    tensor->_offset = 0;
    return tensor;
}

void Tensor::load(const void *data, infinirtStream_t stream) {
    if (stream) {
        RUN_INFINI(infinirtMemcpyAsync(this->_storage->memory(), data, this->_storage->size(), INFINIRT_MEMCPY_H2D, stream));
        return;
    }
174
175
176
177
    // NOTE: 为兼容部分平台(沐曦)多线程并发对同一host数据执行memcpy卡死问题
    static std::mutex mutex;
    {
        std::lock_guard<std::mutex> lock(mutex);
blkmjsian's avatar
blkmjsian committed
178
179
        RUN_INFINI(infinirtMemcpy(this->_storage->memory(),
                                  data, this->_storage->size(), INFINIRT_MEMCPY_H2D));
180
    }
PanZezhong's avatar
init  
PanZezhong committed
181
182
}

PanZezhong's avatar
PanZezhong committed
183
184
std::shared_ptr<Tensor> Tensor::memShare(const std::vector<size_t> &shape, infiniDtype_t dtype_) const {
    auto dtype = dtype_ == INFINI_DTYPE_INVALID ? this->dtype() : dtype_;
185
    size_t size = std::accumulate(shape.begin(), shape.end(), dsize(dtype), std::multiplies<size_t>());
PanZezhong's avatar
PanZezhong committed
186
    ASSERT(size <= this->_storage->size());
187
188
189
190
191
192
193
194
195
196
197
198

    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    auto ndim = shape.size();
    auto strides = std::vector<ptrdiff_t>(ndim);
    if (ndim > 0) {
        strides[ndim - 1] = 1;
        for (int i = ndim - 2; i >= 0; i--) {
            strides[i] = strides[i + 1] * shape[i + 1];
        }
    }
    tensor->_storage = this->_storage;
    tensor->_offset = 0;
PanZezhong's avatar
PanZezhong committed
199
    tensor->_desc = TensorDesc::create(dtype, shape, strides);
200
201
202
    return tensor;
}

PanZezhong's avatar
PanZezhong committed
203
void *Tensor::dataImpl(ptrdiff_t offset) const {
PanZezhong's avatar
PanZezhong committed
204
    return (char *)(this->_storage->memory()) + this->_offset + offset * dsize(this->dtype());
PanZezhong's avatar
init  
PanZezhong committed
205
206
207
}

void *Tensor::data(ptrdiff_t offset) {
PanZezhong's avatar
PanZezhong committed
208
    return this->dataImpl(offset);
PanZezhong's avatar
init  
PanZezhong committed
209
210
211
}

const void *Tensor::data(ptrdiff_t offset) const {
PanZezhong's avatar
PanZezhong committed
212
    return this->dataImpl(offset);
PanZezhong's avatar
init  
PanZezhong committed
213
214
}

PanZezhong's avatar
PanZezhong committed
215
216
// Copies the contents of `src` into this tensor with an infiniop rearrange
// operation issued on `stream`. Both tensors must have the same shape and
// dtype (asserted). The rearrange descriptor is created for this call and
// destroyed before returning.
void Tensor::copyFrom(std::shared_ptr<Tensor const> src,
                      infiniopHandle_t handle, infinirtStream_t stream) {
    ASSERT_EQ(this->shape(), src->shape());
    ASSERT_EQ(this->dtype(), src->dtype());

    infiniopRearrangeDescriptor_t rearrange;
    RUN_INFINI(infiniopCreateRearrangeDescriptor(
        handle, &rearrange, this->desc(), src->desc()));

    void *dst_ptr = this->data();
    const void *src_ptr = src->data();
    RUN_INFINI(infiniopRearrange(rearrange, dst_ptr, src_ptr, stream));

    RUN_INFINI(infiniopDestroyRearrangeDescriptor(rearrange));
}

PanZezhong's avatar
PanZezhong committed
227
228
bool Tensor::isContigous() const {
    return this->_desc->isContigous();
PanZezhong's avatar
init  
PanZezhong committed
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
}

// Recursively prints an N-dimensional strided array to std::cout.
//
// `data` points at the first element of the sub-array for dimension `dim`.
// `shape` and `strides` (in elements) describe the whole tensor. Each
// innermost row is printed as space-terminated values followed by a newline.
// A rank-0 tensor is printed as a single value on its own line. A `dim`
// beyond the last dimension prints nothing.
template <typename T>
void print_data(T *data, const std::vector<size_t> &shape,
                const std::vector<ptrdiff_t> &strides, size_t dim) {
    // Rank 0: handle it before computing `shape.size() - 1`, which would
    // otherwise underflow.
    if (shape.empty()) {
        if (dim == 0) {
            std::cout << data[0] << " " << std::endl;
        }
        return;
    }

    const size_t last = shape.size() - 1;
    if (dim > last) {
        return;
    }

    const ptrdiff_t step = strides[dim];
    if (dim == last) {
        // Honor the innermost stride so non-contiguous views print the right
        // elements. Indexing is signed so negative strides work.
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << data[static_cast<ptrdiff_t>(i) * step] << " ";
        }
        std::cout << std::endl;
        return;
    }

    for (size_t i = 0; i < shape[dim]; i++) {
        print_data(data + static_cast<ptrdiff_t>(i) * step, shape, strides, dim + 1);
    }
}

// Specialization for 16-bit payloads printed as half-precision floats: each
// element is converted with f16_to_f32 before printing. Traversal and output
// format match the generic print_data. A rank-0 tensor is printed as a single
// value on its own line.
template <>
void print_data(uint16_t const *data, const std::vector<size_t> &shape,
                const std::vector<ptrdiff_t> &strides, size_t dim) {
    // Rank 0: handle it before computing `shape.size() - 1`, which would
    // otherwise underflow.
    if (shape.empty()) {
        if (dim == 0) {
            std::cout << f16_to_f32(data[0]) << " " << std::endl;
        }
        return;
    }

    const size_t last = shape.size() - 1;
    if (dim > last) {
        return;
    }

    // Signed indexing keeps negative strides well-defined.
    const ptrdiff_t step = strides[dim];
    if (dim == last) {
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << f16_to_f32(data[static_cast<ptrdiff_t>(i) * step]) << " ";
        }
        std::cout << std::endl;
        return;
    }

    for (size_t i = 0; i < shape[dim]; i++) {
        print_data(data + static_cast<ptrdiff_t>(i) * step, shape, strides, dim + 1);
    }
}

PanZezhong's avatar
PanZezhong committed
261
262
263
264
265
266
267
268
269
// Recursively prints an N-dimensional strided array of 16-bit payloads as
// bfloat16 values: each element is converted with bf16_to_f32 before
// printing. Each innermost row is printed as space-terminated values followed
// by a newline. A rank-0 tensor is printed as a single value on its own line.
void print_data_bf16(uint16_t const *data, const std::vector<size_t> &shape,
                     const std::vector<ptrdiff_t> &strides, size_t dim) {
    // Rank 0: handle it before computing `shape.size() - 1`, which would
    // otherwise underflow.
    if (shape.empty()) {
        if (dim == 0) {
            std::cout << bf16_to_f32(data[0]) << " " << std::endl;
        }
        return;
    }

    const size_t last = shape.size() - 1;
    if (dim > last) {
        return;
    }

    // Signed indexing keeps negative strides well-defined.
    const ptrdiff_t step = strides[dim];
    if (dim == last) {
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << bf16_to_f32(data[static_cast<ptrdiff_t>(i) * step]) << " ";
        }
        std::cout << std::endl;
        return;
    }

    for (size_t i = 0; i < shape[dim]; i++) {
        print_data_bf16(data + static_cast<ptrdiff_t>(i) * step, shape, strides, dim + 1);
    }
}

PanZezhong's avatar
PanZezhong committed
275
276
277
278
std::string Tensor::info() const {
    std::stringstream ss;

    ss << "Tensor: "
PanZezhong's avatar
PanZezhong committed
279
       << this->_desc->info()
PanZezhong's avatar
PanZezhong committed
280
281
       << " device=" << this->deviceType()
       << " device_id=" << this->deviceId();
PanZezhong's avatar
PanZezhong committed
282
    return this->_desc->info();
PanZezhong's avatar
PanZezhong committed
283
284
}

285
286
287
288
size_t Tensor::seed() const {
    return this->_desc->seed();
}

289
std::shared_ptr<Tensor> Tensor::view(const std::vector<size_t> &new_shape) const {
290
    // Step 1: Validate total size
291
    size_t numel = 1;
292
293
    for (size_t dim : this->_desc->shape()) {
        numel *= dim;
294
    }
295

296
    size_t new_numel = 1;
297
298
    for (size_t dim : new_shape) {
        new_numel *= dim;
299
300
    }

301
    ASSERT_EQ(numel, new_numel);
302

303
304
305
    // Step 2: Get current shape and strides
    const std::vector<size_t> &old_shape = this->_desc->shape();
    const std::vector<ptrdiff_t> &old_strides = this->_desc->strides();
306

307
308
309
    // Step 3: Create merged shape and strides
    std::vector<size_t> merged_shape;
    std::vector<ptrdiff_t> merged_strides;
310

311
312
313
    if (!old_shape.empty()) {
        merged_shape.push_back(old_shape[0]);
        merged_strides.push_back(old_strides[0]);
314

315
316
317
318
319
320
321
        for (size_t i = 1; i < old_shape.size(); ++i) {
            if (old_strides[i] * static_cast<ptrdiff_t>(old_shape[i]) == merged_strides.back()) {
                merged_shape.back() *= old_shape[i];
                merged_strides.back() = old_strides[i];
            } else {
                merged_shape.push_back(old_shape[i]);
                merged_strides.push_back(old_strides[i]);
322
            }
323
        }
324
325
    }

326
327
328
329
330
    // Step 4: Compute new strides by splitting merged dimensions
    std::vector<ptrdiff_t> new_strides(new_shape.size());
    size_t merged_idx = 0;
    ptrdiff_t current_stride = merged_strides[0];
    size_t remaining_size = merged_shape[0];
331

332
333
334
335
336
337
    for (size_t i = 0; i < new_shape.size(); ++i) {
        // Find which merged dimension contains this new dimension
        while (new_shape[i] > remaining_size) {
            ASSERT(++merged_idx < merged_shape.size());
            current_stride = merged_strides[merged_idx];
            remaining_size = merged_shape[merged_idx];
338
339
        }

340
341
342
343
        ASSERT_EQ(remaining_size % new_shape[i], 0);

        new_strides[i] = current_stride * (remaining_size / new_shape[i]);
        remaining_size /= new_shape[i];
344
345
    }

346
    return this->view_as(new_shape, new_strides);
347
348
}

349
std::shared_ptr<Tensor> Tensor::view_as(const std::vector<size_t> &new_shape) const {
350
351
    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    tensor->_storage = this->_storage;
352
    tensor->_desc = TensorDesc::create(this->dtype(), new_shape);
353
354
355
356
    tensor->_offset = this->_offset;
    return tensor;
}

357
std::shared_ptr<Tensor> Tensor::view_as(const std::vector<size_t> &new_shape, const std::vector<ptrdiff_t> &new_strides) const {
358
359
    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    tensor->_storage = this->_storage;
360
    tensor->_desc = TensorDesc::create(this->dtype(), new_shape, new_strides);
361
362
363
364
    tensor->_offset = this->_offset;
    return tensor;
}

PanZezhong's avatar
PanZezhong committed
365
void Tensor::debug(const std::string &filename) const {
PanZezhong's avatar
PanZezhong committed
366
367
    RUN_INFINI(infinirtDeviceSynchronize());

PanZezhong's avatar
PanZezhong committed
368
    std::cout << info() << std::endl;
PanZezhong's avatar
PanZezhong committed
369

PanZezhong's avatar
init  
PanZezhong committed
370
    void const *cpu_data;
PanZezhong's avatar
PanZezhong committed
371
    if (this->deviceType() != INFINI_DEVICE_CPU) {
PanZezhong's avatar
PanZezhong committed
372
373
374
        void *cpu_memory = std::malloc(this->_storage->size());
        RUN_INFINI(infinirtMemcpy(cpu_memory, this->_storage->memory(),
                                  this->_storage->size(), INFINIRT_MEMCPY_D2H));
PanZezhong's avatar
init  
PanZezhong committed
375
376
        cpu_data = cpu_memory;
    } else {
PanZezhong's avatar
PanZezhong committed
377
        cpu_data = this->_storage->memory();
PanZezhong's avatar
init  
PanZezhong committed
378
379
380
381
382
383
384
385
    }

    if (!filename.empty()) {
        std::ofstream outFile(filename, std::ios::binary);
        if (!outFile) {
            std::cerr << "Error opening file for writing: " << filename << "\n";
            return;
        }
PanZezhong's avatar
PanZezhong committed
386
        outFile.write(reinterpret_cast<const char *>(cpu_data), this->_storage->size());
PanZezhong's avatar
init  
PanZezhong committed
387
388
389
390
391
        outFile.close();
        std::cout << "Data written to file: " << filename << "\n";
        return;
    }

PanZezhong's avatar
PanZezhong committed
392
    switch (this->dtype()) {
PanZezhong's avatar
init  
PanZezhong committed
393
    case INFINI_DTYPE_F16:
PanZezhong's avatar
PanZezhong committed
394
        print_data((uint16_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
395
396
397
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_F32:
PanZezhong's avatar
PanZezhong committed
398
        print_data((float const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
399
400
401
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_U64:
PanZezhong's avatar
PanZezhong committed
402
        print_data((uint64_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
403
404
405
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_I64:
PanZezhong's avatar
PanZezhong committed
406
        print_data((int64_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
407
408
409
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_U32:
PanZezhong's avatar
PanZezhong committed
410
        print_data((uint32_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
411
412
413
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_I32:
PanZezhong's avatar
PanZezhong committed
414
        print_data((int32_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
415
416
                   this->shape(), this->strides(), 0);
        break;
PanZezhong's avatar
PanZezhong committed
417
418
419
420
    case INFINI_DTYPE_BF16:
        print_data_bf16((uint16_t const *)((char const *)cpu_data + dataOffset()),
                        this->shape(), this->strides(), 0);
        break;
PanZezhong's avatar
init  
PanZezhong committed
421
422
423
424
425
426
    default:
        PANIC("Unsupported data type");
    }
}

// Convenience overload: prints the tensor to std::cout (no file dump).
void Tensor::debug() const {
    this->debug(std::string(""));
}