tensor.cpp 13.9 KB
Newer Older
PanZezhong's avatar
init  
PanZezhong committed
1
2
#include "../tensor.hpp"
#include "../utils.hpp"
3
#include <algorithm>
PanZezhong's avatar
init  
PanZezhong committed
4
5
#include <fstream>
#include <iostream>
6
#include <mutex>
PanZezhong's avatar
init  
PanZezhong committed
7
#include <numeric>
PanZezhong's avatar
PanZezhong committed
8
#include <sstream>
PanZezhong's avatar
init  
PanZezhong committed
9
10
11
12

// Builds a tensor descriptor from an element type, a shape and explicit
// per-dimension strides. The object is allocated with a plain `new`
// expression and ownership is handed to the returned shared_ptr.
std::shared_ptr<TensorDesc>
TensorDesc::create(infiniDtype_t dtype, const std::vector<size_t> &shape,
                   const std::vector<ptrdiff_t> &strides) {
    std::shared_ptr<TensorDesc> result(new TensorDesc(dtype, shape, strides));
    return result;
}

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
// Builds a descriptor with densely packed strides: the last dimension gets
// stride 1 and every earlier dimension gets the stride of its successor
// multiplied by the successor's extent. An empty shape yields empty strides.
std::shared_ptr<TensorDesc>
TensorDesc::create(infiniDtype_t dtype, const std::vector<size_t> &shape) {
    const size_t rank = shape.size();
    std::vector<ptrdiff_t> dense(rank);
    // Walk from the innermost dimension outwards, carrying the running product.
    ptrdiff_t running = 1;
    for (size_t d = rank; d-- > 0;) {
        dense[d] = running;
        running *= shape[d];
    }
    return create(dtype, shape, dense);
}

// Builds a descriptor whose strides follow a custom dimension ordering.
//
// `order[k]` is the layout rank of dimension k: the dimension whose rank is
// ndim-1 receives stride 1, and each lower rank receives the stride of the
// next-higher rank multiplied by that dimension's extent.
//
// `order` must have the same length as `shape` and must contain every value
// in [0, ndim) exactly once; this is asserted, because a missing rank would
// otherwise index one past the end of the stride vector.
std::shared_ptr<TensorDesc>
TensorDesc::createWithOrder(infiniDtype_t dtype, const std::vector<size_t> &shape,
                            const std::vector<size_t> &order) {
    ASSERT_EQ(shape.size(), order.size());
    const size_t ndim = shape.size();
    if (ndim == 0) {
        return create(dtype, shape);
    }

    std::vector<ptrdiff_t> strides(ndim);

    // Locate the fastest-varying dimension (rank ndim-1).
    auto it = std::find(order.begin(), order.end(), ndim - 1);
    ASSERT(it != order.end());
    size_t pos = static_cast<size_t>(std::distance(order.begin(), it));
    strides[pos] = 1;

    // Walk the remaining ranks from ndim-2 down to 0. Finding all ndim ranks
    // in a vector of length ndim also proves that `order` is a permutation.
    for (size_t rank = ndim - 1; rank-- > 0;) {
        it = std::find(order.begin(), order.end(), rank);
        ASSERT(it != order.end());
        const size_t next = static_cast<size_t>(std::distance(order.begin(), it));
        strides[next] = strides[pos] * shape[pos];
        pos = next;
    }
    return create(dtype, shape, strides);
}

PanZezhong's avatar
PanZezhong committed
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
// Returns the infiniop descriptor handle for this tensor description.
// The handle is created on the first call (while `_desc` is still null) from
// the stored shape, strides and dtype, and the same handle is returned afterwards.
infiniopTensorDescriptor_t TensorDesc::desc() const {
    if (_desc != nullptr) {
        return _desc;
    }
    RUN_INFINI(infiniopCreateTensorDescriptor(
        (infiniopTensorDescriptor_t *)(&_desc), _shape.size(), _shape.data(),
        _strides.data(), _dtype));
    return _desc;
}

void TensorDesc::resetDesc() {
    if (this->_desc != nullptr) {
        infiniopDestroyTensorDescriptor(this->_desc);
        this->_desc = nullptr;
    }
}

65
66
67
68
69
70
71
72
73
74
// Recomputes `_seed` from scratch: starts at 0 and folds in every extent of
// the shape, followed by every stride (converted to size_t), via hash_combine.
void TensorDesc::computeTensorDesHash() {
    _seed = 0;

    const auto &extents = this->shape();
    for (size_t i = 0; i < extents.size(); ++i) {
        size_t extent = extents[i];
        hash_combine(_seed, extent);
    }

    const auto &steps = this->strides();
    for (size_t i = 0; i < steps.size(); ++i) {
        hash_combine(_seed, static_cast<size_t>(steps[i]));
    }
}

PanZezhong's avatar
PanZezhong committed
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
// Reports whether the stored strides equal the densely packed strides for
// the stored shape (last dimension stride 1, each earlier stride equal to
// the next stride times the next extent).
//
// A 0-dimensional descriptor is reported as contiguous. The previous
// implementation wrote to `strides[ndim - 1]` on an empty vector in that case.
bool TensorDesc::isContigous() const {
    const size_t ndim = this->ndim();
    ASSERT_EQ(ndim, this->_strides.size());

    // Bind by reference: no need to copy the shape just to read it.
    const auto &shape = this->shape();

    // Compare against the expected stride on the fly instead of
    // materialising a temporary stride vector.
    ptrdiff_t expected = 1;
    for (size_t d = ndim; d-- > 0;) {
        if (this->_strides[d] != expected) {
            return false;
        }
        expected *= shape[d];
    }
    return true;
}

// Formats the descriptor as a single line of the form
// "Tensor: shape[ d0 d1 ... ] strides[ s0 s1 ... ] dtype=<value>".
std::string TensorDesc::info() const {
    std::ostringstream out;

    out << "Tensor: "
        << "shape[ ";
    const auto &extents = this->shape();
    for (size_t i = 0; i < extents.size(); ++i) {
        out << extents[i] << " ";
    }

    out << "] strides[ ";
    const auto &steps = this->strides();
    for (size_t i = 0; i < steps.size(); ++i) {
        out << steps[i] << " ";
    }

    out << "] dtype=" << this->dtype();
    return out.str();
}

PanZezhong's avatar
init  
PanZezhong committed
104
// Releases the infiniop descriptor handle (if any) when the description dies.
TensorDesc::~TensorDesc() {
    resetDesc();
}

PanZezhong's avatar
PanZezhong committed
108
109
110
111
112
113
const std::vector<size_t> &Tensor::shape() const { return this->_desc->shape(); }
const std::vector<ptrdiff_t> &Tensor::strides() const { return this->_desc->strides(); }
size_t Tensor::ndim() const { return this->_desc->ndim(); }
infiniDtype_t Tensor::dtype() const { return this->_desc->dtype(); }
infiniDevice_t Tensor::deviceType() const { return this->_storage->deviceType(); }
int Tensor::deviceId() const { return this->_storage->deviceId(); }
PanZezhong's avatar
init  
PanZezhong committed
114
115
// Nothing to release explicitly; members clean up after themselves.
Tensor::~Tensor() = default;

PanZezhong's avatar
PanZezhong committed
116
ptrdiff_t Tensor::dataOffset() const {
PanZezhong's avatar
PanZezhong committed
117
    return _offset;
PanZezhong's avatar
init  
PanZezhong committed
118
119
}

PanZezhong's avatar
PanZezhong committed
120
// Returns the infiniop descriptor handle owned by this tensor's TensorDesc.
infiniopTensorDescriptor_t Tensor::desc() const {
    return this->_desc->desc();
}
PanZezhong's avatar
init  
PanZezhong committed
121
122
123

std::shared_ptr<Tensor> Tensor::buffer(infiniDtype_t dtype,
                                       const std::vector<size_t> &shape,
thatPepe's avatar
thatPepe committed
124
                                       std::shared_ptr<MemoryPool> pool) {
PanZezhong's avatar
init  
PanZezhong committed
125
126
    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    auto ndim = shape.size();
PanZezhong's avatar
PanZezhong committed
127

PanZezhong's avatar
init  
PanZezhong committed
128
129
    size_t size = std::accumulate(shape.begin(), shape.end(), dsize(dtype), std::multiplies<size_t>());
    auto strides = std::vector<ptrdiff_t>(ndim);
PanZezhong's avatar
PanZezhong committed
130
131
132
133
134
    if (ndim > 0) {
        strides[ndim - 1] = 1;
        for (int i = ndim - 2; i >= 0; i--) {
            strides[i] = strides[i + 1] * shape[i + 1];
        }
PanZezhong's avatar
init  
PanZezhong committed
135
    }
thatPepe's avatar
thatPepe committed
136
    tensor->_storage = Storage::createFromPool(size, pool);
PanZezhong's avatar
PanZezhong committed
137
    tensor->_desc = TensorDesc::create(dtype, shape, strides);
PanZezhong's avatar
init  
PanZezhong committed
138
139
140
141
142
143
144
145
146
147
    tensor->_offset = 0;
    return tensor;
}

std::shared_ptr<Tensor> Tensor::weight(void *data, infiniDtype_t dtype,
                                       const std::vector<size_t> &shape) {
    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    auto ndim = shape.size();
    size_t size = std::accumulate(shape.begin(), shape.end(), dsize(dtype), std::multiplies<size_t>());
    auto strides = std::vector<ptrdiff_t>(ndim);
PanZezhong's avatar
PanZezhong committed
148
149
150
151
152
    if (ndim > 0) {
        strides[ndim - 1] = 1;
        for (int i = ndim - 2; i >= 0; i--) {
            strides[i] = strides[i + 1] * shape[i + 1];
        }
PanZezhong's avatar
init  
PanZezhong committed
153
    }
PanZezhong's avatar
PanZezhong committed
154

PanZezhong's avatar
PanZezhong committed
155
    tensor->_storage = Storage::create(size);
PanZezhong's avatar
PanZezhong committed
156
    tensor->_desc = TensorDesc::create(dtype, shape, strides);
157
158
159
160
161
162
163
    // NOTE: 为兼容部分平台(沐曦)多线程并发对同一host数据执行memcpy卡死问题
    static std::mutex mutex;
    {
        std::lock_guard<std::mutex> lock(mutex);
        RUN_INFINI(infinirtMemcpy(tensor->_storage->memory(),
                                  data, size, INFINIRT_MEMCPY_H2D));
    }
PanZezhong's avatar
PanZezhong committed
164

PanZezhong's avatar
init  
PanZezhong committed
165
166
167
168
    tensor->_offset = 0;
    return tensor;
}

PanZezhong's avatar
PanZezhong committed
169
170
std::shared_ptr<Tensor> Tensor::memShare(const std::vector<size_t> &shape, infiniDtype_t dtype_) const {
    auto dtype = dtype_ == INFINI_DTYPE_INVALID ? this->dtype() : dtype_;
171
    size_t size = std::accumulate(shape.begin(), shape.end(), dsize(dtype), std::multiplies<size_t>());
PanZezhong's avatar
PanZezhong committed
172
    ASSERT(size <= this->_storage->size());
173
174
175
176
177
178
179
180
181
182
183
184

    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    auto ndim = shape.size();
    auto strides = std::vector<ptrdiff_t>(ndim);
    if (ndim > 0) {
        strides[ndim - 1] = 1;
        for (int i = ndim - 2; i >= 0; i--) {
            strides[i] = strides[i + 1] * shape[i + 1];
        }
    }
    tensor->_storage = this->_storage;
    tensor->_offset = 0;
PanZezhong's avatar
PanZezhong committed
185
    tensor->_desc = TensorDesc::create(dtype, shape, strides);
186
187
188
    return tensor;
}

PanZezhong's avatar
PanZezhong committed
189
void *Tensor::dataImpl(ptrdiff_t offset) const {
PanZezhong's avatar
PanZezhong committed
190
    return (char *)(this->_storage->memory()) + this->_offset + offset * dsize(this->dtype());
PanZezhong's avatar
init  
PanZezhong committed
191
192
193
}

void *Tensor::data(ptrdiff_t offset) {
PanZezhong's avatar
PanZezhong committed
194
    return this->dataImpl(offset);
PanZezhong's avatar
init  
PanZezhong committed
195
196
197
}

const void *Tensor::data(ptrdiff_t offset) const {
PanZezhong's avatar
PanZezhong committed
198
    return this->dataImpl(offset);
PanZezhong's avatar
init  
PanZezhong committed
199
200
}

PanZezhong's avatar
PanZezhong committed
201
202
// Copies the contents of `src` into this tensor using the infiniop rearrange
// operator on `stream`. Both tensors must have the same shape and dtype
// (asserted). A rearrange descriptor is created for the call and destroyed
// again before returning.
void Tensor::copyFrom(std::shared_ptr<Tensor const> src,
                      infiniopHandle_t handle, infinirtStream_t stream) {
    ASSERT_EQ(this->shape(), src->shape());
    ASSERT_EQ(this->dtype(), src->dtype());

    infiniopRearrangeDescriptor_t rearrange;
    RUN_INFINI(infiniopCreateRearrangeDescriptor(
        handle, &rearrange, this->desc(), src->desc()));

    void *destination = this->data();
    const void *source = src->data();
    RUN_INFINI(infiniopRearrange(rearrange, destination, source, stream));

    RUN_INFINI(infiniopDestroyRearrangeDescriptor(rearrange));
}

PanZezhong's avatar
PanZezhong committed
213
214
bool Tensor::isContigous() const {
    return this->_desc->isContigous();
PanZezhong's avatar
init  
PanZezhong committed
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
}

// Recursively prints a strided tensor to std::cout, one innermost row per line.
//
// `data` points at the first element of the sub-tensor for dimension `dim`,
// and `strides` are expressed in elements. Each value is followed by a single
// space and each row is terminated with std::endl.
//
// Changes from the earlier version:
//  * the innermost loop now honours strides[dim] (it used to read data[i],
//    which is wrong whenever the last dimension is not unit-stride);
//  * an empty shape (0-dim tensor) prints its single element instead of
//    underflowing `shape.size() - 1` and indexing shape[0];
//  * index arithmetic is done in ptrdiff_t so negative strides are well defined.
template <typename T>
void print_data(T *data, const std::vector<size_t> &shape,
                const std::vector<ptrdiff_t> &strides, size_t dim) {
    if (shape.empty()) {
        std::cout << data[0] << " " << std::endl;
        return;
    }

    const size_t last = shape.size() - 1; // safe: shape is non-empty here
    if (dim == last) {
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << data[static_cast<ptrdiff_t>(i) * strides[dim]] << " ";
        }
        std::cout << std::endl;
    } else if (dim < last) {
        for (size_t i = 0; i < shape[dim]; i++) {
            print_data(data + static_cast<ptrdiff_t>(i) * strides[dim], shape, strides, dim + 1);
        }
    }
}

// Specialization for 16-bit payloads: every element is converted with
// f16_to_f32 before printing. Layout rules match the generic print_data:
// strides are in elements, one innermost row per line.
//
// An empty shape (0-dim tensor) prints its single element instead of
// underflowing `shape.size() - 1`, and index arithmetic is done in ptrdiff_t
// so negative strides are well defined.
template <>
void print_data(uint16_t const *data, const std::vector<size_t> &shape,
                const std::vector<ptrdiff_t> &strides, size_t dim) {
    if (shape.empty()) {
        std::cout << f16_to_f32(data[0]) << " " << std::endl;
        return;
    }

    const size_t last = shape.size() - 1; // safe: shape is non-empty here
    if (dim == last) {
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << f16_to_f32(data[static_cast<ptrdiff_t>(i) * strides[dim]]) << " ";
        }
        std::cout << std::endl;
    } else if (dim < last) {
        for (size_t i = 0; i < shape[dim]; i++) {
            print_data(data + static_cast<ptrdiff_t>(i) * strides[dim], shape, strides, dim + 1);
        }
    }
}

PanZezhong's avatar
PanZezhong committed
247
248
249
250
251
252
253
254
255
256
257
258
259
260
// Recursively prints a strided tensor of 16-bit values to std::cout,
// converting every element with bf16_to_f32. Strides are in elements and one
// innermost row is printed per line.
//
// The recursion now calls print_data_bf16 itself. It previously called
// print_data, which dispatches to the f16_to_f32 specialization, so any
// tensor with more than one dimension was decoded with the wrong conversion.
// An empty shape (0-dim tensor) prints its single element instead of
// underflowing `shape.size() - 1`.
void print_data_bf16(uint16_t const *data, const std::vector<size_t> &shape,
                     const std::vector<ptrdiff_t> &strides, size_t dim) {
    if (shape.empty()) {
        std::cout << bf16_to_f32(data[0]) << " " << std::endl;
        return;
    }

    const size_t last = shape.size() - 1; // safe: shape is non-empty here
    if (dim == last) {
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << bf16_to_f32(data[static_cast<ptrdiff_t>(i) * strides[dim]]) << " ";
        }
        std::cout << std::endl;
    } else if (dim < last) {
        for (size_t i = 0; i < shape[dim]; i++) {
            print_data_bf16(data + static_cast<ptrdiff_t>(i) * strides[dim], shape, strides, dim + 1);
        }
    }
}

PanZezhong's avatar
PanZezhong committed
261
262
263
264
std::string Tensor::info() const {
    std::stringstream ss;

    ss << "Tensor: "
PanZezhong's avatar
PanZezhong committed
265
       << this->_desc->info()
PanZezhong's avatar
PanZezhong committed
266
267
       << " device=" << this->deviceType()
       << " device_id=" << this->deviceId();
PanZezhong's avatar
PanZezhong committed
268
    return this->_desc->info();
PanZezhong's avatar
PanZezhong committed
269
270
}

271
272
273
274
size_t Tensor::seed() const {
    return this->_desc->seed();
}

275
std::shared_ptr<Tensor> Tensor::view(const std::vector<size_t> &new_shape) const {
276
    // Step 1: Validate total size
277
    size_t numel = 1;
278
279
    for (size_t dim : this->_desc->shape()) {
        numel *= dim;
280
    }
281

282
    size_t new_numel = 1;
283
284
    for (size_t dim : new_shape) {
        new_numel *= dim;
285
286
    }

287
    ASSERT_EQ(numel, new_numel);
288

289
290
291
    // Step 2: Get current shape and strides
    const std::vector<size_t> &old_shape = this->_desc->shape();
    const std::vector<ptrdiff_t> &old_strides = this->_desc->strides();
292

293
294
295
    // Step 3: Create merged shape and strides
    std::vector<size_t> merged_shape;
    std::vector<ptrdiff_t> merged_strides;
296

297
298
299
    if (!old_shape.empty()) {
        merged_shape.push_back(old_shape[0]);
        merged_strides.push_back(old_strides[0]);
300

301
302
303
304
305
306
307
        for (size_t i = 1; i < old_shape.size(); ++i) {
            if (old_strides[i] * static_cast<ptrdiff_t>(old_shape[i]) == merged_strides.back()) {
                merged_shape.back() *= old_shape[i];
                merged_strides.back() = old_strides[i];
            } else {
                merged_shape.push_back(old_shape[i]);
                merged_strides.push_back(old_strides[i]);
308
            }
309
        }
310
311
    }

312
313
314
315
316
    // Step 4: Compute new strides by splitting merged dimensions
    std::vector<ptrdiff_t> new_strides(new_shape.size());
    size_t merged_idx = 0;
    ptrdiff_t current_stride = merged_strides[0];
    size_t remaining_size = merged_shape[0];
317

318
319
320
321
322
323
    for (size_t i = 0; i < new_shape.size(); ++i) {
        // Find which merged dimension contains this new dimension
        while (new_shape[i] > remaining_size) {
            ASSERT(++merged_idx < merged_shape.size());
            current_stride = merged_strides[merged_idx];
            remaining_size = merged_shape[merged_idx];
324
325
        }

326
327
328
329
        ASSERT_EQ(remaining_size % new_shape[i], 0);

        new_strides[i] = current_stride * (remaining_size / new_shape[i]);
        remaining_size /= new_shape[i];
330
331
    }

332
    return this->view_as(new_shape, new_strides);
333
334
}

335
std::shared_ptr<Tensor> Tensor::view_as(const std::vector<size_t> &new_shape) const {
336
337
    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    tensor->_storage = this->_storage;
338
    tensor->_desc = TensorDesc::create(this->dtype(), new_shape);
339
340
341
342
    tensor->_offset = this->_offset;
    return tensor;
}

343
std::shared_ptr<Tensor> Tensor::view_as(const std::vector<size_t> &new_shape, const std::vector<ptrdiff_t> &new_strides) const {
344
345
    std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
    tensor->_storage = this->_storage;
346
    tensor->_desc = TensorDesc::create(this->dtype(), new_shape, new_strides);
347
348
349
350
    tensor->_offset = this->_offset;
    return tensor;
}

PanZezhong's avatar
PanZezhong committed
351
void Tensor::debug(const std::string &filename) const {
PanZezhong's avatar
PanZezhong committed
352
353
    RUN_INFINI(infinirtDeviceSynchronize());

PanZezhong's avatar
PanZezhong committed
354
    std::cout << info() << std::endl;
PanZezhong's avatar
PanZezhong committed
355

PanZezhong's avatar
init  
PanZezhong committed
356
    void const *cpu_data;
PanZezhong's avatar
PanZezhong committed
357
    if (this->deviceType() != INFINI_DEVICE_CPU) {
PanZezhong's avatar
PanZezhong committed
358
359
360
        void *cpu_memory = std::malloc(this->_storage->size());
        RUN_INFINI(infinirtMemcpy(cpu_memory, this->_storage->memory(),
                                  this->_storage->size(), INFINIRT_MEMCPY_D2H));
PanZezhong's avatar
init  
PanZezhong committed
361
362
        cpu_data = cpu_memory;
    } else {
PanZezhong's avatar
PanZezhong committed
363
        cpu_data = this->_storage->memory();
PanZezhong's avatar
init  
PanZezhong committed
364
365
366
367
368
369
370
371
    }

    if (!filename.empty()) {
        std::ofstream outFile(filename, std::ios::binary);
        if (!outFile) {
            std::cerr << "Error opening file for writing: " << filename << "\n";
            return;
        }
PanZezhong's avatar
PanZezhong committed
372
        outFile.write(reinterpret_cast<const char *>(cpu_data), this->_storage->size());
PanZezhong's avatar
init  
PanZezhong committed
373
374
375
376
377
        outFile.close();
        std::cout << "Data written to file: " << filename << "\n";
        return;
    }

PanZezhong's avatar
PanZezhong committed
378
    switch (this->dtype()) {
PanZezhong's avatar
init  
PanZezhong committed
379
    case INFINI_DTYPE_F16:
PanZezhong's avatar
PanZezhong committed
380
        print_data((uint16_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
381
382
383
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_F32:
PanZezhong's avatar
PanZezhong committed
384
        print_data((float const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
385
386
387
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_U64:
PanZezhong's avatar
PanZezhong committed
388
        print_data((uint64_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
389
390
391
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_I64:
PanZezhong's avatar
PanZezhong committed
392
        print_data((int64_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
393
394
395
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_U32:
PanZezhong's avatar
PanZezhong committed
396
        print_data((uint32_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
397
398
399
                   this->shape(), this->strides(), 0);
        break;
    case INFINI_DTYPE_I32:
PanZezhong's avatar
PanZezhong committed
400
        print_data((int32_t const *)((char const *)cpu_data + dataOffset()),
PanZezhong's avatar
init  
PanZezhong committed
401
402
                   this->shape(), this->strides(), 0);
        break;
PanZezhong's avatar
PanZezhong committed
403
404
405
406
    case INFINI_DTYPE_BF16:
        print_data_bf16((uint16_t const *)((char const *)cpu_data + dataOffset()),
                        this->shape(), this->strides(), 0);
        break;
PanZezhong's avatar
init  
PanZezhong committed
407
408
409
410
411
412
    default:
        PANIC("Unsupported data type");
    }
}

// Convenience overload: prints the tensor to std::cout (no file dump).
void Tensor::debug() const {
    const std::string no_file = "";
    this->debug(no_file);
}