// mtmd-audio.h
1
2
3
#pragma once

#include "ggml.h"
4
#include "clip-model.h"
5
6
7
8
9

#include <cstdint>
#include <vector>
#include <string>

10
#define MTMD_INTERNAL_HEADER
11

12
// Container for one mel spectrogram produced by an audio preprocessor.
//
// All integer fields default to 0 so that a default-constructed object is in
// a well-defined (empty) state; the struct remains an aggregate, so
// brace-initialisation such as `mtmd_audio_mel{n_len, n_len_org, n_mel, data}`
// keeps working.
struct mtmd_audio_mel {
    int n_len     = 0; // presumably the number of frames stored in `data` — TODO confirm
    int n_len_org = 0; // presumably the frame count before any padding/chunking — TODO confirm
    int n_mel     = 0; // presumably the number of mel bins per frame — TODO confirm

    std::vector<float> data; // spectrogram values; memory layout is defined by the producer
};

20
21
struct mtmd_audio_preprocessor {
    const clip_hparams & hparams;
22

23
    mtmd_audio_preprocessor(const clip_ctx * ctx): hparams(*clip_get_hparams(ctx)) {}
24

25
26
27
28
    virtual ~mtmd_audio_preprocessor() = default;
    virtual void initialize() = 0; // NOT thread-safe
    virtual bool preprocess(const float * samples, size_t n_samples, std::vector<mtmd_audio_mel> & output) = 0;
};
29

30
31
32
33
34
// Preprocessor implementation for Whisper-style audio input (per its name);
// the member functions are defined out of line.
struct mtmd_audio_preprocessor_whisper : public mtmd_audio_preprocessor {
    // Forwards the clip context to the base class, which captures its hparams.
    mtmd_audio_preprocessor_whisper(const clip_ctx * ctx)
        : mtmd_audio_preprocessor(ctx) {
    }

    // See mtmd_audio_preprocessor::initialize().
    void initialize() override;

    // See mtmd_audio_preprocessor::preprocess().
    bool preprocess(
        const float * samples,
        size_t n_samples,
        std::vector<mtmd_audio_mel> & output) override;
};