Commit 094651bd authored by Soumith Chintala's avatar Soumith Chintala
Browse files

first commit

parents
Load audio files directly into PyTorch Tensors
================================================
Audio library for PyTorch
* Support audio I/O (Load files)
Load the following formats into a torch Tensor
* mp3, wav, aac, ogg, flac, avr, cdda, cvs/vms,
* aiff, au, amr, mp2, mp4, ac3, avi, wmv,
* mpeg, ircam and any other format supported by libsox.
Dependencies
------------
* libsox v14.3.2 or above
Quick install on
OSX (Homebrew):
```bash
brew install sox
```
Linux (Ubuntu):
```bash
sudo apt-get install sox libsox-dev libsox-fmt-all
```
Installation
------------
```bash
python setup.py install
```
Quick Usage
-----------
```python
import torchaudio
sound, sample_rate = torchaudio.load('foo.mp3')
```
API Reference
-----------
torchaudio.load
```
Loads an audio file into a Tensor and returns (Tensor, sample_rate)
torchaudio.load(
filename, # path to file (string)
out=None, # optionally pass output Tensor (any CPU Tensor type)
)
```
import os
import torch
from torch.utils.ffi import create_extension
# Directory that contains this build script.
this_file = os.path.dirname(__file__)

# Inputs handed to the FFI builder; paths are given relative to this file
# (see relative_to below).
sources = ['torchaudio/src/th_sox.c']
headers = ['torchaudio/src/th_sox.h']
defines = []

# Extension object for the libsox wrapper; it links against libsox.
# setup.py refers to this object as "build.py:ffi".
ffi = create_extension(
    'torchaudio._ext.th_sox',
    headers=headers,
    sources=sources,
    define_macros=defines,
    include_dirs=['torchaudio/src'],
    libraries=['sox'],
    relative_to=__file__,
    package=True,
)

# Allow building the extension directly with `python build.py`.
if __name__ == '__main__':
    ffi.build()
#!/usr/bin/env python
import os
import sys
from setuptools import setup, find_packages
import build
# Directory that contains this setup script; used to locate build.py.
this_file = os.path.dirname(__file__)

# NOTE(review): description, url, author and author_email look like
# placeholders carried over from a template project -- confirm the intended
# values before publishing.
setup(
    name="torchaudio",
    version="0.1",
    description="An example project using PyTorch FFI",
    url="https://github.com/pytorch/ffi-examples",
    author="XYZ",
    author_email="author@email.com",
    # cffi is needed both while building and at run time.
    install_requires=["cffi>=1.0.0"],
    setup_requires=["cffi>=1.0.0"],
    # Ship every discovered package except the build files.
    packages=find_packages(exclude=["build"]),
    # Package where to put the extensions. Has to be a prefix of build.py.
    ext_package="",
    # Extensions to compile: the `ffi` object defined in build.py.
    cffi_modules=[os.path.join(this_file, "build.py:ffi")],
)
import torch
import torch.nn as nn
import torchaudio
# Sample clip exercised by both loads below.
AUDIO_FILE = "steam-train-whistle-daniel_simon.mp3"

# First pass: let load() allocate its default output tensor.
x, sample_rate = torchaudio.load(AUDIO_FILE)
print(sample_rate)
print(x.size())
print(x[10000])
print(x.min(), x.max())
print(x.mean(), x.std())

# Second pass: decode into a caller-supplied LongTensor instead.
long_out = torch.LongTensor()
x, sample_rate = torchaudio.load(AUDIO_FILE, out=long_out)
print(sample_rate)
print(x.size())
print(x[10000])
print(x.min(), x.max())
import torch
from cffi import FFI
ffi = FFI()
from ._ext import th_sox
def load(filename, out=None):
    """Load an audio file into a CPU tensor through the th_sox extension.

    Args:
        filename: Path of the audio file, given as text or as bytes.
        out: Optional CPU tensor that receives the samples. Its tensor type
            selects which typed reader of the extension is called. When
            omitted, a new ``torch.FloatTensor`` is created.

    Returns:
        A tuple ``(out, sample_rate)``: the tensor that was filled and the
        sample rate reported by the reader.

    Raises:
        AssertionError: If ``out`` is given but is not a tensor, or is a
            CUDA tensor.
        TypeError: If ``out`` is a tensor type without a matching reader.
    """
    import sys

    if out is not None:
        assert torch.is_tensor(out), "out must be a torch Tensor"
        assert not out.is_cuda, "out must be a CPU Tensor"
    else:
        out = torch.FloatTensor()

    # One typed C entry point exists per supported CPU tensor type.
    readers = (
        (torch.FloatTensor, th_sox.libthsox_Float_read_audio_file),
        (torch.DoubleTensor, th_sox.libthsox_Double_read_audio_file),
        (torch.ByteTensor, th_sox.libthsox_Byte_read_audio_file),
        (torch.CharTensor, th_sox.libthsox_Char_read_audio_file),
        (torch.ShortTensor, th_sox.libthsox_Short_read_audio_file),
        (torch.IntTensor, th_sox.libthsox_Int_read_audio_file),
        (torch.LongTensor, th_sox.libthsox_Long_read_audio_file),
    )
    for tensor_type, reader in readers:
        if isinstance(out, tensor_type):
            func = reader
            break
    else:
        # Previously this fell through and failed later with an unbound
        # local variable; report the actual problem instead.
        raise TypeError(
            "unsupported output tensor type: %s" % type(out).__name__)

    # The C function takes a `const char *`. bytes(str) raises TypeError on
    # Python 3, so text paths are encoded explicitly; bytes pass through.
    if not isinstance(filename, bytes):
        filename = filename.encode(sys.getfilesystemencoding() or 'utf-8')

    # The reader writes the sample rate through this int pointer.
    sample_rate_p = ffi.new('int*')
    func(filename, out, sample_rate_p)
    sample_rate = sample_rate_p[0]
    return out, sample_rate
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/th_sox.c"
#else
// Reads samples from an already opened sox stream into `tensor`.
//
// fd          - open sox input handle
// tensor      - destination; resized to (samples_read / channels) x channels
// sample_rate - receives the stream's sample rate, truncated to int
// nsamples    - number of samples to read when the stream does not report
//               its length; (size_t)-1 means "not specified"
//
// Errors are reported through THError, which is expected not to return, so
// the temporary buffer is released before every error raised after it has
// been allocated.
void libthsox_(read_audio)(sox_format_t *fd, THTensor* tensor,
                           int* sample_rate, size_t nsamples)
{
  size_t nchannels = fd->signal.channels;
  size_t buffer_size = fd->signal.length;
  if (buffer_size == 0) {
    // Length unknown: fall back to the caller-supplied count, if any.
    if (nsamples != (size_t)-1) {
      buffer_size = nsamples;
    } else {
      THError("[read_audio] Unknown length");
    }
  }
  // Guard the divisions below.
  if (nchannels == 0)
    THError("[read_audio] Invalid number of channels");
  // Guard the byte-size multiplication passed to malloc.
  if (buffer_size > ((size_t)-1) / sizeof(int32_t))
    THError("[read_audio] Requested buffer is too large");
  *sample_rate = (int) fd->signal.rate;
  int32_t *buffer = (int32_t *)malloc(sizeof(int32_t) * buffer_size);
  // malloc(0) may legitimately return NULL, so only a non-empty request
  // counts as an allocation failure.
  if (buffer == NULL && buffer_size != 0)
    THError("[read_audio] Failed to allocate sample buffer");
  size_t samples_read = sox_read(fd, buffer, buffer_size);
  if (samples_read == 0) {
    free(buffer);
    THError("[read_audio] Empty file or read failed in sox_read");
  }
  // alloc tensor
  size_t nframes = samples_read / nchannels;
  THTensor_(resize2d)(tensor, nframes, nchannels);
  real *tensor_data = THTensor_(data)(tensor);
  // convert audio to dest tensor; the buffer is already laid out frame by
  // frame, so a single linear pass over whole frames is sufficient. size_t
  // indices avoid int overflow on long recordings.
  size_t total = nframes * nchannels;
  size_t i;
  for (i = 0; i < total; i++) {
    tensor_data[i] = (real)buffer[i];
  }
  // free buffer
  free(buffer);
}
// Opens `file_name` with libsox, decodes it into `tensor` via read_audio and
// stores the stream's sample rate in `*sample_rate`.
void libthsox_(read_audio_file)(const char *file_name, THTensor* tensor, int* sample_rate)
{
  // Open the file for reading; the three optional arguments are left NULL.
  sox_format_t *handle = sox_open_read(file_name, NULL, NULL, NULL);
  if (!handle) {
    THError("[read_audio_file] Failure to read file");
  }
  // -1 tells read_audio that no explicit sample count is supplied.
  libthsox_(read_audio)(handle, tensor, sample_rate, -1);
  // Release the sox handle once the samples have been copied out.
  sox_close(handle);
}
#endif
// Generic header: on the first inclusion this only records the file's path in
// TH_GENERIC_FILE. The declaration below is emitted when the file is included
// again with TH_GENERIC_FILE defined (presumably by THGenerateAllTypes.h, once
// per tensor type -- confirm against the TH headers).
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/th_sox.h"
#else
// Opens `file_name`, fills `tensor` with its samples and writes the sample
// rate to `*sample_rate`.
void libthsox_(read_audio_file)(const char *file_name, THTensor* tensor, int* sample_rate);
#endif
// Top-level translation unit: sets up the name-building macros, then pulls in
// the generic implementation.
#include <TH/TH.h>
#include <sox.h>
// Name helpers built on the TH_CONCAT_* macros; `Real` is expected to be
// supplied by the TH type-generation machinery (not visible in this file).
#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME)
#define torch_Tensor TH_CONCAT_STRING_3(torch., Real, Tensor)
// Produces symbols of the form libthsox_<Real>_<NAME>,
// e.g. libthsox_Float_read_audio_file.
#define libthsox_(NAME) TH_CONCAT_4(libthsox_, Real, _, NAME)
// First inclusion of the generic file only defines TH_GENERIC_FILE;
// THGenerateAllTypes.h presumably re-includes it once per tensor type.
#include "generic/th_sox.c"
#include "THGenerateAllTypes.h"
/* Pre-expanded declarations of the per-type read_audio_file entry points.
   The macro-based version is kept commented out below; the gcc -E command
   appears to be how the explicit prototypes were generated. */
/* #include <TH/TH.h> */
/* #define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME) */
/* #define torch_Tensor TH_CONCAT_STRING_3(torch., Real, Tensor) */
/* #define libthsox_(NAME) TH_CONCAT_4(libthsox_, Real, _, NAME) */
/* #include "generic/th_sox.h" */
/* #include "THGenerateAllTypes.h" */
/* gcc -E th_sox.h -I /home/soumith/code/pytorch/torch/lib/include/TH -I /home/soumith/code/pytorch/torch/lib/include/ -I .|grep libthsox */
void libthsox_Float_read_audio_file(const char *file_name, THFloatTensor* tensor, int* sample_rate);
void libthsox_Double_read_audio_file(const char *file_name, THDoubleTensor* tensor, int* sample_rate);
void libthsox_Byte_read_audio_file(const char *file_name, THByteTensor* tensor, int* sample_rate);
void libthsox_Char_read_audio_file(const char *file_name, THCharTensor* tensor, int* sample_rate);
void libthsox_Short_read_audio_file(const char *file_name, THShortTensor* tensor, int* sample_rate);
void libthsox_Int_read_audio_file(const char *file_name, THIntTensor* tensor, int* sample_rate);
void libthsox_Long_read_audio_file(const char *file_name, THLongTensor* tensor, int* sample_rate);
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment