first commit

094651bd · Soumith Chintala · 094651bd · 094651bd · 094651bd · 094651bd
Commit 094651bd authored May 04, 2017 by Soumith Chintala
10 changed files
--- a/README.md
+++ b/README.md
+Load audio files directly into PyTorch Tensors
+==============================================
+
+Audio library for PyTorch
+ * Support audio I/O (Load files)
+
+Load the following formats into a torch Tensor
+ * mp3, wav, aac, ogg, flac, avr, cdda, cvs/vms,
+ * aiff, au, amr, mp2, mp4, ac3, avi, wmv,
+ * mpeg, ircam and any other format supported by libsox.
+
+Dependencies
+------------
+* libsox v14.3.2 or above
+
+Quick install on
+OSX (Homebrew):
+```bash
+brew install sox
+```
+Linux (Ubuntu):
+```bash
+sudo apt-get install sox libsox-dev libsox-fmt-all
+```
+
+Installation
+------------
+
+```bash
+python setup.py install
+```
+
+Quick Usage
+-----------
+
+```python
+import torchaudio
+sound, sample_rate = torchaudio.load('foo.mp3')
+```
+
+API Reference
+-----------
+torchaudio.load
+```
+loads an audio file into a Tensor and returns (Tensor, sample_rate)
+torchaudio.load(
+	string,  # path to file
+	out=None, # optionally pass output Tensor (any CPU Tensor type)
+)
+```
+
--- a/build.py
+++ b/build.py
+import os
+import torch
+from torch.utils.ffi import create_extension
+
+# Directory containing this build script (not referenced again in this file).
+this_file = os.path.dirname(__file__)
+
+# C translation unit compiled into the extension.
+sources = ['torchaudio/src/th_sox.c']
+# Header passed to create_extension together with the sources.
+headers = [
+    'torchaudio/src/th_sox.h',
+]
+# No extra preprocessor macros are defined for the build.
+defines = []
+
+# Describe the `torchaudio._ext.th_sox` extension. It links against libsox;
+# `relative_to=__file__` presumably anchors the relative paths above at this
+# file's directory -- confirm against the torch.utils.ffi documentation.
+ffi = create_extension(
+    'torchaudio._ext.th_sox',
+    package=True,
+    headers=headers,
+    sources=sources,
+    define_macros=defines,
+    relative_to=__file__,
+    libraries=['sox'],
+    include_dirs=['torchaudio/src'],
+)
+
+# Build the extension only when this file is executed as a script; importing
+# it (as setup.py does) just exposes `ffi`.
+if __name__ == '__main__':
+    ffi.build()
--- a/setup.py
+++ b/setup.py
+#!/usr/bin/env python
+
+import os
+import sys
+
+from setuptools import setup, find_packages
+
+import build
+
+this_file = os.path.dirname(__file__)
+
+setup(
+    name="torchaudio",
+    version="0.1",
+    description="An example project using PyTorch FFI",
+    url="https://github.com/pytorch/ffi-examples",
+    author="XYZ",
+    author_email="author@email.com",
+    # Require cffi.
+    install_requires=["cffi>=1.0.0"],
+    setup_requires=["cffi>=1.0.0"],
+    # Exclude the build files.
+    packages=find_packages(exclude=["build"]),
+    # Package where to put the extensions. Has to be a prefix of build.py.
+    ext_package="",
+    # Extensions to compile.
+    cffi_modules=[
+        os.path.join(this_file, "build.py:ffi")
+    ],
+)
--- a/test/steam-train-whistle-daniel_simon.mp3
+++ b/test/steam-train-whistle-daniel_simon.mp3
--- a/test/test.py
+++ b/test/test.py
+
+import torch
+import torch.nn as nn
+import torchaudio
+
+x, sample_rate = torchaudio.load("steam-train-whistle-daniel_simon.mp3")
+print(sample_rate)
+print(x.size())
+print(x[10000])
+print(x.min(), x.max())
+print(x.mean(), x.std())
+
+x, sample_rate = torchaudio.load("steam-train-whistle-daniel_simon.mp3",
+                                 out=torch.LongTensor())
+print(sample_rate)
+print(x.size())
+print(x[10000])
+print(x.min(), x.max())
--- a/torchaudio/__init__.py
+++ b/torchaudio/__init__.py
+import torch
+
+from cffi import FFI
+ffi = FFI()
+from ._ext import th_sox
+
+def load(filename, out=None):
+    if out is not None:
+        assert torch.is_tensor(out)
+        assert not out.is_cuda
+    else:
+        out = torch.FloatTensor()
+
+    if isinstance(out, torch.FloatTensor):
+        func = th_sox.libthsox_Float_read_audio_file
+    elif isinstance(out, torch.DoubleTensor):
+        func = th_sox.libthsox_Double_read_audio_file
+    elif isinstance(out, torch.ByteTensor):
+        func = th_sox.libthsox_Byte_read_audio_file
+    elif isinstance(out, torch.CharTensor):
+        func = th_sox.libthsox_Char_read_audio_file
+    elif isinstance(out, torch.ShortTensor):
+        func = th_sox.libthsox_Short_read_audio_file
+    elif isinstance(out, torch.IntTensor):
+        func = th_sox.libthsox_Int_read_audio_file
+    elif isinstance(out, torch.LongTensor):
+        func = th_sox.libthsox_Long_read_audio_file
+        
+    sample_rate_p = ffi.new('int*')    
+    func(bytes(filename), out, sample_rate_p)
+    sample_rate = sample_rate_p[0]
+    return out, sample_rate
--- a/torchaudio/src/generic/th_sox.c
+++ b/torchaudio/src/generic/th_sox.c
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/th_sox.c"
+#else
+
+// Reads all samples from an already-open sox stream into `tensor`, resized to
+// (samples_read / channels) x channels, and stores the stream rate in
+// `*sample_rate`. `nsamples` is the fallback sample count used when the stream
+// does not report its length; (size_t)-1 means "no fallback".
+//
+// Returns NULL on success or a static error message on failure. It never
+// calls THError with the buffer still allocated, and leaves error reporting
+// to the caller so the caller can release its own resources first.
+static const char *libthsox_(read_audio_impl)(sox_format_t *fd, THTensor* tensor,
+                                              int* sample_rate, size_t nsamples)
+{
+  size_t nchannels = fd->signal.channels;
+  size_t buffer_size = (size_t) fd->signal.length;
+  if (buffer_size == 0) {
+    if (nsamples == (size_t) -1)
+      return "[read_audio] Unknown length";
+    buffer_size = nsamples;
+  }
+  // Guard the divisions below.
+  if (nchannels == 0)
+    return "[read_audio] Invalid number of channels";
+  *sample_rate = (int) fd->signal.rate;
+
+  int32_t *buffer = (int32_t *)malloc(sizeof(int32_t) * buffer_size);
+  // malloc(0) may legitimately return NULL; only a non-empty request failing
+  // is an allocation error.
+  if (buffer == NULL && buffer_size != 0)
+    return "[read_audio] Could not allocate sample buffer";
+
+  size_t samples_read = sox_read(fd, buffer, buffer_size);
+  if (samples_read == 0) {
+    free(buffer);  // previously leaked on this error path
+    return "[read_audio] Empty file or read failed in sox_read";
+  }
+
+  // alloc tensor
+  size_t nframes = samples_read / nchannels;
+  THTensor_(resize2d)(tensor, (long) nframes, (long) nchannels);
+  real *tensor_data = THTensor_(data)(tensor);
+
+  // convert audio to dest tensor; samples are copied in stream order, so a
+  // single pass over the complete frames is enough. The cast to `real`
+  // narrows the 32-bit samples for the smaller tensor types.
+  size_t nvalues = nframes * nchannels;
+  size_t i;
+  for (i = 0; i < nvalues; i++) {
+    tensor_data[i] = (real)buffer[i];
+  }
+
+  free(buffer);
+  return NULL;
+}
+
+// Reads an open sox stream into `tensor` (see read_audio_impl) and reports
+// any failure through THError. The stream is not closed here.
+void libthsox_(read_audio)(sox_format_t *fd, THTensor* tensor,
+                         int* sample_rate, size_t nsamples)
+{
+  const char *error = libthsox_(read_audio_impl)(fd, tensor, sample_rate, nsamples);
+  if (error != NULL)
+    THError("%s", error);
+}
+
+// Opens `file_name` with libsox, reads the whole file into `tensor`, stores
+// the sample rate in `*sample_rate` and closes the file again.
+// NOTE(review): assumes THError does not return; the NULL check on `fd`
+// relied on this before this change as well.
+void libthsox_(read_audio_file)(const char *file_name, THTensor* tensor, int* sample_rate)
+{
+  sox_format_t *fd = sox_open_read(file_name, NULL, NULL, NULL);
+  if (fd == NULL)
+    THError("[read_audio_file] Failure to read file");
+  const char *error = libthsox_(read_audio_impl)(fd, tensor, sample_rate, (size_t) -1);
+  // Close before reporting so the sox handle is released on failure too.
+  sox_close(fd);
+  if (error != NULL)
+    THError("%s", error);
+}
+
+#endif
--- a/torchaudio/src/generic/th_sox.h
+++ b/torchaudio/src/generic/th_sox.h
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/th_sox.h"
+#else
+
+// Per-type file reader defined in generic/th_sox.c: opens `file_name` with
+// libsox, fills `tensor` with its samples and stores the rate in `*sample_rate`.
+void libthsox_(read_audio_file)(const char *file_name, THTensor* tensor, int* sample_rate);
+#endif
--- a/torchaudio/src/th_sox.c
+++ b/torchaudio/src/th_sox.c
+
+#include <TH/TH.h>
+
+#include <sox.h>
+
+// Name-mangling helpers built on TH's concatenation macros and the `Real`
+// type token. libthsox_(NAME) expands to libthsox_<Real>_NAME, e.g.
+// libthsox_Float_read_audio_file. torch_ and torch_Tensor are not referenced
+// by the generic code included below.
+#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME)
+#define torch_Tensor TH_CONCAT_STRING_3(torch., Real, Tensor)
+#define libthsox_(NAME) TH_CONCAT_4(libthsox_, Real, _, NAME)
+
+#include "generic/th_sox.c"
+#include "THGenerateAllTypes.h"
+
--- a/torchaudio/src/th_sox.h
+++ b/torchaudio/src/th_sox.h
+/* #include <TH/TH.h> */
+
+/* #define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME) */
+/* #define torch_Tensor TH_CONCAT_STRING_3(torch., Real, Tensor) */
+/* #define libthsox_(NAME) TH_CONCAT_4(libthsox_, Real, _, NAME) */
+
+/* #include "generic/th_sox.h" */
+/* #include "THGenerateAllTypes.h" */
+
+/* gcc -E th_sox.h -I /home/soumith/code/pytorch/torch/lib/include/TH -I /home/soumith/code/pytorch/torch/lib/include/ -I .|grep libthsox */
+/* Pre-expanded prototypes, one per tensor type, produced with the gcc -E */
+/* command above. Each takes the file path, the destination tensor and a */
+/* pointer that receives the sample rate. */
+void libthsox_Float_read_audio_file(const char *file_name, THFloatTensor* tensor, int* sample_rate);
+void libthsox_Double_read_audio_file(const char *file_name, THDoubleTensor* tensor, int* sample_rate);
+void libthsox_Byte_read_audio_file(const char *file_name, THByteTensor* tensor, int* sample_rate);
+void libthsox_Char_read_audio_file(const char *file_name, THCharTensor* tensor, int* sample_rate);
+void libthsox_Short_read_audio_file(const char *file_name, THShortTensor* tensor, int* sample_rate);
+void libthsox_Int_read_audio_file(const char *file_name, THIntTensor* tensor, int* sample_rate);
+void libthsox_Long_read_audio_file(const char *file_name, THLongTensor* tensor, int* sample_rate);