Commit 396ff7f5 authored by Naman Goyal, committed by Facebook Github Bot
Browse files

installing numpy headers for cython

Summary: Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/848

Differential Revision: D17060283

fbshipit-source-id: c7e61cae76a0566cc3e2ddc3ab4d48f8dec9d777
parent 3ab8e0fd
...@@ -15,8 +15,6 @@ import numpy as np ...@@ -15,8 +15,6 @@ import numpy as np
import sys import sys
import types import types
from fairseq.data.data_utils_fast import batch_by_size_fast
def infer_language_pair(path): def infer_language_pair(path):
"""Infer language pair from filename: <split>.<lang1>-<lang2>.(...).idx""" """Infer language pair from filename: <split>.<lang1>-<lang2>.(...).idx"""
...@@ -200,6 +198,12 @@ def batch_by_size( ...@@ -200,6 +198,12 @@ def batch_by_size(
required_batch_size_multiple (int, optional): require batch size to required_batch_size_multiple (int, optional): require batch size to
be a multiple of N (default: 1). be a multiple of N (default: 1).
""" """
try:
from fairseq.data.data_utils_fast import batch_by_size_fast
except ImportError:
raise ImportError(
'Please build Cython components with: `pip install --editable .`'
)
max_tokens = max_tokens if max_tokens is not None else sys.maxsize max_tokens = max_tokens if max_tokens is not None else sys.maxsize
max_sentences = max_sentences if max_sentences is not None else sys.maxsize max_sentences = max_sentences if max_sentences is not None else sys.maxsize
bsz_mult = required_batch_size_multiple bsz_mult = required_batch_size_multiple
......
...@@ -6,11 +6,6 @@ ...@@ -6,11 +6,6 @@
import numpy as np import numpy as np
import torch import torch
from fairseq.data.token_block_utils_fast import (
_get_slice_indices_fast,
_get_block_to_dataset_index_fast,
)
from fairseq.data import FairseqDataset, plasma_utils from fairseq.data import FairseqDataset, plasma_utils
...@@ -47,6 +42,16 @@ class TokenBlockDataset(FairseqDataset): ...@@ -47,6 +42,16 @@ class TokenBlockDataset(FairseqDataset):
include_targets=False, include_targets=False,
document_sep_len=1, document_sep_len=1,
): ):
try:
from fairseq.data.token_block_utils_fast import (
_get_slice_indices_fast,
_get_block_to_dataset_index_fast,
)
except ImportError:
raise ImportError(
'Please build Cython components with: `pip install --editable .`'
)
super().__init__() super().__init__()
self.dataset = dataset self.dataset = dataset
self.pad = pad self.pad = pad
......
...@@ -15,9 +15,12 @@ with open('README.md') as f: ...@@ -15,9 +15,12 @@ with open('README.md') as f:
readme = f.read() readme = f.read()
if sys.platform == 'darwin': if sys.platform == 'darwin':
extra_compile_args = ['-stdlib=libc++'] extra_compile_args = ['-stdlib=libc++', '-O3']
extra_link_args = ['-stdlib=libc++']
else: else:
extra_compile_args = ['-std=c++11'] extra_compile_args = ['-std=c++11', '-O3']
extra_link_args = ['-std=c++11']
bleu = Extension( bleu = Extension(
'fairseq.libbleu', 'fairseq.libbleu',
sources=[ sources=[
...@@ -27,8 +30,39 @@ bleu = Extension( ...@@ -27,8 +30,39 @@ bleu = Extension(
extra_compile_args=extra_compile_args, extra_compile_args=extra_compile_args,
) )
token_block_utils = [Extension("fairseq.data.token_block_utils_fast", ["fairseq/data/token_block_utils_fast.pyx"])]
data_utils_fast = [Extension("fairseq.data.data_utils_fast", ["fairseq/data/data_utils_fast.pyx"], language="c++")] def get_cython_modules():
token_block_utils = Extension(
"fairseq.data.token_block_utils_fast",
["fairseq/data/token_block_utils_fast.pyx"],
extra_compile_args=extra_compile_args,
extra_link_args=extra_link_args,
)
data_utils_fast = Extension(
"fairseq.data.data_utils_fast",
["fairseq/data/data_utils_fast.pyx"],
language="c++",
extra_compile_args=extra_compile_args,
extra_link_args=extra_link_args,
)
return [token_block_utils, data_utils_fast]
def my_build_ext(pars):
    """
    Delay loading of numpy headers.

    Returns a ``build_ext`` command instance whose ``finalize_options``
    imports numpy lazily and appends ``numpy.get_include()`` to the
    command's ``include_dirs``. Deferring the import means numpy only has
    to be importable when extensions are actually being built, not when
    this file is first evaluated.

    More details: https://stackoverflow.com/questions/54117786/add-numpy-get-include-argument-to-setuptools-without-preinstalled-numpy

    Args:
        pars: forwarded unchanged to the ``build_ext`` constructor
            (setuptools passes the distribution object here).

    Returns:
        An instance of the ``build_ext`` subclass defined below.
    """
    from setuptools.command.build_ext import build_ext as _build_ext

    class build_ext(_build_ext):
        def finalize_options(self):
            _build_ext.finalize_options(self)
            # Set the flag through the real ``builtins`` module. The
            # ``__builtins__`` global is an implementation detail: it is
            # the module only in ``__main__`` and a plain dict elsewhere
            # (e.g. when this file is exec'd by a build frontend), where
            # attribute assignment raises AttributeError.
            import builtins
            # Flag taken from the recipe linked above; presumably read by
            # numpy's package init on older numpy versions.
            builtins.__NUMPY_SETUP__ = False
            import numpy
            self.include_dirs.append(numpy.get_include())

    return build_ext(pars)
setup( setup(
name='fairseq', name='fairseq',
...@@ -45,6 +79,7 @@ setup( ...@@ -45,6 +79,7 @@ setup(
long_description=readme, long_description=readme,
long_description_content_type='text/markdown', long_description_content_type='text/markdown',
setup_requires=[ setup_requires=[
'numpy',
'cython', 'cython',
'setuptools>=18.0', 'setuptools>=18.0',
], ],
...@@ -58,7 +93,7 @@ setup( ...@@ -58,7 +93,7 @@ setup(
'tqdm', 'tqdm',
], ],
packages=find_packages(exclude=['scripts', 'tests']), packages=find_packages(exclude=['scripts', 'tests']),
ext_modules=token_block_utils + data_utils_fast + [bleu], ext_modules=get_cython_modules() + [bleu],
test_suite='tests', test_suite='tests',
entry_points={ entry_points={
'console_scripts': [ 'console_scripts': [
...@@ -71,5 +106,6 @@ setup( ...@@ -71,5 +106,6 @@ setup(
'fairseq-validate = fairseq_cli.validate:cli_main', 'fairseq-validate = fairseq_cli.validate:cli_main',
], ],
}, },
cmdclass={'build_ext': my_build_ext},
zip_safe=False, zip_safe=False,
) )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment