test_parfors.py

#
# Copyright (c) 2017 Intel Corporation
# SPDX-License-Identifier: BSD-2-Clause
#


import math
import re
import dis
import numbers
import os
import platform
import sys
import subprocess
import types as pytypes
import warnings
from functools import reduce
import numpy as np
from numpy.random import randn
import operator
from collections import defaultdict, namedtuple
import copy
from itertools import cycle, chain
import subprocess as subp

import numba.parfors.parfor
from numba import (njit, prange, parallel_chunksize,
                   get_parallel_chunksize, set_parallel_chunksize,
                   set_num_threads, get_num_threads, typeof)
from numba.core import (types, typing, errors, ir, rewrites,
                        typed_passes, inline_closurecall, config, compiler, cpu)
from numba.extending import (overload_method, register_model,
                             typeof_impl, unbox, NativeValue, models)
from numba.core.registry import cpu_target
from numba.core.annotations import type_annotations
from numba.core.ir_utils import (find_callname, guard, build_definitions,
                            get_definition, is_getitem, is_setitem,
                            index_var_of_get_setitem)
from numba.np.unsafe.ndarray import empty_inferred as unsafe_empty
from numba.core.bytecode import ByteCodeIter
from numba.core.compiler import (compile_isolated, Flags, CompilerBase,
                                 DefaultPassBuilder)
from numba.core.compiler_machinery import register_pass, AnalysisPass
from numba.core.typed_passes import IRLegalization
from numba.tests.support import (TestCase, captured_stdout, MemoryLeakMixin,
                      override_env_config, linux_only, tag,
                      skip_parfors_unsupported, _32bit, needs_blas,
                      needs_lapack, disabled_test, skip_unless_scipy,
                      needs_subprocess)
from numba.core.extending import register_jitable
from numba.core.bytecode import _fix_LOAD_GLOBAL_arg
from numba.core import utils

import cmath
import unittest

# NOTE: Each parfors test class is run in separate subprocess, this is to reduce
# memory pressure in CI settings. The environment variable "SUBPROC_TEST" is
# used to determine whether a test is skipped or not, such that if you want to
# run any parfors test directly this environment variable can be set. The
# subprocesses running the test classes set this environment variable as the new
# process starts which enables the tests within the process. The decorator
# @needs_subprocess is used to ensure the appropriate test skips are made.


@skip_parfors_unsupported
class TestParforsRunner(TestCase):

    _numba_parallel_test_ = False

    # Each test class can run for 30 minutes before time out. Extend this to an
    # hour on aarch64 (some public CI systems were timing out).
    _TIMEOUT = 1800 if platform.machine() != 'aarch64' else 3600

    """This is the test runner for all the parfors tests, it runs them in
    subprocesses as described above. The convention for the test method naming
    is: `test_<TestClass>` where <TestClass> is the name of the test class in
    this module.
    """
    def runner(self):
        themod = self.__module__
        test_clazz_name = self.id().split('.')[-1].split('_')[-1]
        # don't specify a given test, it's an entire class that needs running
        self.subprocess_test_runner(test_module=themod,
                                    test_class=test_clazz_name,
                                    timeout=self._TIMEOUT)

    def test_TestParforBasic(self):
        self.runner()

    def test_TestParforNumericalMisc(self):
        self.runner()

    def test_TestParforNumPy(self):
        self.runner()

    def test_TestParfors(self):
        self.runner()

    def test_TestParforsBitMask(self):
        self.runner()

    def test_TestParforsDiagnostics(self):
        self.runner()

    def test_TestParforsLeaks(self):
        self.runner()

    def test_TestParforsMisc(self):
        self.runner()

    def test_TestParforsOptions(self):
        self.runner()

    def test_TestParforsSlice(self):
        self.runner()

    def test_TestParforsVectorizer(self):
        self.runner()

    def test_TestPrangeBasic(self):
        self.runner()

    def test_TestPrangeSpecific(self):
        self.runner()


x86_only = unittest.skipIf(platform.machine() not in ('i386', 'x86_64'), 'x86 only test')

_GLOBAL_INT_FOR_TESTING1 = 17
_GLOBAL_INT_FOR_TESTING2 = 5

TestNamedTuple = namedtuple('TestNamedTuple', ('part0', 'part1'))


def null_comparer(a, b):
    """
    Used with check_arq_equality to indicate that we do not care
    whether the value of the parameter at the end of the function
    has a particular value.
    """
    pass


@needs_subprocess
class TestParforsBase(TestCase):
    """
    Base class for testing parfors.
    Provides functions for compilation and three way comparison between
    python functions, njit'd functions and parfor njit'd functions.
    """

    _numba_parallel_test_ = False

    def __init__(self, *args):
        # flags for njit()
        self.cflags = Flags()
        self.cflags.nrt = True

        # flags for njit(parallel=True)
        self.pflags = Flags()
        self.pflags.auto_parallel = cpu.ParallelOptions(True)
        self.pflags.nrt = True

        # flags for njit(parallel=True, fastmath=True)
        self.fast_pflags = Flags()
        self.fast_pflags.auto_parallel = cpu.ParallelOptions(True)
        self.fast_pflags.nrt = True
        self.fast_pflags.fastmath = cpu.FastMathOptions(True)
        super(TestParforsBase, self).__init__(*args)

    def _compile_this(self, func, sig, flags):
        return compile_isolated(func, sig, flags=flags)

    def compile_parallel(self, func, sig):
        return self._compile_this(func, sig, flags=self.pflags)

    def compile_parallel_fastmath(self, func, sig):
        return self._compile_this(func, sig, flags=self.fast_pflags)

    def compile_njit(self, func, sig):
        return self._compile_this(func, sig, flags=self.cflags)

    def compile_all(self, pyfunc, *args, **kwargs):
        sig = tuple([numba.typeof(x) for x in args])

        # compile the prange injected function
        cpfunc = self.compile_parallel(pyfunc, sig)

        # compile a standard njit of the original function
        cfunc = self.compile_njit(pyfunc, sig)

        return cfunc, cpfunc

    def check_parfors_vs_others(self, pyfunc, cfunc, cpfunc, *args, **kwargs):
        """
        Checks python, njit and parfor impls produce the same result.

        Arguments:
            pyfunc - the python function to test
            cfunc - CompilerResult from njit of pyfunc
            cpfunc - CompilerResult from njit(parallel=True) of pyfunc
            args - arguments for the function being tested
        Keyword Arguments:
            scheduler_type - 'signed', 'unsigned' or None, default is None.
                           Supply in cases where the presence of a specific
                           scheduler is to be asserted.
            fastmath_pcres - a fastmath parallel compile result, if supplied
                             will be run to make sure the result is correct
            check_arg_equality - some functions need to check that a
                                 parameter is modified rather than a certain
                                 value returned.  If this keyword argument
                                 is supplied, it should be a list of
                                 comparison functions such that the i'th
                                 function in the list is used to compare the
                                 i'th parameter of the njit and parallel=True
                                 functions against the i'th parameter of the
                                 standard Python function, asserting if they
                                 differ.  The length of this list must be equal
                                 to the number of parameters to the function.
                                 The null comparator is available for use
                                 when you do not desire to test if some
                                 particular parameter is changed.
            Remaining kwargs are passed to np.testing.assert_almost_equal
        """
        scheduler_type = kwargs.pop('scheduler_type', None)
        check_fastmath = kwargs.pop('check_fastmath', None)
        fastmath_pcres = kwargs.pop('fastmath_pcres', None)
        check_scheduling = kwargs.pop('check_scheduling', True)
        check_args_for_equality = kwargs.pop('check_arg_equality', None)

        def copy_args(*args):
            if not args:
                return tuple()
            new_args = []
            for x in args:
                if isinstance(x, np.ndarray):
                    new_args.append(x.copy('k'))
                elif isinstance(x, np.number):
                    new_args.append(x.copy())
                elif isinstance(x, numbers.Number):
                    new_args.append(x)
                elif x is None:
                    new_args.append(x)
                elif isinstance(x, tuple):
                    new_args.append(copy.deepcopy(x))
                elif isinstance(x, list):
                    new_args.append(x[:])
                else:
                    raise ValueError('Unsupported argument type encountered')
            return tuple(new_args)

        # python result
        py_args = copy_args(*args)
        py_expected = pyfunc(*py_args)

        # njit result
        njit_args = copy_args(*args)
        njit_output = cfunc.entry_point(*njit_args)

        # parfor result
        parfor_args = copy_args(*args)
        parfor_output = cpfunc.entry_point(*parfor_args)

        if check_args_for_equality is None:
            np.testing.assert_almost_equal(njit_output, py_expected, **kwargs)
            np.testing.assert_almost_equal(parfor_output, py_expected, **kwargs)
            self.assertEqual(type(njit_output), type(parfor_output))
        else:
            assert(len(py_args) == len(check_args_for_equality))
            for pyarg, njitarg, parforarg, argcomp in zip(
                py_args, njit_args, parfor_args, check_args_for_equality):
                argcomp(njitarg, pyarg, **kwargs)
                argcomp(parforarg, pyarg, **kwargs)

        if check_scheduling:
            self.check_scheduling(cpfunc, scheduler_type)

        # if requested check fastmath variant
        if fastmath_pcres is not None:
            parfor_fastmath_output = fastmath_pcres.entry_point(*copy_args(*args))
            np.testing.assert_almost_equal(parfor_fastmath_output, py_expected,
                                           **kwargs)

    def check(self, pyfunc, *args, **kwargs):
        """Checks that pyfunc compiles for *args under parallel=True and njit
        and asserts that all version execute and produce the same result"""
        cfunc, cpfunc = self.compile_all(pyfunc, *args)
        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)

    def check_variants(self, impl, arg_gen, **kwargs):
        """Run self.check(impl, ...) on array data generated from arg_gen.
        """
        for args in arg_gen():
            with self.subTest(list(map(typeof, args))):
                self.check(impl, *args, **kwargs)

    def count_parfors_variants(self, impl, arg_gen, **kwargs):
        """Run self.countParfors(impl, ...) on array types generated from
        arg_gen.
        """
        for args in arg_gen():
            with self.subTest(list(map(typeof, args))):
                argtys = tuple(map(typeof, args))
                # At least one parfors
                self.assertGreaterEqual(countParfors(impl, argtys), 1)

    def check_scheduling(self, cres, scheduler_type):
        # make sure parfor set up scheduling
        scheduler_str = '@do_scheduling'
        if scheduler_type is not None:
            if scheduler_type in ['signed', 'unsigned']:
                scheduler_str += '_' + scheduler_type
            else:
                msg = "Unknown scheduler_type specified: %s"
                raise ValueError(msg % scheduler_type)

        self.assertIn(scheduler_str, cres.library.get_llvm_str())

    def gen_linspace(self, n, ct):
        """Make *ct* sample 1D arrays of length *n* using np.linspace().
        """
        def gen():
            yield np.linspace(0, 1, n)
            yield np.linspace(2, 1, n)
            yield np.linspace(1, 2, n)

        src = cycle(gen())
        return [next(src) for i in range(ct)]

    def gen_linspace_variants(self, ct):
        """Make 1D, 2D, 3D variants of the data in C and F orders
        """
        # 1D
        yield self.gen_linspace(10, ct=ct)

        # 2D
        arr2ds = [x.reshape((2, 3))
                  for x in self.gen_linspace(n=2 * 3, ct=ct)]
        yield arr2ds
        # Fortran order
        yield [np.asfortranarray(x) for x in arr2ds]

        # 3D
        arr3ds = [x.reshape((2, 3, 4))
                  for x in self.gen_linspace(n=2 * 3 * 4, ct=ct)]
        yield arr3ds
        # Fortran order
        yield [np.asfortranarray(x) for x in arr3ds]

    def _filter_mod(self, mod, magicstr, checkstr=None):
        """ helper function to filter out modules by name"""
        filt = [x for x in mod if magicstr in x.name]
        if checkstr is not None:
            for x in filt:
                assert checkstr in str(x)
        return filt

    def _get_gufunc_modules(self, cres, magicstr, checkstr=None):
        """ gets the gufunc LLVM Modules"""
        _modules = [x for x in cres.library._codegen._engine._ee._modules]
        return self._filter_mod(_modules, magicstr, checkstr=checkstr)

    def _get_gufunc_info(self, cres, fn):
        """ helper for gufunc IR/asm generation"""
        # get the gufunc modules
        magicstr = '__numba_parfor_gufunc'
        gufunc_mods = self._get_gufunc_modules(cres, magicstr)
        x = dict()
        for mod in gufunc_mods:
            x[mod.name] = fn(mod)
        return x

    def _get_gufunc_ir(self, cres):
        """
        Returns the IR of the gufuncs used as parfor kernels
        as a dict mapping the gufunc name to its IR.

        Arguments:
         cres - a CompileResult from `njit(parallel=True, ...)`
        """
        return self._get_gufunc_info(cres, str)

    def _get_gufunc_asm(self, cres):
        """
        Returns the assembly of the gufuncs used as parfor kernels
        as a dict mapping the gufunc name to its assembly.

        Arguments:
         cres - a CompileResult from `njit(parallel=True, ...)`
        """
        tm = cres.library._codegen._tm
        def emit_asm(mod):
            return str(tm.emit_assembly(mod))
        return self._get_gufunc_info(cres, emit_asm)

    def assert_fastmath(self, pyfunc, sig):
        """
        Asserts that the fastmath flag has some effect in that suitable
        instructions are now labelled as `fast`. Whether LLVM can actually do
        anything to optimise better now the derestrictions are supplied is
        another matter!

        Arguments:
         pyfunc - a function that contains operations with parallel semantics
         sig - the type signature of pyfunc
        """

        cres = self.compile_parallel_fastmath(pyfunc, sig)
        _ir = self._get_gufunc_ir(cres)

        def _get_fast_instructions(ir):
            splitted = ir.splitlines()
            fast_inst = []
            for x in splitted:
                m = re.search(r'\bfast\b', x)  # \b for wholeword
                if m is not None:
                    fast_inst.append(x)
            return fast_inst

        def _assert_fast(instrs):
            ops = ('fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp', 'call')
            for inst in instrs:
                count = 0
                for op in ops:
                    match = op + ' fast'
                    if match in inst:
                        count += 1
                self.assertTrue(count > 0)

        for name, guir in _ir.items():
            inst = _get_fast_instructions(guir)
            _assert_fast(inst)


def blackscholes_impl(sptprice, strike, rate, volatility, timev):
    # blackscholes example
    logterm = np.log(sptprice / strike)
    powterm = 0.5 * volatility * volatility
    den = volatility * np.sqrt(timev)
    d1 = (((rate + powterm) * timev) + logterm) / den
    d2 = d1 - den
    NofXd1 = 0.5 + 0.5 * 2.0 * d1
    NofXd2 = 0.5 + 0.5 * 2.0 * d2
    futureValue = strike * np.exp(- rate * timev)
    c1 = futureValue * NofXd2
    call = sptprice * NofXd1 - c1
    put = call - futureValue + sptprice
    return put


def lr_impl(Y, X, w, iterations):
    # logistic regression example
    for i in range(iterations):
        w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X)
    return w

def example_kmeans_test(A, numCenter, numIter, init_centroids):
    centroids = init_centroids
    N, D = A.shape

    for l in range(numIter):
        dist = np.array([[math.sqrt(np.sum((A[i,:]-centroids[j,:])**2))
                                for j in range(numCenter)] for i in range(N)])
        labels = np.array([dist[i,:].argmin() for i in range(N)])

        centroids = np.array([[np.sum(A[labels==i, j])/np.sum(labels==i)
                                 for j in range(D)] for i in range(numCenter)])

    return centroids

def get_optimized_numba_ir(test_func, args, **kws):
    typingctx = typing.Context()
    targetctx = cpu.CPUContext(typingctx, 'cpu')
    test_ir = compiler.run_frontend(test_func)
    if kws:
        options = cpu.ParallelOptions(kws)
    else:
        options = cpu.ParallelOptions(True)

    tp = TestPipeline(typingctx, targetctx, args, test_ir)

    with cpu_target.nested_context(typingctx, targetctx):
        typingctx.refresh()
        targetctx.refresh()

        inline_pass = inline_closurecall.InlineClosureCallPass(tp.state.func_ir,
                                                               options,
                                                               typed=True)
        inline_pass.run()

        rewrites.rewrite_registry.apply('before-inference', tp.state)

        tp.state.typemap, tp.state.return_type, tp.state.calltypes, _ = \
        typed_passes.type_inference_stage(tp.state.typingctx,
            tp.state.targetctx, tp.state.func_ir, tp.state.args, None)

        type_annotations.TypeAnnotation(
            func_ir=tp.state.func_ir,
            typemap=tp.state.typemap,
            calltypes=tp.state.calltypes,
            lifted=(),
            lifted_from=None,
            args=tp.state.args,
            return_type=tp.state.return_type,
            html_output=config.HTML)

        diagnostics = numba.parfors.parfor.ParforDiagnostics()

        preparfor_pass = numba.parfors.parfor.PreParforPass(
            tp.state.func_ir, tp.state.typemap, tp.state.calltypes,
            tp.state.typingctx, tp.state.targetctx, options,
            swapped=diagnostics.replaced_fns)
        preparfor_pass.run()

        rewrites.rewrite_registry.apply('after-inference', tp.state)

        flags = compiler.Flags()
        parfor_pass = numba.parfors.parfor.ParforPass(
            tp.state.func_ir, tp.state.typemap, tp.state.calltypes,
            tp.state.return_type, tp.state.typingctx, tp.state.targetctx,
            options, flags, tp.state.metadata, diagnostics=diagnostics)
        parfor_pass.run()
        parfor_pass = numba.parfors.parfor.ParforFusionPass(
            tp.state.func_ir, tp.state.typemap, tp.state.calltypes,
            tp.state.return_type, tp.state.typingctx, tp.state.targetctx,
            options, flags, tp.state.metadata, diagnostics=diagnostics)
        parfor_pass.run()
        parfor_pass = numba.parfors.parfor.ParforPreLoweringPass(
            tp.state.func_ir, tp.state.typemap, tp.state.calltypes,
            tp.state.return_type, tp.state.typingctx, tp.state.targetctx,
            options, flags, tp.state.metadata, diagnostics=diagnostics)
        parfor_pass.run()
        test_ir._definitions = build_definitions(test_ir.blocks)

    return test_ir, tp

def countParfors(test_func, args, **kws):
    test_ir, tp = get_optimized_numba_ir(test_func, args, **kws)
    ret_count = 0

    for label, block in test_ir.blocks.items():
        for i, inst in enumerate(block.body):
            if isinstance(inst, numba.parfors.parfor.Parfor):
                ret_count += 1

    return ret_count


def countArrays(test_func, args, **kws):
    test_ir, tp = get_optimized_numba_ir(test_func, args, **kws)
    return _count_arrays_inner(test_ir.blocks, tp.state.typemap)

def get_init_block_size(test_func, args, **kws):
    test_ir, tp = get_optimized_numba_ir(test_func, args, **kws)
    blocks = test_ir.blocks

    ret_count = 0

    for label, block in blocks.items():
        for i, inst in enumerate(block.body):
            if isinstance(inst, numba.parfors.parfor.Parfor):
                ret_count += len(inst.init_block.body)

    return ret_count

def _count_arrays_inner(blocks, typemap):
    ret_count = 0
    arr_set = set()

    for label, block in blocks.items():
        for i, inst in enumerate(block.body):
            if isinstance(inst, numba.parfors.parfor.Parfor):
                parfor_blocks = inst.loop_body.copy()
                parfor_blocks[0] = inst.init_block
                ret_count += _count_arrays_inner(parfor_blocks, typemap)
            if (isinstance(inst, ir.Assign)
                    and isinstance(typemap[inst.target.name],
                                    types.ArrayCompatible)):
                arr_set.add(inst.target.name)

    ret_count += len(arr_set)
    return ret_count

def countArrayAllocs(test_func, args, **kws):
    test_ir, tp = get_optimized_numba_ir(test_func, args, **kws)
    ret_count = 0

    for block in test_ir.blocks.values():
        ret_count += _count_array_allocs_inner(test_ir, block)

    return ret_count

def _count_array_allocs_inner(func_ir, block):
    ret_count = 0
    for inst in block.body:
        if isinstance(inst, numba.parfors.parfor.Parfor):
            ret_count += _count_array_allocs_inner(func_ir, inst.init_block)
            for b in inst.loop_body.values():
                ret_count += _count_array_allocs_inner(func_ir, b)

        if (isinstance(inst, ir.Assign) and isinstance(inst.value, ir.Expr)
                and inst.value.op == 'call'
                and (guard(find_callname, func_ir, inst.value) == ('empty', 'numpy')
                or guard(find_callname, func_ir, inst.value)
                    == ('empty_inferred', 'numba.np.unsafe.ndarray'))):
            ret_count += 1

    return ret_count

def countNonParforArrayAccesses(test_func, args, **kws):
    test_ir, tp = get_optimized_numba_ir(test_func, args, **kws)
    return _count_non_parfor_array_accesses_inner(test_ir, test_ir.blocks,
                                                  tp.state.typemap)

def _count_non_parfor_array_accesses_inner(f_ir, blocks, typemap, parfor_indices=None):
    ret_count = 0
    if parfor_indices is None:
        parfor_indices = set()

    for label, block in blocks.items():
        for stmt in block.body:
            if isinstance(stmt, numba.parfors.parfor.Parfor):
                parfor_indices.add(stmt.index_var.name)
                parfor_blocks = stmt.loop_body.copy()
                parfor_blocks[0] = stmt.init_block
                ret_count += _count_non_parfor_array_accesses_inner(
                    f_ir, parfor_blocks, typemap, parfor_indices)

            # getitem
            elif (is_getitem(stmt) and isinstance(typemap[stmt.value.value.name],
                        types.ArrayCompatible) and not _uses_indices(
                        f_ir, index_var_of_get_setitem(stmt), parfor_indices)):
                ret_count += 1

            # setitem
            elif (is_setitem(stmt) and isinstance(typemap[stmt.target.name],
                    types.ArrayCompatible) and not _uses_indices(
                    f_ir, index_var_of_get_setitem(stmt), parfor_indices)):
                ret_count += 1

            # find parfor_index aliases
            elif (isinstance(stmt, ir.Assign) and
                  isinstance(stmt.value, ir.Var) and
                  stmt.value.name in parfor_indices):
                parfor_indices.add(stmt.target.name)

    return ret_count

def _uses_indices(f_ir, index, index_set):
    if index.name in index_set:
        return True

    ind_def = guard(get_definition, f_ir, index)
    if isinstance(ind_def, ir.Expr) and ind_def.op == 'build_tuple':
        varnames = set(v.name for v in ind_def.items)
        return len(varnames & index_set) != 0

    return False


class TestPipeline(object):
    def __init__(self, typingctx, targetctx, args, test_ir):
        self.state = compiler.StateDict()
        self.state.typingctx = typingctx
        self.state.targetctx = targetctx
        self.state.args = args
        self.state.func_ir = test_ir
        self.state.typemap = None
        self.state.return_type = None
        self.state.calltypes = None
        self.state.metadata = {}


@skip_parfors_unsupported
class TestParforBasic(TestParforsBase):
    """Smoke tests for the parfors transforms. These tests check the most basic
    functionality"""

    def __init__(self, *args):
        TestParforsBase.__init__(self, *args)
        # these are used in the mass of simple tests
        m = np.reshape(np.arange(12.), (3, 4))
        self.simple_args = [np.arange(3.), np.arange(4.), m, m.T]

    def test_simple01(self):
        def test_impl():
            return np.ones(())
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))

    def test_simple02(self):
        def test_impl():
            return np.ones((1,))
        self.check(test_impl)

    def test_simple03(self):
        def test_impl():
            return np.ones((1, 2))
        self.check(test_impl)

    def test_simple04(self):
        def test_impl():
            return np.ones(1)
        self.check(test_impl)

    def test_simple07(self):
        def test_impl():
            return np.ones((1, 2), dtype=np.complex128)
        self.check(test_impl)

    def test_simple08(self):
        def test_impl():
            return np.ones((1, 2)) + np.ones((1, 2))
        self.check(test_impl)

    def test_simple09(self):
        def test_impl():
            return np.ones((1, 1))
        self.check(test_impl)

    def test_simple10(self):
        def test_impl():
            return np.ones((0, 0))
        self.check(test_impl)

    def test_simple11(self):
        def test_impl():
            return np.ones((10, 10)) + 1.
        self.check(test_impl)

    def test_simple12(self):
        def test_impl():
            return np.ones((10, 10)) + np.complex128(1.)
        self.check(test_impl)

    def test_simple13(self):
        def test_impl():
            return np.complex128(1.)
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))

    def test_simple14(self):
        def test_impl():
            return np.ones((10, 10))[0::20]
        self.check(test_impl)

    def test_simple15(self):
        def test_impl(v1, v2, m1, m2):
            return v1 + v1
        self.check(test_impl, *self.simple_args)

    def test_simple16(self):
        def test_impl(v1, v2, m1, m2):
            return m1 + m1
        self.check(test_impl, *self.simple_args)

    def test_simple17(self):
        def test_impl(v1, v2, m1, m2):
            return m2 + v1
        self.check(test_impl, *self.simple_args)

    @needs_lapack
    def test_simple18(self):
        def test_impl(v1, v2, m1, m2):
            return m1.T + np.linalg.svd(m2)[1]
        self.check(test_impl, *self.simple_args)

    @needs_blas
    def test_simple19(self):
        def test_impl(v1, v2, m1, m2):
            return np.dot(m1, v2)
        self.check(test_impl, *self.simple_args)

    @needs_blas
    def test_simple20(self):
        def test_impl(v1, v2, m1, m2):
            return np.dot(m1, m2)
        # gemm is left to BLAS
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl, *self.simple_args)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))

    @needs_blas
    def test_simple21(self):
        def test_impl(v1, v2, m1, m2):
            return np.dot(v1, v1)
        self.check(test_impl, *self.simple_args)

    def test_simple22(self):
        def test_impl(v1, v2, m1, m2):
            return np.sum(v1 + v1)
        self.check(test_impl, *self.simple_args)

    def test_simple23(self):
        def test_impl(v1, v2, m1, m2):
            x = 2 * v1
            y = 2 * v1
            return 4 * np.sum(x**2 + y**2 < 1) / 10
        self.check(test_impl, *self.simple_args)

    def test_simple24(self):
        def test_impl():
            n = 20
            A = np.ones((n, n))
            b = np.arange(n)
            return np.sum(A[:, b])
        self.check(test_impl)

    @disabled_test
    def test_simple_operator_15(self):
        """same as corresponding test_simple_<n> case but using operator.add"""
        def test_impl(v1, v2, m1, m2):
            return operator.add(v1, v1)

        self.check(test_impl, *self.simple_args)

    @disabled_test
    def test_simple_operator_16(self):
        def test_impl(v1, v2, m1, m2):
            return operator.add(m1, m1)

        self.check(test_impl, *self.simple_args)

    @disabled_test
    def test_simple_operator_17(self):
        def test_impl(v1, v2, m1, m2):
            return operator.add(m2, v1)

        self.check(test_impl, *self.simple_args)

    def test_inplace_alias(self):
        # issue7201
        def test_impl(a):
            a += 1
            a[:] = 3

        def comparer(a, b):
            np.testing.assert_equal(a, b)

        x = np.ones(1)
        self.check(test_impl, x, check_arg_equality=[comparer])


@skip_parfors_unsupported
class TestParforNumericalMisc(TestParforsBase):
    """ Miscellaneous 'classical' numerical tests """

    def test_pi(self):
        def test_impl(n):
            x = 2 * np.random.ranf(n) - 1
            y = 2 * np.random.ranf(n) - 1
            return 4 * np.sum(x**2 + y**2 < 1) / n

        self.check(test_impl, 100000, decimal=1)
        self.assertEqual(countParfors(test_impl, (types.int64, )), 1)
        self.assertEqual(countArrays(test_impl, (types.intp,)), 0)

    def test_blackscholes(self):
        # blackscholes takes 5 1D float array args
        args = (numba.float64[:], ) * 5
        self.assertEqual(countParfors(blackscholes_impl, args), 1)

    @needs_blas
    def test_logistic_regression(self):
        args = (numba.float64[:], numba.float64[:,:], numba.float64[:],
                numba.int64)
        self.assertEqual(countParfors(lr_impl, args), 2)
        self.assertEqual(countArrayAllocs(lr_impl, args), 1)

    def test_kmeans(self):
        np.random.seed(0)
        N = 1024
        D = 10
        centers = 3
        A = np.random.ranf((N, D))
        init_centroids = np.random.ranf((centers, D))
        self.check(example_kmeans_test, A, centers, 3, init_centroids,
                                                                    decimal=1)
        # TODO: count parfors after k-means fusion is working
        # requires recursive parfor counting
        arg_typs = (types.Array(types.float64, 2, 'C'), types.intp, types.intp,
                    types.Array(types.float64, 2, 'C'))
        self.assertEqual(
            countNonParforArrayAccesses(example_kmeans_test, arg_typs), 0)


@skip_parfors_unsupported
class TestParforNumPy(TestParforsBase):
    """Tests NumPy functionality under parfors"""

    @needs_blas
    def test_mvdot(self):
        def test_impl(a, v):
            return np.dot(a, v)

        A = np.linspace(0, 1, 20).reshape(2, 10)
        v = np.linspace(2, 1, 10)

        self.check(test_impl, A, v)

    def test_fuse_argmin_argmax_max_min(self):
        for op in [np.argmin, np.argmax, np.min, np.max]:
            def test_impl(n):
                A = np.ones(n)
                C = op(A)
                B = A.sum()
                return B + C
            self.check(test_impl, 256)
            self.assertEqual(countParfors(test_impl, (types.int64, )), 1)
            self.assertEqual(countArrays(test_impl, (types.intp,)), 0)

    def test_np_random_func_direct_import(self):
        def test_impl(n):
            A = randn(n)
            return A[0]
        self.assertEqual(countParfors(test_impl, (types.int64, )), 1)

    def test_arange(self):
        # test with stop only
        def test_impl1(n):
            return np.arange(n)
        # start and stop
        def test_impl2(s, n):
            return np.arange(s, n)
        # start, step, stop
        def test_impl3(s, n, t):
            return np.arange(s, n, t)

        for arg in [11, 128, 30.0, complex(4,5), complex(5,4)]:
            self.check(test_impl1, arg)
            self.check(test_impl2, 2, arg)
            self.check(test_impl3, 2, arg, 2)

    def test_arange_dtype(self):
        # test with stop only
        def test_impl1(n):
            return np.arange(n, dtype=np.float32)
        # start and stop
        def test_impl2(s, n):
            return np.arange(s, n, dtype=np.float32)
        # start, step, stop
        def test_impl3(s, n, t):
            return np.arange(s, n, t, dtype=np.float32)

        for arg in [11, 128, 30.0]:
            self.check(test_impl1, arg)
            self.check(test_impl2, 2, arg)
            self.check(test_impl3, 2, arg, 2)

    def test_linspace(self):
        # without num
        def test_impl1(start, stop):
            return np.linspace(start, stop)
        # with num
        def test_impl2(start, stop, num):
            return np.linspace(start, stop, num)

        for arg in [11, 128, 30.0, complex(4,5), complex(5,4)]:
            self.check(test_impl1, 2, arg)
            self.check(test_impl2, 2, arg, 30)

    def test_mean(self):
        def test_impl(A):
            return A.mean()
        N = 100
        A = np.random.ranf(N)
        B = np.random.randint(10, size=(N, 3))
        self.check(test_impl, A)
        self.check(test_impl, B)
        self.assertEqual(countParfors(test_impl, (types.Array(types.float64, 1, 'C'), )), 1)
        self.assertEqual(countParfors(test_impl, (types.Array(types.float64, 2, 'C'), )), 1)

        # Test variants
        data_gen = lambda: self.gen_linspace_variants(1)
        self.check_variants(test_impl, data_gen)
        self.count_parfors_variants(test_impl, data_gen)

    def test_var(self):
        def test_impl(A):
            return A.var()
        N = 100
        A = np.random.ranf(N)
        B = np.random.randint(10, size=(N, 3))
        C = A + 1j * A
        self.check(test_impl, A)
        self.check(test_impl, B)
        self.check(test_impl, C)
        self.assertEqual(countParfors(test_impl, (types.Array(types.float64, 1, 'C'), )), 2)
        self.assertEqual(countParfors(test_impl, (types.Array(types.float64, 2, 'C'), )), 2)

        # Test variants
        data_gen = lambda: self.gen_linspace_variants(1)
        self.check_variants(test_impl, data_gen)
        self.count_parfors_variants(test_impl, data_gen)

    def test_std(self):
        def test_impl(A):
            return A.std()
        N = 100
        A = np.random.ranf(N)
        B = np.random.randint(10, size=(N, 3))
        C = A + 1j * A
        self.check(test_impl, A)
        self.check(test_impl, B)
        self.check(test_impl, C)
        argty = (types.Array(types.float64, 1, 'C'),)
        self.assertEqual(countParfors(test_impl, argty), 2)
        self.assertEqual(countParfors(test_impl, argty), 2)

        # Test variants
        data_gen = lambda: self.gen_linspace_variants(1)
        self.check_variants(test_impl, data_gen)
        self.count_parfors_variants(test_impl, data_gen)

    def test_random_parfor(self):
        """
        Test function with only a random call to make sure a random function
        like ranf is actually translated to a parfor.
        """
        def test_impl(n):
            A = np.random.ranf((n, n))
            return A
        self.assertEqual(countParfors(test_impl, (types.int64, )), 1)

    def test_randoms(self):
        def test_impl(n):
            A = np.random.standard_normal(size=(n, n))
            B = np.random.randn(n, n)
            C = np.random.normal(0.0, 1.0, (n, n))
            D = np.random.chisquare(1.0, (n, n))
            E = np.random.randint(1, high=3, size=(n, n))
            F = np.random.triangular(1, 2, 3, (n, n))
            return np.sum(A+B+C+D+E+F)

        n = 128
        cpfunc = self.compile_parallel(test_impl, (numba.typeof(n),))
        parfor_output = cpfunc.entry_point(n)
        py_output = test_impl(n)
        # check results within 5% since random numbers generated in parallel
        np.testing.assert_allclose(parfor_output, py_output, rtol=0.05)
        self.assertEqual(countParfors(test_impl, (types.int64, )), 1)

    def test_dead_randoms(self):
        def test_impl(n):
            A = np.random.standard_normal(size=(n, n))
            B = np.random.randn(n, n)
            C = np.random.normal(0.0, 1.0, (n, n))
            D = np.random.chisquare(1.0, (n, n))
            E = np.random.randint(1, high=3, size=(n, n))
            F = np.random.triangular(1, 2, 3, (n, n))
            return 3

        n = 128
        cpfunc = self.compile_parallel(test_impl, (numba.typeof(n),))
        parfor_output = cpfunc.entry_point(n)
        py_output = test_impl(n)
        self.assertEqual(parfor_output, py_output)
        self.assertEqual(countParfors(test_impl, (types.int64, )), 0)

    def test_min(self):
        def test_impl1(A):
            return A.min()

        def test_impl2(A):
            return np.min(A)

        n = 211
        A = np.random.ranf(n)
        B = np.random.randint(10, size=n).astype(np.int32)
        C = np.random.ranf((n, n))  # test multi-dimensional array
        D = np.array([np.inf, np.inf])
        self.check(test_impl1, A)
        self.check(test_impl1, B)
        self.check(test_impl1, C)
        self.check(test_impl1, D)
        self.check(test_impl2, A)
        self.check(test_impl2, B)
        self.check(test_impl2, C)
        self.check(test_impl2, D)

        # checks that 0d array input raises
        msg = ("zero-size array to reduction operation "
               "minimum which has no identity")
        for impl in (test_impl1, test_impl2):
            pcfunc = self.compile_parallel(impl, (types.int64[:],))
            with self.assertRaises(ValueError) as e:
                pcfunc.entry_point(np.array([], dtype=np.int64))
            self.assertIn(msg, str(e.exception))

        # Test variants
        data_gen = lambda: self.gen_linspace_variants(1)
        self.check_variants(test_impl1, data_gen)
        self.count_parfors_variants(test_impl1, data_gen)
        self.check_variants(test_impl2, data_gen)
        self.count_parfors_variants(test_impl2, data_gen)

    def test_max(self):
        def test_impl1(A):
            return A.max()

        def test_impl2(A):
            return np.max(A)

        n = 211
        A = np.random.ranf(n)
        B = np.random.randint(10, size=n).astype(np.int32)
        C = np.random.ranf((n, n))  # test multi-dimensional array
        D = np.array([-np.inf, -np.inf])
        self.check(test_impl1, A)
        self.check(test_impl1, B)
        self.check(test_impl1, C)
        self.check(test_impl1, D)
        self.check(test_impl2, A)
        self.check(test_impl2, B)
        self.check(test_impl2, C)
        self.check(test_impl2, D)

        # checks that 0d array input raises
        msg = ("zero-size array to reduction operation "
               "maximum which has no identity")
        for impl in (test_impl1, test_impl2):
            pcfunc = self.compile_parallel(impl, (types.int64[:],))
            with self.assertRaises(ValueError) as e:
                pcfunc.entry_point(np.array([], dtype=np.int64))
            self.assertIn(msg, str(e.exception))

        # Test variants
        data_gen = lambda: self.gen_linspace_variants(1)
        self.check_variants(test_impl1, data_gen)
        self.count_parfors_variants(test_impl1, data_gen)
        self.check_variants(test_impl2, data_gen)
        self.count_parfors_variants(test_impl2, data_gen)

    def test_argmax(self):
        def test_impl1(A):
            return A.argmax()

        def test_impl2(A):
            return np.argmax(A)

        n = 211
        A = np.array([1., 0., 3., 2., 3.])
        B = np.random.randint(10, size=n).astype(np.int32)
        C = np.random.ranf((n, n))  # test multi-dimensional array
        D = np.array([1., 0., np.nan, 2., 3.])
        self.check(test_impl1, A)
        self.check(test_impl1, B)
        self.check(test_impl1, C)
        self.check(test_impl1, D)
        self.check(test_impl2, A)
        self.check(test_impl2, B)
        self.check(test_impl2, C)
        self.check(test_impl2, D)

        # checks that 0d array input raises
        msg = 'attempt to get argmax of an empty sequence'
        for impl in (test_impl1, test_impl2):
            pcfunc = self.compile_parallel(impl, (types.int64[:],))
            with self.assertRaises(ValueError) as e:
                pcfunc.entry_point(np.array([], dtype=np.int64))
            self.assertIn(msg, str(e.exception))

        # Test variants
        data_gen = lambda: self.gen_linspace_variants(1)
        self.check_variants(test_impl1, data_gen)
        self.count_parfors_variants(test_impl1, data_gen)
        self.check_variants(test_impl2, data_gen)
        self.count_parfors_variants(test_impl2, data_gen)

    def test_argmin(self):
        def test_impl1(A):
            return A.argmin()

        def test_impl2(A):
            return np.argmin(A)

        n = 211
        A = np.array([1., 0., 2., 0., 3.])
        B = np.random.randint(10, size=n).astype(np.int32)
        C = np.random.ranf((n, n))  # test multi-dimensional array
        D = np.array([1., 0., np.nan, 0., 3.])
        self.check(test_impl1, A)
        self.check(test_impl1, B)
        self.check(test_impl1, C)
        self.check(test_impl1, D)
        self.check(test_impl2, A)
        self.check(test_impl2, B)
        self.check(test_impl2, C)
        self.check(test_impl2, D)

        # checks that 0d array input raises
        msg = 'attempt to get argmin of an empty sequence'
        for impl in (test_impl1, test_impl2):
            pcfunc = self.compile_parallel(impl, (types.int64[:],))
            with self.assertRaises(ValueError) as e:
                pcfunc.entry_point(np.array([], dtype=np.int64))
            self.assertIn(msg, str(e.exception))

        # Test variants
        data_gen = lambda: self.gen_linspace_variants(1)
        self.check_variants(test_impl1, data_gen)
        self.count_parfors_variants(test_impl1, data_gen)
        self.check_variants(test_impl2, data_gen)
        self.count_parfors_variants(test_impl2, data_gen)

    def test_ndarray_fill(self):
        def test_impl(x):
            x.fill(7.0)
            return x
        x = np.zeros(10)
        self.check(test_impl, x)
        argty = (types.Array(types.float64, 1, 'C'),)
        self.assertEqual(countParfors(test_impl, argty), 1)

    def test_ndarray_fill2d(self):
        def test_impl(x):
            x.fill(7.0)
            return x
        x = np.zeros((2,2))
        self.check(test_impl, x)
        argty = (types.Array(types.float64, 2, 'C'),)
        self.assertEqual(countParfors(test_impl, argty), 1)

    def test_reshape_with_neg_one(self):
        # issue3314
        def test_impl(a, b):
            result_matrix = np.zeros((b, b, 1), dtype=np.float64)
            sub_a = a[0:b]
            a = sub_a.size
            b = a / 1
            z = sub_a.reshape(-1, 1)
            result_data = sub_a / z
            result_matrix[:,:,0] = result_data
            return result_matrix

        a = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0,
                   7.0, 8.0, 9.0, 10.0, 11.0, 12.0])
        b = 3

        self.check(test_impl, a, b)

    def test_reshape_with_large_neg(self):
        # issue3314
        def test_impl(a, b):
            result_matrix = np.zeros((b, b, 1), dtype=np.float64)
            sub_a = a[0:b]
            a = sub_a.size
            b = a / 1
            z = sub_a.reshape(-1307, 1)
            result_data = sub_a / z
            result_matrix[:,:,0] = result_data
            return result_matrix

        a = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0,
                   7.0, 8.0, 9.0, 10.0, 11.0, 12.0])
        b = 3

        self.check(test_impl, a, b)

    def test_reshape_with_too_many_neg_one(self):
        # issue3314
        with self.assertRaises(errors.UnsupportedRewriteError) as raised:
            @njit(parallel=True)
            def test_impl(a, b):
                rm = np.zeros((b, b, 1), dtype=np.float64)
                sub_a = a[0:b]
                a = sub_a.size
                b = a / 1
                z = sub_a.reshape(-1, -1)
                result_data = sub_a / z
                rm[:,:,0] = result_data
                return rm

            a = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0,
                       7.0, 8.0, 9.0, 10.0, 11.0, 12.0])
            b = 3
            test_impl(a, b)

        msg = ("The reshape API may only include one negative argument.")
        self.assertIn(msg, str(raised.exception))

    def test_0d_array(self):
        def test_impl(n):
            return np.sum(n) + np.prod(n) + np.min(n) + np.max(n) + np.var(n)
        self.check(test_impl, np.array(7), check_scheduling=False)

    def test_real_imag_attr(self):
        # See issue 8012
        def test_impl(z):
            return np.sum(z.real ** 2 + z.imag ** 2)

        z = np.arange(5) * (1 + 1j)
        self.check(test_impl, z)
        self.assertEqual(countParfors(test_impl, (types.complex128[::1],)), 1)


class TestParforsUnsupported(TestCase):
    """Tests for unsupported use of parfors"""
    @unittest.skipIf(not _32bit, "Only impacts 32 bit hardware")
    @needs_blas
    def test_unsupported_combination_raises(self):
        """
        This test is in place until issues with the 'parallel'
        target on 32 bit hardware are fixed.
        """
        with self.assertRaises(errors.UnsupportedParforsError) as raised:
            @njit(parallel=True)
            def ddot(a, v):
                return np.dot(a, v)

            A = np.linspace(0, 1, 20).reshape(2, 10)
            v = np.linspace(2, 1, 10)
            ddot(A, v)

        msg = ("The 'parallel' target is not currently supported on 32 bit "
               "hardware")
        self.assertIn(msg, str(raised.exception))

@skip_parfors_unsupported
class TestParfors(TestParforsBase):
    """ Tests cpython, reduction and various parfors features"""

    def test_arraymap(self):
        def test_impl(a, x, y):
            return a * x + y

        self.check_variants(test_impl, lambda: self.gen_linspace_variants(3))

    def test_0d_broadcast(self):
        def test_impl():
            X = np.array(1)
            Y = np.ones((10, 12))
            return np.sum(X + Y)
        self.check(test_impl)
        self.assertEqual(countParfors(test_impl, ()), 1)

    def test_2d_parfor(self):
        def test_impl():
            X = np.ones((10, 12))
            Y = np.zeros((10, 12))
            return np.sum(X + Y)
        self.check(test_impl)
        self.assertEqual(countParfors(test_impl, ()), 1)

    def test_nd_parfor(self):
        def case1():
            X = np.ones((10, 12))
            Y = np.zeros((10, 12))
            yield (X, Y)

        data_gen = lambda: chain(case1(), self.gen_linspace_variants(2))

        def test_impl(X, Y):
            return np.sum(X + Y)

        self.check_variants(test_impl, data_gen)
        self.count_parfors_variants(test_impl, data_gen)

    def test_np_func_direct_import(self):
        from numpy import ones  # import here becomes FreeVar
        def test_impl(n):
            A = ones(n)
            return A[0]
        n = 111
        self.check(test_impl, n)

    def test_size_assertion(self):
        def test_impl(m, n):
            A = np.ones(m)
            B = np.ones(n)
            return np.sum(A + B)

        self.check(test_impl, 10, 10)
        with self.assertRaises(AssertionError) as raises:
            cfunc = njit(parallel=True)(test_impl)
            cfunc(10, 9)
        msg = "Sizes of A, B do not match"
        self.assertIn(msg, str(raises.exception))

    def test_cfg(self):
        # from issue #2477
        def test_impl(x, is_positive, N):
            for i in numba.prange(2):
                for j in range( i*N//2, (i+1)*N//2 ):
                    is_positive[j] = 0
                    if x[j] > 0:
                        is_positive[j] = 1

            return is_positive

        N = 100
        x = np.random.rand(N)
        is_positive = np.zeros(N)
        self.check(test_impl, x, is_positive, N)

    def test_reduce(self):
        def test_impl(A):
            init_val = 10
            return reduce(lambda a,b: min(a, b), A, init_val)

        n = 211
        A = np.random.ranf(n)
        self.check(test_impl, A)
        A = np.random.randint(10, size=n).astype(np.int32)
        self.check(test_impl, A)

        # test checking the number of arguments for the reduce function
        def test_impl():
            g = lambda x: x ** 2
            return reduce(g, np.array([1, 2, 3, 4, 5]), 2)
        with self.assertTypingError():
            self.check(test_impl)

        # test checking reduction over bitarray masked arrays
        n = 160
        A = np.random.randint(10, size=n).astype(np.int32)
        def test_impl(A):
            return np.sum(A[A>=3])
        self.check(test_impl, A)
        # TODO: this should fuse
        # self.assertTrue(countParfors(test_impl, (numba.float64[:],)) == 1)

        def test_impl(A):
            B = A[:,0]
            return np.sum(A[B>=3,1])
        self.check(test_impl, A.reshape((16,10)))
        # TODO: this should also fuse
        #self.assertTrue(countParfors(test_impl, (numba.float64[:,:],)) == 1)

        def test_impl(A):
            B = A[:,0]
            return np.sum(A[B>=3,1:2])
        self.check(test_impl, A.reshape((16,10)))
        # this doesn't fuse due to mixed indices
        self.assertEqual(countParfors(test_impl, (numba.float64[:,:],)), 2)

        def test_impl(A):
            min_val = np.amin(A)
            return A - min_val
        self.check(test_impl, A)
        # this doesn't fuse due to use of reduction variable
        self.assertEqual(countParfors(test_impl, (numba.float64[:],)), 2)

    def test_use_of_reduction_var1(self):
        def test_impl():
            acc = 0
            for i in prange(1):
                acc = cmath.sqrt(acc)
            return acc

        # checks that invalid use of reduction variable is detected
        msg = ("Use of reduction variable acc in an unsupported reduction function.")
        with self.assertRaises(ValueError) as e:
            pcfunc = self.compile_parallel(test_impl, ())
        self.assertIn(msg, str(e.exception))

    def test_unsupported_floordiv1(self):
        def test_impl():
            acc = 100
            for i in prange(2):
                acc //= 2
            return acc

        # checks that invalid use of ifloordiv reduction operator is detected
        msg = ("Parallel floordiv reductions are not supported. "
               "If all divisors are integers then a floordiv "
               "reduction can in some cases be parallelized as "
               "a multiply reduction followed by a floordiv of "
               "the resulting product.")
        with self.assertRaises(errors.NumbaValueError) as e:
            pcfunc = self.compile_parallel(test_impl, ())
        self.assertIn(msg, str(e.exception))

    def test_unsupported_xor1(self):
        def test_impl():
            acc = 100
            for i in prange(2):
                acc ^= i + 2
            return acc

        msg = ("Use of reduction variable acc in an unsupported reduction function.")
        with self.assertRaises(ValueError) as e:
            pcfunc = self.compile_parallel(test_impl, ())
        self.assertIn(msg, str(e.exception))

    def test_parfor_array_access1(self):
        # signed index of the prange generated by sum() should be replaced
        # resulting in array A to be eliminated (see issue #2846)
        def test_impl(n):
            A = np.ones(n)
            return A.sum()

        n = 211
        self.check(test_impl, n)
        self.assertEqual(countArrays(test_impl, (types.intp,)), 0)

    def test_parfor_array_access2(self):
        # in this test, the prange index has the same name (i) in two loops
        # thus, i has multiple definitions and is harder to replace
        def test_impl(n):
            A = np.ones(n)
            m = 0
            n = 0
            for i in numba.prange(len(A)):
                m += A[i]

            for i in numba.prange(len(A)):
                if m == n:  # access in another block
                    n += A[i]

            return m + n

        n = 211
        self.check(test_impl, n)
        self.assertEqual(countNonParforArrayAccesses(test_impl, (types.intp,)), 0)

    def test_parfor_array_access3(self):
        def test_impl(n):
            A = np.ones(n, np.int64)
            m = 0
            for i in numba.prange(len(A)):
                m += A[i]
                if m==2:
                    i = m

        n = 211
        with self.assertRaises(errors.UnsupportedRewriteError) as raises:
            self.check(test_impl, n)
        self.assertIn("Overwrite of parallel loop index", str(raises.exception))

    @needs_blas
    def test_parfor_array_access4(self):
        # in this test, one index of a multi-dim access should be replaced
        # np.dot parallel implementation produces this case
        def test_impl(A, b):
            return np.dot(A, b)

        n = 211
        d = 4
        A = np.random.ranf((n, d))
        b = np.random.ranf(d)
        self.check(test_impl, A, b)
        # make sure the parfor index is replaced in build_tuple of access to A
        test_ir, tp = get_optimized_numba_ir(
            test_impl, (types.Array(types.float64, 2, 'C'),
                        types.Array(types.float64, 1, 'C')))
        # this code should have one basic block after optimization
        self.assertTrue(len(test_ir.blocks) == 1 and 0 in test_ir.blocks)
        block = test_ir.blocks[0]
        parfor_found = False
        parfor = None
        for stmt in block.body:
            if isinstance(stmt, numba.parfors.parfor.Parfor):
                parfor_found = True
                parfor = stmt

        self.assertTrue(parfor_found)
        build_tuple_found = False
        # there should be only one build_tuple
        for bl in parfor.loop_body.values():
            for stmt in bl.body:
                if (isinstance(stmt, ir.Assign)
                        and isinstance(stmt.value, ir.Expr)
                        and stmt.value.op == 'build_tuple'):
                    build_tuple_found = True
                    self.assertTrue(parfor.index_var in stmt.value.items)

        self.assertTrue(build_tuple_found)

    def test_parfor_dtype_type(self):
        # test array type replacement creates proper type
        def test_impl(a):
            for i in numba.prange(len(a)):
                a[i] = a.dtype.type(0)
            return a[4]

        a = np.ones(10)
        self.check(test_impl, a)

    def test_parfor_array_access5(self):
        # one dim is slice in multi-dim access
        def test_impl(n):
            X = np.ones((n, 3))
            y = 0
            for i in numba.prange(n):
                y += X[i,:].sum()
            return y

        n = 211
        self.check(test_impl, n)
        self.assertEqual(countNonParforArrayAccesses(test_impl, (types.intp,)), 0)

    @disabled_test # Test itself is problematic, see #3155
    def test_parfor_hoist_setitem(self):
        # Make sure that read of out is not hoisted.
        def test_impl(out):
            for i in prange(10):
                out[0] = 2 * out[0]
            return out[0]

        out = np.ones(1)
        self.check(test_impl, out)

    @needs_blas
    def test_parfor_generate_fuse(self):
        # issue #2857
        def test_impl(N, D):
            w = np.ones(D)
            X = np.ones((N, D))
            Y = np.ones(N)
            for i in range(3):
                B = (-Y * np.dot(X, w))

            return B

        n = 211
        d = 3
        self.check(test_impl, n, d)
        self.assertEqual(countArrayAllocs(test_impl, (types.intp, types.intp)), 4)
        self.assertEqual(countParfors(test_impl, (types.intp, types.intp)), 4)

    def test_ufunc_expr(self):
        # issue #2885
        def test_impl(A, B):
            return np.bitwise_and(A, B)

        A = np.ones(3, np.uint8)
        B = np.ones(3, np.uint8)
        B[1] = 0
        self.check(test_impl, A, B)

    def test_find_callname_intrinsic(self):
        def test_impl(n):
            A = unsafe_empty((n,))
            for i in range(n):
                A[i] = i + 2.0
            return A

        # the unsafe allocation should be found even though it is imported
        # as a different name
        self.assertEqual(countArrayAllocs(test_impl, (types.intp,)), 1)

    def test_reduction_var_reuse(self):
        # issue #3139
        def test_impl(n):
            acc = 0
            for i in prange(n):
                acc += 1

            for i in prange(n):
                acc += 2

            return acc
        self.check(test_impl, 16)

    def test_non_identity_initial(self):
        # issue #7344
        def test_impl(A, cond):
            s = 1
            for i in prange(A.shape[0]):
                if cond[i]:
                    s += 1
            return s
        self.check(test_impl, np.ones(10), np.ones(10).astype('bool'))

    def test_if_not_else_reduction(self):
        # issue #7344
        def test_impl(A, cond):
            s = 1
            t = 10
            for i in prange(A.shape[0]):
                if cond[i]:
                    s += 1
                    t += 1
                else:
                    s += 2
            return s + t
        self.check(test_impl, np.ones(10), np.ones(10).astype('bool'))

    def test_two_d_array_reduction_reuse(self):
        def test_impl(n):
            shp = (13, 17)
            size = shp[0] * shp[1]
            result1 = np.zeros(shp, np.int_)
            tmp = np.arange(size).reshape(shp)

            for i in numba.prange(n):
                result1 += tmp

            for i in numba.prange(n):
                result1 += tmp

            return result1

        self.check(test_impl, 100)

    def test_one_d_array_reduction(self):
        def test_impl(n):
            result = np.zeros(1, np.int_)

            for i in numba.prange(n):
                result += np.array([i], np.int_)

            return result

        self.check(test_impl, 100)

    def test_two_d_array_reduction(self):
        def test_impl(n):
            shp = (13, 17)
            size = shp[0] * shp[1]
            result1 = np.zeros(shp, np.int_)
            tmp = np.arange(size).reshape(shp)

            for i in numba.prange(n):
                result1 += tmp

            return result1

        self.check(test_impl, 100)

    def test_two_d_array_reduction_with_float_sizes(self):
        # result1 is float32 and tmp is float64.
        # Tests reduction with differing dtypes.
        def test_impl(n):
            shp = (2, 3)
            result1 = np.zeros(shp, np.float32)
            tmp = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).reshape(shp)

            for i in numba.prange(n):
                result1 += tmp

            return result1

        self.check(test_impl, 100)

    def test_two_d_array_reduction_prod(self):
        def test_impl(n):
            shp = (13, 17)
            result1 = 2 * np.ones(shp, np.int_)
            tmp = 2 * np.ones_like(result1)

            for i in numba.prange(n):
                result1 *= tmp

            return result1

        self.check(test_impl, 100)

    def test_three_d_array_reduction(self):
        def test_impl(n):
            shp = (3, 2, 7)
            result1 = np.zeros(shp, np.int_)

            for i in numba.prange(n):
                result1 += np.ones(shp, np.int_)

            return result1

        self.check(test_impl, 100)

    def test_preparfor_canonicalize_kws(self):
        # test canonicalize_array_math typing for calls with kw args
        def test_impl(A):
            return A.argsort() + 1

        n = 211
        A = np.arange(n)
        self.check(test_impl, A)

    def test_preparfor_datetime64(self):
        # test array.dtype transformation for datetime64
        def test_impl(A):
            return A.dtype

        A = np.empty(1, np.dtype('datetime64[ns]'))
        cpfunc = self.compile_parallel(test_impl, (numba.typeof(A),))
        self.assertEqual(cpfunc.entry_point(A), test_impl(A))

    def test_no_hoisting_with_member_function_call(self):
        def test_impl(X):
            n = X.shape[0]
            acc = 0
            for i in prange(n):
                R = {1, 2, 3}
                R.add(i)
                tmp = 0
                for x in R:
                    tmp += x
                acc += tmp
            return acc

        self.check(test_impl, np.random.ranf(128))

    def test_array_compare_scalar(self):
        """ issue3671: X != 0 becomes an arrayexpr with operator.ne.
            That is turned into a parfor by devectorizing.  Make sure
            the return type of the devectorized operator.ne
            on integer types works properly.
        """
        def test_impl():
            X = np.zeros(10, dtype=np.int_)
            return X != 0

        self.check(test_impl)

    def test_array_analysis_optional_def(self):
        def test_impl(x, half):
            size = len(x)
            parr = x[0:size]

            if half:
                parr = x[0:size//2]

            return parr.sum()
        x = np.ones(20)
        self.check(test_impl, x, True, check_scheduling=False)

    def test_prange_side_effects(self):
        def test_impl(a, b):
            data = np.empty(len(a), dtype=np.float64)
            size = len(data)
            for i in numba.prange(size):
                data[i] = a[i]
            for i in numba.prange(size):
                data[i] = data[i] + b[i]
            return data

        x = np.arange(10 ** 2, dtype=float)
        y = np.arange(10 ** 2, dtype=float)

        self.check(test_impl, x, y)
        self.assertEqual(countParfors(test_impl,
                                    (types.Array(types.float64, 1, 'C'),
                                     types.Array(types.float64, 1, 'C'))), 1)

    def test_tuple1(self):
        def test_impl(a):
            atup = (3, 4)
            b = 7
            for i in numba.prange(len(a)):
                a[i] += atup[0] + atup[1] + b
            return a

        x = np.arange(10)
        self.check(test_impl, x)

    def test_tuple2(self):
        def test_impl(a):
            atup = a.shape
            b = 7
            for i in numba.prange(len(a)):
                a[i] += atup[0] + b
            return a

        x = np.arange(10)
        self.check(test_impl, x)

    def test_tuple3(self):
        def test_impl(a):
            atup = (np.arange(10), 4)
            b = 7
            for i in numba.prange(len(a)):
                a[i] += atup[0][5] + atup[1] + b
            return a

        x = np.arange(10)
        self.check(test_impl, x)

    def test_namedtuple1(self):
        def test_impl(a):
            antup = TestNamedTuple(part0=3, part1=4)
            b = 7
            for i in numba.prange(len(a)):
                a[i] += antup.part0 + antup.part1 + b
            return a

        x = np.arange(10)
        self.check(test_impl, x)

    def test_namedtuple2(self):
        TestNamedTuple2 = namedtuple('TestNamedTuple2', ('part0', 'part1'))
        def test_impl(a):
            antup = TestNamedTuple2(part0=3, part1=4)
            b = 7
            for i in numba.prange(len(a)):
                a[i] += antup.part0 + antup.part1 + b
            return a

        x = np.arange(10)
        self.check(test_impl, x)

    def test_namedtuple3(self):
        # issue5872: test that a.y[:] = 5 is not removed as
        # deadcode.
        TestNamedTuple3 = namedtuple(f'TestNamedTuple3',['y'])

        def test_impl(a):
            a.y[:] = 5

        def comparer(a, b):
            np.testing.assert_almost_equal(a.y, b.y)

        x = TestNamedTuple3(y=np.zeros(10))
        self.check(test_impl, x, check_arg_equality=[comparer])

    def test_inplace_binop(self):
        def test_impl(a, b):
            b += a
            return b

        X = np.arange(10) + 10
        Y = np.arange(10) + 100
        self.check(test_impl, X, Y)
        self.assertEqual(countParfors(test_impl,
                                    (types.Array(types.float64, 1, 'C'),
                                     types.Array(types.float64, 1, 'C'))), 1)

    def test_tuple_concat(self):
        # issue5383
        def test_impl(a):
            n = len(a)
            array_shape = n, n
            indices = np.zeros(((1,) + array_shape + (1,)), dtype=np.uint64)
            k_list = indices[0, :]

            for i, g in enumerate(a):
                k_list[i, i] = i
            return k_list

        x = np.array([1, 1])
        self.check(test_impl, x)

    def test_tuple_concat_with_reverse_slice(self):
        # issue5383
        def test_impl(a):
            n = len(a)
            array_shape = n, n
            indices = np.zeros(((1,) + array_shape + (1,))[:-1],
                               dtype=np.uint64)
            k_list = indices[0, :]

            for i, g in enumerate(a):
                k_list[i, i] = i
            return k_list

        x = np.array([1, 1])
        self.check(test_impl, x)

    def test_array_tuple_concat(self):
        # issue6399
        def test_impl(a):
            S = (a,) + (a, a)
            return S[0].sum()

        x = np.ones((3,3))
        self.check(test_impl, x)

    def test_high_dimension1(self):
        # issue6749
        def test_impl(x):
            return x * 5.0
        x = np.ones((2, 2, 2, 2, 2, 15))
        self.check(test_impl, x)

    def test_tuple_arg(self):
        def test_impl(x, sz):
            for i in numba.pndindex(sz):
                x[i] = 1
            return x
        sz = (10, 5)
        self.check(test_impl, np.empty(sz), sz)

    def test_tuple_arg_not_whole_array(self):
        def test_impl(x, sz):
            for i in numba.pndindex(sz):
                x[i] = 1
            return x
        sz = (10, 5)
        self.check(test_impl, np.zeros(sz), (10, 3))

    def test_tuple_for_pndindex(self):
        def test_impl(x):
            sz = (10, 5)
            for i in numba.pndindex(sz):
                x[i] = 1
            return x
        sz = (10, 5)
        self.check(test_impl, np.zeros(sz))

    def test_tuple_arg_literal(self):
        def test_impl(x, first):
            sz = (first, 5)
            for i in numba.pndindex(sz):
                x[i] = 1
            return x
        sz = (10, 5)
        self.check(test_impl, np.zeros(sz), 10)

    def test_tuple_of_literal_nonliteral(self):
        # This test has to be done manually as the self.check uses
        # compile_isolated and one function cannot "see" the other

        def test_impl(x, sz):
            for i in numba.pndindex(sz):
                x[i] = 1
            return x

        def call(x, fn):
            return fn(x, (10, 3)) # Only want to iterate to the 3rd

        get_input = lambda: np.zeros((10, 10))
        expected = call(get_input(), test_impl)

        def check(dec):
            f1 = dec(test_impl)
            f2 = njit(call) # no parallel semantics in the caller
            got = f2(get_input(), f1)
            self.assertPreciseEqual(expected, got)

        for d in (njit, njit(parallel=True)):
            check(d)

    def test_tuple_arg_1d(self):
        def test_impl(x, sz):
            for i in numba.pndindex(sz):
                x[i] = 1
            return x
        sz = (10,)
        self.check(test_impl, np.zeros(sz), sz)

    def test_tuple_arg_1d_literal(self):
        def test_impl(x):
            sz = (10,)
            for i in numba.pndindex(sz):
                x[i] = 1
            return x
        sz = (10,)
        self.check(test_impl, np.zeros(sz))

    def test_int_arg_pndindex(self):
        def test_impl(x, sz):
            for i in numba.pndindex(sz):
                x[i] = 1
            return x
        self.check(test_impl, np.zeros((10, 10)), 3)

    def test_prange_unknown_call1(self):
        @register_jitable
        def issue7854_proc(u, i, even, size):
            for j in range((even + i + 1) % 2 + 1, size - 1, 2):
                u[i, j] = u[i + 1, j] + 1

        # issue7854
        # Forbid fusion in unanalyzable call inside prange.
        def test_impl(u, size):
            for i in numba.prange(1, size - 1):
                issue7854_proc(u, i, 0, size)
            for i in numba.prange(1, size - 1):
                issue7854_proc(u, i, 1, size)
            return u

        size = 4
        u = np.zeros((size, size))
        cptypes = (numba.float64[:, ::1], types.int64)
        self.assertEqual(countParfors(test_impl, cptypes), 2)
        self.check(test_impl, u, size)

    def test_prange_index_calc1(self):
        # Should forbid fusion due to cross-iteration dependency as
        # detected by loop index calcuation (i+1) as array index.
        def test_impl(u, size):
            for i in numba.prange(1, size - 1):
                for j in range((i + 1) % 2 + 1, size - 1, 2):
                    u[i, j] = u[i + 1, j] + 1
            for i in numba.prange(1, size - 1):
                for j in range(i % 2 + 1, size - 1, 2):
                    u[i, j] = u[i + 1, j] + 1
            return u

        size = 4
        u = np.zeros((size, size))
        cptypes = (numba.float64[:, ::1], types.int64)
        self.assertEqual(countParfors(test_impl, cptypes), 2)
        self.check(test_impl, u, size)

    def test_prange_reverse_order1(self):
        # Testing if reversed loop index usage as array index
        # prevents fusion.
        def test_impl(a, b, size):
            for i in numba.prange(size):
                for j in range(size):
                    a[i, j] = b[i, j] + 1
            for i in numba.prange(size):
                for j in range(size):
                    b[j, i] = 3
            return a[0, 0] + b[0, 0]

        size = 10
        a = np.zeros((size, size))
        b = np.zeros((size, size))
        cptypes = (numba.float64[:, ::1], numba.float64[:, ::1], types.int64)
        self.assertEqual(countParfors(test_impl, cptypes), 2)
        self.check(test_impl, a, b, size)

    def test_prange_parfor_index_then_not(self):
        # Testing if accessing an array first with a parfor index then
        # without will prevent fusion.
        def test_impl(a, size):
            b = 0
            for i in numba.prange(size):
                a[i] = i
            for i in numba.prange(size):
                b += a[5]
            return b

        size = 10
        a = np.zeros(size)
        cptypes = (numba.float64[:], types.int64)
        self.assertEqual(countParfors(test_impl, cptypes), 2)
        self.check(test_impl, a, size)

    def test_prange_parfor_index_const_tuple_fusion(self):
        # Testing if accessing a tuple with prange index
        # and later with a constant will not prevent fusion.
        def test_impl(a, tup, size):
            acc = 0
            for i in numba.prange(size):
                a[i] = i + tup[i]
            for i in numba.prange(size):
                acc += a[i] + tup[1]
            return acc

        size = 10
        a = np.zeros(size)
        b = tuple(a)
        cptypes = (numba.float64[:],
                   types.containers.UniTuple(types.float64, size),
                   types.intp)
        self.assertEqual(countParfors(test_impl, cptypes), 1)
        self.check(test_impl, a, b, size)

    def test_prange_non_parfor_index_then_opposite(self):
        # Testing if accessing an array first without a parfor index then
        # with will prevent fusion.
        def test_impl(a, b, size):
            for i in numba.prange(size):
                b[i] = a[5]
            for i in numba.prange(size):
                a[i] = i
            # Need this to stop previous prange from being optimized away.
            b[0] += a[0]
            return b

        size = 10
        a = np.zeros(size)
        b = np.zeros(size)
        cptypes = (numba.float64[:], numba.float64[:], types.int64)
        self.assertEqual(countParfors(test_impl, cptypes), 2)
        self.check(test_impl, a, b, size)

    def test_prange_optional(self):
        def test_impl(arr, pred=None):
            for i in prange(1):
                if pred is not None:
                    arr[i] = 0.0

        arr = np.ones(10)
        self.check(test_impl, arr, None,
                   check_arg_equality=[np.testing.assert_almost_equal,
                                       lambda x, y: x == y])
        self.assertEqual(arr.sum(), 10.0)

    def test_untraced_value_tuple(self):
        # This is a test for issue #6478.
        def test_impl():
            a = (1.2, 1.3)
            return a[0]

        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))

    def test_recursive_untraced_value_tuple(self):
        # This is a test for issue #6478.
        def test_impl():
            a = ((1.2, 1.3),)
            return a[0][0]

        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))

    def test_untraced_value_parfor(self):
        # This is a test for issue #6478.
        def test_impl(arr):
            a = (1.2, 1.3)
            n1 = len(arr)
            arr2 = np.empty(n1, np.float64)
            for i in prange(n1):
                arr2[i] = arr[i] * a[0]
            n2 = len(arr2)
            arr3 = np.empty(n2, np.float64)
            for j in prange(n2):
                arr3[j] = arr2[j] - a[1]
            total = 0.0
            n3 = len(arr3)
            for k in prange(n3):
                total += arr3[k]
            return total + a[0]

        arg = (types.Array(types.int64, 1, 'C'), )
        self.assertEqual(countParfors(test_impl, arg), 1)

        arr = np.arange(10, dtype=np.int64)
        self.check(test_impl, arr)

    def test_setitem_2d_one_replaced(self):
        # issue7843
        def test_impl(x):
            count = 0
            for n in range(x.shape[0]):
                # Useless "if" necessary to trigger bug.
                if n:
                    n
                x[count, :] = 1
                count += 1
            return x

        self.check(test_impl, np.zeros((3, 1)))

    def test_1array_control_flow(self):
        # issue8146
        def test_impl(arr, flag1, flag2):
            inv = np.arange(arr.size)
            if flag1:
                return inv.astype(np.float64)
            if flag2:
                ret = inv[inv]
            else:
                ret = inv[inv - 1]
            return ret / arr.size

        arr = np.arange(100)
        self.check(test_impl, arr, True, False)
        self.check(test_impl, arr, True, True)
        self.check(test_impl, arr, False, False)

    def test_2array_1_control_flow(self):
        # issue8146
        def test_impl(arr, l, flag):
            inv1 = np.arange(arr.size)
            inv2 = np.arange(l, arr.size + l)
            if flag:
                ret = inv1[inv1]
            else:
                ret = inv1[inv1 - 1]
            return ret / inv2

        arr = np.arange(100)
        self.check(test_impl, arr, 10, True)
        self.check(test_impl, arr, 10, False)

    def test_2array_2_control_flow(self):
        # issue8146
        def test_impl(arr, l, flag):
            inv1 = np.arange(arr.size)
            inv2 = np.arange(l, arr.size + l)
            if flag:
                ret1 = inv1[inv1]
                ret2 = inv2[inv1]
            else:
                ret1 = inv1[inv1 - 1]
                ret2 = inv2[inv1 - 1]
            return ret1 / ret2

        arr = np.arange(100)
        self.check(test_impl, arr, 10, True)
        self.check(test_impl, arr, 10, False)

    def test_issue8515(self):
        # issue8515: an array is filled in the first prange and
        # then accessed with c[i - 1] in the next prange which
        # should prevent fusion with the previous prange.
        def test_impl(n):
            r = np.zeros(n, dtype=np.intp)
            c = np.zeros(n, dtype=np.intp)
            for i in prange(n):
                for j in range(i):
                    c[i] += 1

            for i in prange(n):
                if i == 0:
                    continue
                r[i] = c[i] - c[i - 1]
            return r[1:]

        self.check(test_impl, 15)
        self.assertEqual(countParfors(test_impl, (types.int64, )), 2)

    def test_fusion_no_side_effects(self):
        def test_impl(a, b):
            X = np.ones(100)
            b = math.ceil(b)
            Y = np.ones(100)
            c = int(max(a, b))
            return X + Y + c
        self.check(test_impl, 3.7, 4.3)
        self.assertEqual(countParfors(test_impl, (types.float64, types.float64)), 1)


@skip_parfors_unsupported
class TestParforsLeaks(MemoryLeakMixin, TestParforsBase):
    def check(self, pyfunc, *args, **kwargs):
        cfunc, cpfunc = self.compile_all(pyfunc, *args)
        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)

    def test_reduction(self):
        # issue4299
        @njit(parallel=True)
        def test_impl(arr):
            return arr.sum()

        arr = np.arange(10).astype(np.float64)
        self.check(test_impl, arr)

    def test_multiple_reduction_vars(self):
        @njit(parallel=True)
        def test_impl(arr):
            a = 0.
            b = 1.
            for i in prange(arr.size):
                a += arr[i]
                b += 1. / (arr[i] + 1)
            return a * b
        arr = np.arange(10).astype(np.float64)
        self.check(test_impl, arr)


@skip_parfors_unsupported
class TestParforsSlice(TestParforsBase):

    def test_parfor_slice1(self):
        def test_impl(a):
            (n,) = a.shape
            b = a[0:n-2] + a[1:n-1]
            return b

        self.check(test_impl, np.ones(10))

    def test_parfor_slice2(self):
        def test_impl(a, m):
            (n,) = a.shape
            b = a[0:n-2] + a[1:m]
            return b

        # runtime assertion should succeed
        self.check(test_impl, np.ones(10), 9)
        # next we expect failure
        with self.assertRaises(AssertionError) as raises:
            njit(parallel=True)(test_impl)(np.ones(10),10)
        self.assertIn("do not match", str(raises.exception))

    def test_parfor_slice3(self):
        def test_impl(a):
            (m,n) = a.shape
            b = a[0:m-1,0:n-1] + a[1:m,1:n]
            return b

        self.check(test_impl, np.ones((4,3)))

    def test_parfor_slice4(self):
        def test_impl(a):
            (m,n) = a.shape
            b = a[:,0:n-1] + a[:,1:n]
            return b

        self.check(test_impl, np.ones((4,3)))

    def test_parfor_slice5(self):
        def test_impl(a):
            (m,n) = a.shape
            b = a[0:m-1,:] + a[1:m,:]
            return b

        self.check(test_impl, np.ones((4,3)))

    def test_parfor_slice6(self):
        def test_impl(a):
            b = a.transpose()
            c = a[1,:] + b[:,1]
            return c

        self.check(test_impl, np.ones((4,3)))

    def test_parfor_slice7(self):
        def test_impl(a):
            b = a.transpose()
            c = a[1,:] + b[1,:]
            return c

        # runtime check should succeed
        self.check(test_impl, np.ones((3,3)))
        # next we expect failure
        with self.assertRaises(AssertionError) as raises:
            njit(parallel=True)(test_impl)(np.ones((3,4)))
        self.assertIn("do not match", str(raises.exception))

    @disabled_test
    def test_parfor_slice8(self):
        def test_impl(a):
            (m,n) = a.shape
            b = a.transpose()
            b[1:m,1:n] = a[1:m,1:n]
            return b

        self.check(test_impl, np.arange(9).reshape((3,3)))

    @disabled_test
    def test_parfor_slice9(self):
        def test_impl(a):
            (m,n) = a.shape
            b = a.transpose()
            b[1:n,1:m] = a[:,1:m]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))

    @disabled_test
    def test_parfor_slice10(self):
        def test_impl(a):
            (m,n) = a.shape
            b = a.transpose()
            b[2,1:m] = a[2,1:m]
            return b

        self.check(test_impl, np.arange(9).reshape((3,3)))

    def test_parfor_slice11(self):
        def test_impl(a):
            (m,n,l) = a.shape
            b = a.copy()
            b[:,1,1:l] = a[:,2,1:l]
            return b

        self.check(test_impl, np.arange(27).reshape((3,3,3)))

    def test_parfor_slice12(self):
        def test_impl(a):
            (m,n) = a.shape
            b = a.copy()
            b[1,1:-1] = a[0,:-2]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))

    def test_parfor_slice13(self):
        def test_impl(a):
            (m,n) = a.shape
            b = a.copy()
            c = -1
            b[1,1:c] = a[0,-n:c-1]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))

    def test_parfor_slice14(self):
        def test_impl(a):
            (m,n) = a.shape
            b = a.copy()
            b[1,:-1] = a[0,-3:4]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))

    def test_parfor_slice15(self):
        def test_impl(a):
            (m,n) = a.shape
            b = a.copy()
            b[1,-(n-1):] = a[0,-3:4]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))

    @disabled_test
    def test_parfor_slice16(self):
        """ This test is disabled because if n is larger than the array size
            then n and n-1 will both be the end of the array and thus the
            slices will in fact be of different sizes and unable to fuse.
        """
        def test_impl(a, b, n):
            assert(a.shape == b.shape)
            a[1:n] = 10
            b[0:(n-1)] = 10
            return a * b

        self.check(test_impl, np.ones(10), np.zeros(10), 8)
        args = (numba.float64[:], numba.float64[:], numba.int64)
        self.assertEqual(countParfors(test_impl, args), 2)

    def test_parfor_slice17(self):
        def test_impl(m, A):
            B = np.zeros(m)
            n = len(A)
            B[-n:] = A
            return B

        self.check(test_impl, 10, np.ones(10))

    def test_parfor_slice18(self):
        # issue 3534
        def test_impl():
            a = np.zeros(10)
            a[1:8] = np.arange(0, 7)
            y = a[3]
            return y

        self.check(test_impl)

    def test_parfor_slice19(self):
        # issues #3561 and #3554, empty slice binop
        def test_impl(X):
            X[:0] += 1
            return X

        self.check(test_impl, np.ones(10))

    def test_parfor_slice20(self):
        # issue #4075, slice size
        def test_impl():
            a = np.ones(10)
            c = a[1:]
            s = len(c)
            return s

        self.check(test_impl, check_scheduling=False)

    def test_parfor_slice21(self):
        def test_impl(x1, x2):
            x1 = x1.reshape(x1.size, 1)
            x2 = x2.reshape(x2.size, 1)
            return x1 >= x2[:-1, :]

        x1 = np.random.rand(5)
        x2 = np.random.rand(6)
        self.check(test_impl, x1, x2)

    def test_parfor_slice22(self):
        def test_impl(x1, x2):
            b = np.zeros((10,))
            for i in prange(1):
                b += x1[:, x2]
            return b

        x1 = np.zeros((10,7))
        x2 = np.array(4)
        self.check(test_impl, x1, x2)

    def test_parfor_slice23(self):
        # issue #4630
        def test_impl(x):
            x[:0] = 2
            return x

        self.check(test_impl, np.ones(10))

    def test_parfor_slice24(self):
        def test_impl(m, A, n):
            B = np.zeros(m)
            C = B[n:]
            C = A[:len(C)]
            return B

        for i in range(-15, 15):
            self.check(test_impl, 10, np.ones(10), i)

    def test_parfor_slice25(self):
        def test_impl(m, A, n):
            B = np.zeros(m)
            C = B[:n]
            C = A[:len(C)]
            return B

        for i in range(-15, 15):
            self.check(test_impl, 10, np.ones(10), i)

    def test_parfor_slice26(self):
        def test_impl(a):
            (n,) = a.shape
            b = a.copy()
            b[-(n-1):] = a[-3:4]
            return b

        self.check(test_impl, np.arange(4))

    def test_parfor_slice27(self):
        # issue5601: tests array analysis of the slice with
        # n_valid_vals of unknown size.
        def test_impl(a):
            n_valid_vals = 0

            for i in prange(a.shape[0]):
                if a[i] != 0:
                    n_valid_vals += 1

                if n_valid_vals:
                    unused = a[:n_valid_vals]

            return 0

        self.check(test_impl, np.arange(3))

    def test_parfor_array_access_lower_slice(self):
        for ts in [slice(1, 3, None), slice(2, None, None), slice(None, 2, -1),
                   slice(None, None, None), slice(None, None, -2)]:

            def test_impl(n):
                X = np.arange(n * 4).reshape((n, 4))
                y = 0
                for i in numba.prange(n):
                    y += X[i, ts].sum()
                return y

            n = 10
            self.check(test_impl, n)

            X = np.arange(n * 4).reshape((n, 4))

            def test_impl(X):
                y = 0
                for i in numba.prange(X.shape[0]):
                    y += X[i, ts].sum()
                return y

            self.check(test_impl, X)


@skip_parfors_unsupported
class TestParforsOptions(TestParforsBase):

    def test_parfor_options(self):
        def test_impl(a):
            n = a.shape[0]
            b = np.ones(n)
            c = np.array([ i for i in range(n) ])
            b[:n] = a + b * c
            for i in prange(n):
                c[i] = b[i] * a[i]
            return reduce(lambda x,y:x+y, c, 0)

        self.check(test_impl, np.ones(10))
        args = (numba.float64[:],)
        # everything should fuse with default option
        self.assertEqual(countParfors(test_impl, args), 1)
        # with no fusion
        self.assertEqual(countParfors(test_impl, args, fusion=False), 6)
        # with no fusion, comprehension
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False), 5)
        #with no fusion, comprehension, setitem
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False, setitem=False), 4)
         # with no fusion, comprehension, prange
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False, setitem=False, prange=False), 3)
         # with no fusion, comprehension, prange, reduction
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False, setitem=False, prange=False,
                         reduction=False), 2)
        # with no fusion, comprehension, prange, reduction, numpy
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False, setitem=False, prange=False,
                         reduction=False, numpy=False), 0)


@skip_parfors_unsupported
class TestParforsBitMask(TestParforsBase):

    def test_parfor_bitmask1(self):
        def test_impl(a, n):
            b = a > n
            a[b] = 0
            return a

        self.check(test_impl, np.arange(10), 5)

    def test_parfor_bitmask2(self):
        def test_impl(a, b):
            a[b] = 0
            return a

        a = np.arange(10)
        b = a > 5
        self.check(test_impl, a, b)

    def test_parfor_bitmask3(self):
        def test_impl(a, b):
            a[b] = a[b]
            return a

        a = np.arange(10)
        b = a > 5
        self.check(test_impl, a, b)

    def test_parfor_bitmask4(self):
        def test_impl(a, b):
            a[b] = (2 * a)[b]
            return a

        a = np.arange(10)
        b = a > 5
        self.check(test_impl, a, b)

    def test_parfor_bitmask5(self):
        def test_impl(a, b):
            a[b] = a[b] * a[b]
            return a

        a = np.arange(10)
        b = a > 5
        self.check(test_impl, a, b)

    def test_parfor_bitmask6(self):
        def test_impl(a, b, c):
            a[b] = c
            return a

        a = np.arange(10)
        b = a > 5
        c = np.zeros(sum(b))

        # expect failure due to lack of parallelism
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl, a, b, c)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))


@skip_parfors_unsupported
class TestParforsMisc(TestParforsBase):
    """
    Tests miscellaneous parts of ParallelAccelerator use.
    """
    def test_no_warn_if_cache_set(self):

        def pyfunc():
            arr = np.ones(100)
            for i in prange(arr.size):
                arr[i] += i
            return arr

        cfunc = njit(parallel=True, cache=True)(pyfunc)

        with warnings.catch_warnings(record=True) as raised_warnings:
            warnings.simplefilter('always')
            warnings.filterwarnings(action="ignore",
                                    module="typeguard")
            # Filter out warnings about TBB interface mismatch
            warnings.filterwarnings(action='ignore',
                                    message=r".*TBB_INTERFACE_VERSION.*",
                                    category=numba.errors.NumbaWarning,
                                    module=r'numba\.np\.ufunc\.parallel.*')
            cfunc()

        self.assertEqual(len(raised_warnings), 0)

        # Make sure the dynamic globals flag is set
        has_dynamic_globals = [cres.library.has_dynamic_globals
                               for cres in cfunc.overloads.values()]
        self.assertEqual(has_dynamic_globals, [False])

    def test_statement_reordering_respects_aliasing(self):
        def impl():
            a = np.zeros(10)
            a[1:8] = np.arange(0, 7)
            print('a[3]:', a[3])
            print('a[3]:', a[3])
            return a

        cres = self.compile_parallel(impl, ())
        with captured_stdout() as stdout:
            cres.entry_point()
        for line in stdout.getvalue().splitlines():
            self.assertEqual('a[3]: 2.0', line)

    def test_parfor_ufunc_typing(self):
        def test_impl(A):
            return np.isinf(A)

        A = np.array([np.inf, 0.0])
        cfunc = njit(parallel=True)(test_impl)
        # save global state
        old_seq_flag = numba.parfors.parfor.sequential_parfor_lowering
        try:
            numba.parfors.parfor.sequential_parfor_lowering = True
            np.testing.assert_array_equal(test_impl(A), cfunc(A))
        finally:
            # recover global state
            numba.parfors.parfor.sequential_parfor_lowering = old_seq_flag

    def test_init_block_dce(self):
        # issue4690
        def test_impl():
            res = 0
            arr = [1,2,3,4,5]
            numba.parfors.parfor.init_prange()
            dummy = arr
            for i in numba.prange(5):
                res += arr[i]
            return res + dummy[2]

        self.assertEqual(get_init_block_size(test_impl, ()), 0)

    def test_alias_analysis_for_parfor1(self):
        def test_impl():
            acc = 0
            for _ in range(4):
                acc += 1

            data = np.zeros((acc,))
            return data

        self.check(test_impl)

    def test_no_state_change_in_gufunc_lowering_on_error(self):
        # tests #5098, if there's an exception arising in gufunc lowering the
        # sequential_parfor_lowering global variable should remain as False on
        # stack unwind.

        BROKEN_MSG = 'BROKEN_MSG'

        @register_pass(mutates_CFG=True, analysis_only=False)
        class BreakParfors(AnalysisPass):
            _name = "break_parfors"

            def __init__(self):
                AnalysisPass.__init__(self)

            def run_pass(self, state):
                for blk in state.func_ir.blocks.values():
                    for stmt in blk.body:
                        if isinstance(stmt, numba.parfors.parfor.Parfor):
                            # races should be a set(), that list is iterable
                            # permits it to get through to the
                            # _create_gufunc_for_parfor_body routine at which
                            # point it needs to be a set so e.g. set.difference
                            # can be computed, this therefore creates an error
                            # in the right location.
                            class Broken(list):

                                def difference(self, other):
                                    raise errors.LoweringError(BROKEN_MSG)

                            stmt.races = Broken()
                    return True


        class BreakParforsCompiler(CompilerBase):

            def define_pipelines(self):
                pm = DefaultPassBuilder.define_nopython_pipeline(self.state)
                pm.add_pass_after(BreakParfors, IRLegalization)
                pm.finalize()
                return [pm]


        @njit(parallel=True, pipeline_class=BreakParforsCompiler)
        def foo():
            x = 1
            for _ in prange(1):
                x += 1
            return x

        # assert default state for global
        self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering)

        with self.assertRaises(errors.LoweringError) as raises:
            foo()

        self.assertIn(BROKEN_MSG, str(raises.exception))

        # assert state has not changed
        self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering)

    def test_issue_5098(self):
        class DummyType(types.Opaque):
            pass

        dummy_type = DummyType("my_dummy")
        register_model(DummyType)(models.OpaqueModel)

        class Dummy(object):
            pass

        @typeof_impl.register(Dummy)
        def typeof_Dummy(val, c):
            return dummy_type

        @unbox(DummyType)
        def unbox_index(typ, obj, c):
            return NativeValue(c.context.get_dummy_value())

        @overload_method(DummyType, "method1", jit_options={"parallel":True})
        def _get_method1(obj, arr, func):
            def _foo(obj, arr, func):
                def baz(a, f):
                    c = a.copy()
                    c[np.isinf(a)] = np.nan
                    return f(c)

                length = len(arr)
                output_arr = np.empty(length, dtype=np.float64)
                for i in prange(length):
                    output_arr[i] = baz(arr[i], func)
                for i in prange(length - 1):
                    output_arr[i] += baz(arr[i], func)
                return output_arr
            return _foo

        @njit
        def bar(v):
            return v.mean()

        @njit
        def test1(d):
            return d.method1(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), bar)

        save_state = numba.parfors.parfor.sequential_parfor_lowering
        self.assertFalse(save_state)
        try:
            test1(Dummy())
            self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering)
        finally:
            # always set the sequential_parfor_lowering state back to the
            # original state
            numba.parfors.parfor.sequential_parfor_lowering = save_state

    def test_oversized_tuple_as_arg_to_kernel(self):

        @njit(parallel=True)
        def oversize_tuple(idx):
            big_tup = (1,2,3,4)
            z = 0
            for x in prange(10):
                z += big_tup[idx]
            return z

        with override_env_config('NUMBA_PARFOR_MAX_TUPLE_SIZE', '3'):
            with self.assertRaises(errors.UnsupportedParforsError) as raises:
                oversize_tuple(0)

        errstr = str(raises.exception)
        self.assertIn("Use of a tuple", errstr)
        self.assertIn("in a parallel region", errstr)

    def test_issue5167(self):

        def ndvi_njit(img_nir, img_red):
            fillvalue = 0
            out_img = np.full(img_nir.shape, fillvalue, dtype=img_nir.dtype)
            dims = img_nir.shape
            for y in prange(dims[0]):
                for x in prange(dims[1]):
                    out_img[y, x] = ((img_nir[y, x] - img_red[y, x]) /
                                     (img_nir[y, x] + img_red[y, x]))
            return out_img

        tile_shape = (4, 4)
        array1 = np.random.uniform(low=1.0, high=10000.0, size=tile_shape)
        array2 = np.random.uniform(low=1.0, high=10000.0, size=tile_shape)
        self.check(ndvi_njit, array1, array2)

    def test_issue5065(self):

        def reproducer(a, dist, dist_args):
            result = np.zeros((a.shape[0], a.shape[0]), dtype=np.float32)
            for i in prange(a.shape[0]):
                for j in range(i + 1, a.shape[0]):
                    d = dist(a[i], a[j], *dist_args)
                    result[i, j] = d
                    result[j, i] = d
            return result

        @njit
        def euclidean(x, y):
            result = 0.0
            for i in range(x.shape[0]):
                result += (x[i] - y[i]) ** 2
            return np.sqrt(result)

        a = np.random.random(size=(5, 2))

        got = njit(parallel=True)(reproducer)(a.copy(), euclidean,())
        expected = reproducer(a.copy(), euclidean,())

        np.testing.assert_allclose(got, expected)

    def test_issue5001(self):

        def test_numba_parallel(myarray):
            result = [0] * len(myarray)
            for i in prange(len(myarray)):
                result[i] = len(myarray[i])
            return result

        myarray = (np.empty(100),np.empty(50))
        self.check(test_numba_parallel, myarray)

    def test_issue3169(self):

        @njit
        def foo(grids):
            pass

        @njit(parallel=True)
        def bar(grids):
            for x in prange(1):
                foo(grids)

        # returns nothing, just check it compiles
        bar(([1],) * 2)

    @disabled_test
    def test_issue4846(self):

        mytype = namedtuple("mytype", ("a", "b"))

        def outer(mydata):
            for k in prange(3):
                inner(k, mydata)
            return mydata.a

        @njit(nogil=True)
        def inner(k, mydata):
            f = (k, mydata.a)
            g = (k, mydata.b)

        mydata = mytype(a="a", b="b")

        self.check(outer, mydata)

    def test_issue3748(self):

        def test1b():
            x = (1, 2, 3, 4, 5)
            a = 0
            for i in prange(len(x)):
                a += x[i]
            return a

        self.check(test1b,)

    def test_issue5277(self):

        def parallel_test(size, arr):
            for x in prange(size[0]):
                for y in prange(size[1]):
                    arr[y][x] = x * 4.5 + y
            return arr

        size = (10, 10)
        arr = np.zeros(size, dtype=int)

        self.check(parallel_test, size, arr)

    def test_issue5570_ssa_races(self):
        @njit(parallel=True)
        def foo(src, method, out):
            for i in prange(1):
                for j in range(1):
                    out[i, j] = 1
            if method:
                out += 1
            return out

        src = np.zeros((5,5))
        method = 57
        out = np.zeros((2, 2))

        self.assertPreciseEqual(
            foo(src, method, out),
            foo.py_func(src, method, out)
        )

    def test_issue6095_numpy_max(self):
        @njit(parallel=True)
        def find_maxima_3D_jit(args):
            package = args
            for index in range(0, 10):
                z_stack = package[index, :, :]
            return np.max(z_stack)

        np.random.seed(0)
        args = np.random.random((10, 10, 10))
        self.assertPreciseEqual(
            find_maxima_3D_jit(args),
            find_maxima_3D_jit.py_func(args),
        )

    def test_issue5942_1(self):
        # issue5942: tests statement reordering of
        # aliased arguments.
        def test_impl(gg, gg_next):
            gs = gg.shape
            d = gs[0]
            for i_gg in prange(d):
                gg_next[i_gg, :]  = gg[i_gg, :]
                gg_next[i_gg, 0] += 1

            return gg_next

        d = 4
        k = 2

        gg      = np.zeros((d, k), dtype = np.int32)
        gg_next = np.zeros((d, k), dtype = np.int32)
        self.check(test_impl, gg, gg_next)

    def test_issue5942_2(self):
        # issue5942: tests statement reordering
        def test_impl(d, k):
            gg      = np.zeros((d, k), dtype = np.int32)
            gg_next = np.zeros((d, k), dtype = np.int32)

            for i_gg in prange(d):
                for n in range(k):
                    gg[i_gg, n] = i_gg
                gg_next[i_gg, :]  = gg[i_gg, :]
                gg_next[i_gg, 0] += 1

            return gg_next

        d = 4
        k = 2

        self.check(test_impl, d, k)

    @skip_unless_scipy
    def test_issue6102(self):
        # The problem is originally observed on Python3.8 because of the
        # changes in how loops are represented in 3.8 bytecode.
        @njit(parallel=True)
        def f(r):
            for ir in prange(r.shape[0]):
                dist = np.inf
                tr = np.array([0, 0, 0], dtype=np.float32)
                for i in [1, 0, -1]:
                    dist_t = np.linalg.norm(r[ir, :] + i)
                    if dist_t < dist:
                        dist = dist_t
                        tr = np.array([i, i, i], dtype=np.float32)
                r[ir, :] += tr
            return r

        r = np.array([[0., 0., 0.], [0., 0., 1.]])
        self.assertPreciseEqual(f(r), f.py_func(r))

    def test_issue6774(self):
        @njit(parallel=True)
        def test_impl():
            n = 5
            na_mask = np.ones((n,))
            result = np.empty((n - 1,))
            for i in prange(len(result)):
                result[i] = np.sum(na_mask[i:i + 1])
            return result

        self.check(test_impl)

    def test_issue4963_globals(self):
        def test_impl():
            buf = np.zeros((_GLOBAL_INT_FOR_TESTING1, _GLOBAL_INT_FOR_TESTING2))
            return buf
        self.check(test_impl)

    def test_issue4963_freevars(self):
        _FREEVAR_INT_FOR_TESTING1 = 17
        _FREEVAR_INT_FOR_TESTING2 = 5
        def test_impl():
            buf = np.zeros((_FREEVAR_INT_FOR_TESTING1, _FREEVAR_INT_FOR_TESTING2))
            return buf
        self.check(test_impl)


@skip_parfors_unsupported
class TestParforsDiagnostics(TestParforsBase):

    def check(self, pyfunc, *args, **kwargs):
        cfunc, cpfunc = self.compile_all(pyfunc, *args)
        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)

    def assert_fusion_equivalence(self, got, expected):
        a = self._fusion_equivalent(got)
        b = self._fusion_equivalent(expected)
        self.assertEqual(a, b)

    def _fusion_equivalent(self, thing):
        # parfors indexes the Parfors class instance id's from wherever the
        # internal state happens to be. To assert fusion equivalence we just
        # check that the relative difference between fusion adjacency lists
        # is the same. For example:
        # {3: [2, 1]} is the same as {13: [12, 11]}
        # this function strips the indexing etc out returning something suitable
        # for checking equivalence
        new = defaultdict(list)
        min_key = min(thing.keys())
        for k in sorted(thing.keys()):
            new[k - min_key] = [x - min_key for x in thing[k]]
        return new

    def assert_diagnostics(self, diagnostics, parfors_count=None,
                           fusion_info=None, nested_fusion_info=None,
                           replaced_fns=None, hoisted_allocations=None):
        if parfors_count is not None:
            self.assertEqual(parfors_count, diagnostics.count_parfors())
        if fusion_info is not None:
            self.assert_fusion_equivalence(fusion_info, diagnostics.fusion_info)
        if nested_fusion_info is not None:
            self.assert_fusion_equivalence(nested_fusion_info,
                                           diagnostics.nested_fusion_info)
        if replaced_fns is not None:
            repl = diagnostics.replaced_fns.values()
            for x in replaced_fns:
                for replaced in repl:
                    if replaced[0] == x:
                        break
                else:
                    msg = "Replacement for %s was not found. Had %s" % (x, repl)
                    raise AssertionError(msg)

        if hoisted_allocations is not None:
            hoisted_allocs = diagnostics.hoisted_allocations()
            self.assertEqual(hoisted_allocations, len(hoisted_allocs))

        # just make sure that the dump() function doesn't have an issue!
        with captured_stdout():
            for x in range(1, 5):
                diagnostics.dump(x)

    def test_array_expr(self):
        def test_impl():
            n = 10
            a = np.ones(n)
            b = np.zeros(n)
            return a + b

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, parfors_count=1,
                                fusion_info = {3: [4, 5]})

    def test_prange(self):
        def test_impl():
            n = 10
            a = np.empty(n)
            for i in prange(n):
                a[i] = i * 10
            return a

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, parfors_count=1)

    def test_user_varname(self):
        """make sure original user variable name is used in fusion info
        """
        def test_impl():
            n = 10
            x = np.ones(n)
            a = np.sin(x)
            b = np.cos(a * a)
            acc = 0
            for i in prange(n - 2):
                for j in prange(n - 1):
                    acc += b[i] + b[j + 1]
            return acc

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        # make sure original 'n' variable name is used in fusion report for loop
        # dimension mismatch
        self.assertTrue(
            any("slice(0, n, 1)" in r.message for r in diagnostics.fusion_reports))

    def test_nested_prange(self):
        def test_impl():
            n = 10
            a = np.empty((n, n))
            for i in prange(n):
                for j in prange(n):
                    a[i, j] = i * 10 + j
            return a

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, parfors_count=2,
                                nested_fusion_info={2: [1]})

    def test_function_replacement(self):
        def test_impl():
            n = 10
            a = np.ones(n)
            b = np.argmin(a)
            return b

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, parfors_count=1,
                                fusion_info={2: [3]},
                                replaced_fns = [('argmin', 'numpy'),])

    def test_reduction(self):
        def test_impl():
            n = 10
            a = np.ones(n + 1) # prevent fusion
            acc = 0
            for i in prange(n):
                acc += a[i]
            return acc

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, parfors_count=2)

    def test_setitem(self):
        def test_impl():
            n = 10
            a = np.ones(n)
            a[:] = 7
            return a

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, parfors_count=1)

    def test_allocation_hoisting(self):
        def test_impl():
            n = 10
            m = 5
            acc = 0
            for i in prange(n):
                temp = np.zeros((m,)) # the np.empty call should get hoisted
                for j in range(m):
                    temp[j] = i
                acc += temp[-1]
            return acc

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, hoisted_allocations=1)


class TestPrangeBase(TestParforsBase):

    def generate_prange_func(self, pyfunc, patch_instance):
        """
        This function does the actual code augmentation to enable the explicit
        testing of `prange` calls in place of `range`.
        """
        pyfunc_code = pyfunc.__code__

        prange_names = list(pyfunc_code.co_names)

        if patch_instance is None:
            # patch all instances, cheat by just switching
            # range for prange
            assert 'range' in pyfunc_code.co_names
            prange_names = tuple([x if x != 'range' else 'prange'
                                  for x in pyfunc_code.co_names])
            new_code = bytes(pyfunc_code.co_code)
        else:
            # patch specified instances...
            # find where 'range' is in co_names
            range_idx = pyfunc_code.co_names.index('range')
            range_locations = []
            # look for LOAD_GLOBALs that point to 'range'
            for instr in dis.Bytecode(pyfunc_code):
                if instr.opname == 'LOAD_GLOBAL':
                    if _fix_LOAD_GLOBAL_arg(instr.arg) == range_idx:
                        range_locations.append(instr.offset + 1)
            # add in 'prange' ref
            prange_names.append('prange')
            prange_names = tuple(prange_names)
            prange_idx = len(prange_names) - 1
            if utils.PYVERSION == (3, 11):
                # this is the inverse of _fix_LOAD_GLOBAL_arg
                prange_idx = 1 + (prange_idx << 1)
            new_code = bytearray(pyfunc_code.co_code)
            assert len(patch_instance) <= len(range_locations)
            # patch up the new byte code
            for i in patch_instance:
                idx = range_locations[i]
                new_code[idx] = prange_idx
            new_code = bytes(new_code)

        # create code object with prange mutation
        prange_code = pyfunc_code.replace(co_code=new_code,
                                          co_names=prange_names)

        # get function
        pfunc = pytypes.FunctionType(prange_code, globals())

        return pfunc

    def prange_tester(self, pyfunc, *args, **kwargs):
        """
        The `prange` tester
        This is a hack. It basically switches out range calls for prange.
        It does this by copying the live code object of a function
        containing 'range' then copying the .co_names and mutating it so
        that 'range' is replaced with 'prange'. It then creates a new code
        object containing the mutation and instantiates a function to contain
        it. At this point three results are created:
        1. The result of calling the original python function.
        2. The result of calling a njit compiled version of the original
            python function.
        3. The result of calling a njit(parallel=True) version of the mutated
           function containing `prange`.
        The three results are then compared and the `prange` based function's
        llvm_ir is inspected to ensure the scheduler code is present.

        Arguments:
         pyfunc - the python function to test
         args - data arguments to pass to the pyfunc under test

        Keyword Arguments:
         patch_instance - iterable containing which instances of `range` to
                          replace. If not present all instance of `range` are
                          replaced.
         scheduler_type - 'signed', 'unsigned' or None, default is None.
                           Supply in cases where the presence of a specific
                           scheduler is to be asserted.
         check_fastmath - if True then a check will be performed to ensure the
                          IR contains instructions labelled with 'fast'
         check_fastmath_result - if True then a check will be performed to
                                 ensure the result of running with fastmath
                                 on matches that of the pyfunc
         Remaining kwargs are passed to np.testing.assert_almost_equal


        Example:
            def foo():
                acc = 0
                for x in range(5):
                    for y in range(10):
                        acc +=1
                return acc

            # calling as
            prange_tester(foo)
            # will test code equivalent to
            # def foo():
            #     acc = 0
            #     for x in prange(5): # <- changed
            #         for y in prange(10): # <- changed
            #             acc +=1
            #     return acc

            # calling as
            prange_tester(foo, patch_instance=[1])
            # will test code equivalent to
            # def foo():
            #     acc = 0
            #     for x in range(5): # <- outer loop (0) unchanged
            #         for y in prange(10): # <- inner loop (1) changed
            #             acc +=1
            #     return acc

        """
        patch_instance = kwargs.pop('patch_instance', None)
        check_fastmath = kwargs.pop('check_fastmath', False)
        check_fastmath_result = kwargs.pop('check_fastmath_result', False)

        pfunc = self.generate_prange_func(pyfunc, patch_instance)

        # Compile functions
        # compile a standard njit of the original function
        sig = tuple([numba.typeof(x) for x in args])
        cfunc = self.compile_njit(pyfunc, sig)

        # compile the prange injected function
        with warnings.catch_warnings(record=True) as raised_warnings:
            warnings.simplefilter('always')
            cpfunc = self.compile_parallel(pfunc, sig)

        # if check_fastmath is True then check fast instructions
        if check_fastmath:
            self.assert_fastmath(pfunc, sig)

        # if check_fastmath_result is True then compile a function
        # so that the parfors checker can assert the result is ok.
        if check_fastmath_result:
            fastcpfunc = self.compile_parallel_fastmath(pfunc, sig)
            kwargs = dict({'fastmath_pcres': fastcpfunc}, **kwargs)

        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)
        return raised_warnings


@skip_parfors_unsupported
class TestPrangeBasic(TestPrangeBase):
    """ Tests Prange """

    def test_prange01(self):
        def test_impl():
            n = 4
            A = np.zeros(n)
            for i in range(n):
                A[i] = 2.0 * i
            return A
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange02(self):
        def test_impl():
            n = 4
            A = np.zeros(n - 1)
            for i in range(1, n):
                A[i - 1] = 2.0 * i
            return A
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange03(self):
        def test_impl():
            s = 10
            for i in range(10):
                s += 2
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange03mul(self):
        def test_impl():
            s = 3
            for i in range(10):
                s *= 2
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange03sub(self):
        def test_impl():
            s = 100
            for i in range(10):
                s -= 2
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange03div(self):
        def test_impl():
            s = 10
            for i in range(10):
                s /= 2
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange04(self):
        def test_impl():
            a = 2
            b = 3
            A = np.empty(4)
            for i in range(4):
                if i == a:
                    A[i] = b
                else:
                    A[i] = 0
            return A
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange05(self):
        def test_impl():
            n = 4
            A = np.ones((n), dtype=np.float64)
            s = 0
            for i in range(1, n - 1, 1):
                s += A[i]
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange06(self):
        def test_impl():
            n = 4
            A = np.ones((n), dtype=np.float64)
            s = 0
            for i in range(1, 1, 1):
                s += A[i]
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange07(self):
        def test_impl():
            n = 4
            A = np.ones((n), dtype=np.float64)
            s = 0
            for i in range(n, 1):
                s += A[i]
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange08(self):
        def test_impl():
            n = 4
            A = np.ones((n))
            acc = 0
            for i in range(len(A)):
                for j in range(len(A)):
                    acc += A[i]
            return acc
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange08_1(self):
        def test_impl():
            n = 4
            A = np.ones((n))
            acc = 0
            for i in range(4):
                for j in range(4):
                    acc += A[i]
            return acc
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange09(self):
        def test_impl():
            n = 4
            acc = 0
            for i in range(n):
                for j in range(n):
                    acc += 1
            return acc
        # patch inner loop to 'prange'
        self.prange_tester(test_impl, patch_instance=[1],
                           scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange10(self):
        def test_impl():
            n = 4
            acc2 = 0
            for j in range(n):
                acc1 = 0
                for i in range(n):
                    acc1 += 1
                acc2 += acc1
            return acc2
        # patch outer loop to 'prange'
        self.prange_tester(test_impl, patch_instance=[0],
                           scheduler_type='unsigned',
                           check_fastmath=True)

    @unittest.skip("list append is not thread-safe yet (#2391, #2408)")
    def test_prange11(self):
        def test_impl():
            n = 4
            return [np.sin(j) for j in range(n)]
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange12(self):
        def test_impl():
            acc = 0
            n = 4
            X = np.ones(n)
            for i in range(-len(X)):
                acc += X[i]
            return acc
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange13(self):
        def test_impl(n):
            acc = 0
            for i in range(n):
                acc += 1
            return acc
        self.prange_tester(test_impl, np.int32(4), scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange14(self):
        def test_impl(A):
            s = 3
            for i in range(len(A)):
                s += A[i]*2
            return s
        # this tests reduction detection well since the accumulated variable
        # is initialized before the parfor and the value accessed from the array
        # is updated before accumulation
        self.prange_tester(test_impl, np.random.ranf(4),
                           scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange15(self):
        # from issue 2587
        # test parfor type inference when there is multi-dimensional indexing
        def test_impl(N):
            acc = 0
            for i in range(N):
                x = np.ones((1, 1))
                acc += x[0, 0]
            return acc
        self.prange_tester(test_impl, 1024, scheduler_type='unsigned',
                           check_fastmath=True)

    # Tests for negative ranges
    def test_prange16(self):
        def test_impl(N):
            acc = 0
            for i in range(-N, N):
                acc += 2
            return acc
        self.prange_tester(test_impl, 1024, scheduler_type='signed',
                           check_fastmath=True)

    def test_prange17(self):
        def test_impl(N):
            acc = 0
            X = np.ones(N)
            for i in range(-N, N):
                acc += X[i]
            return acc
        self.prange_tester(test_impl, 9, scheduler_type='signed',
                           check_fastmath=True)

    def test_prange18(self):
        def test_impl(N):
            acc = 0
            X = np.ones(N)
            for i in range(-N, 5):
                acc += X[i]
                for j in range(-4, N):
                    acc += X[j]
            return acc
        self.prange_tester(test_impl, 9, scheduler_type='signed',
                           check_fastmath=True)

    def test_prange19(self):
        def test_impl(N):
            acc = 0
            M = N + 4
            X = np.ones((N, M))
            for i in range(-N, N):
                for j in range(-M, M):
                    acc += X[i, j]
            return acc
        self.prange_tester(test_impl, 9, scheduler_type='signed',
                           check_fastmath=True)

    def test_prange20(self):
        def test_impl(N):
            acc = 0
            X = np.ones(N)
            for i in range(-1, N):
                acc += X[i]
            return acc
        self.prange_tester(test_impl, 9, scheduler_type='signed',
                           check_fastmath=True)

    def test_prange21(self):
        def test_impl(N):
            acc = 0
            for i in range(-3, -1):
                acc += 3
            return acc
        self.prange_tester(test_impl, 9, scheduler_type='signed',
                           check_fastmath=True)

    def test_prange22(self):
        def test_impl():
            a = 0
            b = 3
            A = np.empty(4)
            for i in range(-2, 2):
                if i == a:
                    A[i] = b
                elif i < 1:
                    A[i] = -1
                else:
                    A[i] = 7
            return A
        self.prange_tester(test_impl, scheduler_type='signed',
                           check_fastmath=True, check_fastmath_result=True)

    def test_prange23(self):
        # test non-contig input
        def test_impl(A):
            for i in range(len(A)):
                A[i] = i
            return A
        A = np.zeros(32)[::2]
        self.prange_tester(test_impl, A, scheduler_type='unsigned',
                           check_fastmath=True, check_fastmath_result=True)

    def test_prange24(self):
        # test non-contig input, signed range
        def test_impl(A):
            for i in range(-len(A), 0):
                A[i] = i
            return A
        A = np.zeros(32)[::2]
        self.prange_tester(test_impl, A, scheduler_type='signed',
                           check_fastmath=True, check_fastmath_result=True)

    def test_prange25(self):
        def test_impl(A):
            n = len(A)
            buf = [np.zeros_like(A) for _ in range(n)]
            for i in range(n):
                buf[i] = A + i
            return buf
        A = np.ones((10,))
        self.prange_tester(test_impl, A,  patch_instance=[1],
                           scheduler_type='unsigned', check_fastmath=True,
                           check_fastmath_result=True)

        cpfunc = self.compile_parallel(test_impl, (numba.typeof(A),))
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        hoisted_allocs = diagnostics.hoisted_allocations()
        self.assertEqual(len(hoisted_allocs), 0)

    def test_prange26(self):
        def test_impl(A):
            B = A[::3]
            for i in range(len(B)):
                B[i] = i
            return A
        A = np.zeros(32)[::2]
        self.prange_tester(test_impl, A, scheduler_type='unsigned',
                           check_fastmath=True, check_fastmath_result=True)

    def test_prange27(self):
        # issue5597: usedef error in parfor
        def test_impl(a, b, c):
            for j in range(b[0]-1):
                for k in range(2):
                    z = np.abs(a[c-1:c+1])
            return 0

        # patch inner loop to 'prange'
        self.prange_tester(test_impl,
                           np.arange(20),
                           np.asarray([4,4,4,4,4,4,4,4,4,4]),
                           0,
                           patch_instance=[1],
                           scheduler_type='unsigned',
                           check_fastmath=True)

    def test_prange28(self):
        # issue7105: label conflict in nested parfor
        def test_impl(x, y):
            out = np.zeros(len(y))
            for idx in range(0, len(y)):
                i0 = y[idx, 0]
                i1 = y[idx, 1]
                Pt1 = x[i0]
                Pt2 = x[i1]
                v = Pt1 - Pt2
                vl2 = v[0] + v[1]
                out[idx] = vl2
            return out

        X = np.array([[-1., -1.],
                      [-1.,  1.],
                      [ 0.,  0.],
                      [ 1., -1.],
                      [ 1.,  0.],
                      [ 1.,  1.]])

        Y = np.array([[0, 1],
                      [1, 2],
                      [2, 3],
                      [3, 4],
                      [4, 5]])

        self.prange_tester(test_impl, X, Y, scheduler_type='unsigned',
                           check_fastmath=True, check_fastmath_result=True)

    def test_prange29(self):
        # issue7630: SSA renaming in prange header
        def test_impl(flag):
            result = 0
            if flag:
                for i in range(1):
                    result += 1
            else:
                for i in range(1):
                    result -= 3
            return result

        self.prange_tester(test_impl, True)
        self.prange_tester(test_impl, False)

    def test_prange30(self):
        # issue7675: broadcast setitem
        def test_impl(x, par, numthreads):
            n_par = par.shape[0]
            n_x = len(x)
            result = np.zeros((n_par, n_x), dtype=np.float64)
            chunklen = (len(x) + numthreads - 1) // numthreads

            for i in range(numthreads):
                start = i * chunklen
                stop = (i + 1) * chunklen
                result[:, start:stop] = x[start:stop] * par[:]

            return result

        x = np.array(np.arange(0, 6, 1.0))
        par = np.array([1.0, 2.0, 3.0])

        self.prange_tester(test_impl, x, par, 2)


@skip_parfors_unsupported
class TestPrangeSpecific(TestPrangeBase):
    """ Tests specific features/problems found under prange"""

    def test_prange_two_instances_same_reduction_var(self):
        # issue4922 - multiple uses of same reduction variable
        def test_impl(n):
            c = 0
            for i in range(n):
                c += 1
                if i > 10:
                    c += 1
            return c
        self.prange_tester(test_impl, 9)

    def test_prange_conflicting_reduction_ops(self):
        def test_impl(n):
            c = 0
            for i in range(n):
                c += 1
                if i > 10:
                    c *= 1
            return c

        with self.assertRaises(errors.UnsupportedError) as raises:
            self.prange_tester(test_impl, 9)
        msg = ('Reduction variable c has multiple conflicting reduction '
               'operators.')
        self.assertIn(msg, str(raises.exception))

    def test_prange_two_conditional_reductions(self):
        # issue6414
        def test_impl():
            A = B = 0
            for k in range(1):
                if k == 2:
                    A += 1
                else:
                    x = np.zeros((1, 1))
                    if x[0, 0]:
                        B += 1
            return A, B
        self.prange_tester(test_impl)

    def test_prange_nested_reduction1(self):
        def test_impl():
            A = 0
            for k in range(1):
                for i in range(1):
                    if i == 0:
                        A += 1
            return A
        self.prange_tester(test_impl)

    @disabled_test
    def test_check_error_model(self):
        def test_impl():
            n = 32
            A = np.zeros(n)
            for i in range(n):
                A[i] = 1 / i # div-by-zero when i = 0
            return A

        with self.assertRaises(ZeroDivisionError) as raises:
            test_impl()

        # compile parallel functions
        pfunc = self.generate_prange_func(test_impl, None)
        pcres = self.compile_parallel(pfunc, ())
        pfcres = self.compile_parallel_fastmath(pfunc, ())

        # should raise
        with self.assertRaises(ZeroDivisionError) as raises:
            pcres.entry_point()

        # should not raise
        result = pfcres.entry_point()
        self.assertEqual(result[0], np.inf)

    def test_check_alias_analysis(self):
        # check alias analysis reports ok
        def test_impl(A):
            for i in range(len(A)):
                B = A[i]
                B[:] = 1
            return A
        A = np.zeros(32).reshape(4, 8)
        self.prange_tester(test_impl, A, scheduler_type='unsigned',
                           check_fastmath=True, check_fastmath_result=True)
        pfunc = self.generate_prange_func(test_impl, None)
        sig = tuple([numba.typeof(A)])
        cres = self.compile_parallel_fastmath(pfunc, sig)
        _ir = self._get_gufunc_ir(cres)
        for k, v in _ir.items():
            for line in v.splitlines():
                # get the fn definition line
                if 'define' in line and k in line:
                    # there should only be 2x noalias, one on each of the first
                    # 2 args (retptr, excinfo).
                    # Note: used to be 3x no noalias, but env arg is dropped.
                    self.assertEqual(line.count('noalias'), 2)
                    break

    def test_prange_raises_invalid_step_size(self):
        def test_impl(N):
            acc = 0
            for i in range(0, N, 2):
                acc += 2
            return acc

        with self.assertRaises(errors.UnsupportedRewriteError) as raises:
            self.prange_tester(test_impl, 1024)
        msg = 'Only constant step size of 1 is supported for prange'
        self.assertIn(msg, str(raises.exception))

    def test_prange_fastmath_check_works(self):
        # this function will benefit from `fastmath`, the div will
        # get optimised to a multiply by reciprocal and the accumulator
        # then becomes an fmadd: A = A + i * 0.5
        def test_impl():
            n = 128
            A = 0
            for i in range(n):
                A += i / 2.0
            return A
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
        pfunc = self.generate_prange_func(test_impl, None)
        cres = self.compile_parallel_fastmath(pfunc, ())
        ir = self._get_gufunc_ir(cres)
        _id = '%[A-Z_0-9]?(.[0-9]+)+[.]?[i]?'
        recipr_str = '\s+%s = fmul fast double %s, 5.000000e-01'
        reciprocal_inst = re.compile(recipr_str % (_id, _id))
        fadd_inst = re.compile('\s+%s = fadd fast double %s, %s'
                               % (_id, _id, _id))
        # check there is something like:
        #  %.329 = fmul fast double %.325, 5.000000e-01
        #  %.337 = fadd fast double %A.07, %.329
        for name, kernel in ir.items():
            splitted = kernel.splitlines()
            for i, x in enumerate(splitted):
                if reciprocal_inst.match(x):
                    break
            self.assertTrue(fadd_inst.match(splitted[i + 1]))

    def test_parfor_alias1(self):
        def test_impl(n):
            b = np.zeros((n, n))
            a = b[0]
            for j in range(n):
                a[j] = j + 1
            return b.sum()
        self.prange_tester(test_impl, 4)

    def test_parfor_alias2(self):
        def test_impl(n):
            b = np.zeros((n, n))
            for i in range(n):
              a = b[i]
              for j in range(n):
                a[j] = i + j
            return b.sum()
        self.prange_tester(test_impl, 4)

    def test_parfor_alias3(self):
        def test_impl(n):
            b = np.zeros((n, n, n))
            for i in range(n):
              a = b[i]
              for j in range(n):
                c = a[j]
                for k in range(n):
                  c[k] = i + j + k
            return b.sum()
        self.prange_tester(test_impl, 4)

    def test_parfor_race_1(self):
        def test_impl(x, y):
            for j in range(y):
                k = x
            return k
        raised_warnings = self.prange_tester(test_impl, 10, 20)
        warning_obj = raised_warnings[0]
        expected_msg = ("Variable k used in parallel loop may be written to "
                        "simultaneously by multiple workers and may result "
                        "in non-deterministic or unintended results.")
        self.assertIn(expected_msg, str(warning_obj.message))

    def test_nested_parfor_push_call_vars(self):
        """ issue 3686: if a prange has something inside it that causes
            a nested parfor to be generated and both the inner and outer
            parfor use the same call variable defined outside the parfors
            then ensure that when that call variable is pushed into the
            parfor that the call variable isn't duplicated with the same
            name resulting in a redundant type lock.
        """
        def test_impl():
            B = 0
            f = np.negative
            for i in range(1):
                this_matters = f(1.)
                B += f(np.zeros(1,))[0]
            for i in range(2):
                this_matters = f(1.)
                B += f(np.zeros(1,))[0]

            return B
        self.prange_tester(test_impl)

    def test_copy_global_for_parfor(self):
        """ issue4903: a global is copied next to a parfor so that
            it can be inlined into the parfor and thus not have to be
            passed to the parfor (i.e., an unsupported function type).
            This global needs to be renamed in the block into which
            it is copied.
        """
        def test_impl(zz, tc):
            lh = np.zeros(len(tc))
            lc = np.zeros(len(tc))
            for i in range(1):
                nt = tc[i]
                for t in range(nt):
                    lh += np.exp(zz[i, t])
                for t in range(nt):
                    lc += np.exp(zz[i, t])
            return lh, lc

        m = 2
        zz = np.ones((m, m, m))
        tc = np.ones(m, dtype=np.int_)
        self.prange_tester(test_impl, zz, tc, patch_instance=[0])

    def test_multiple_call_getattr_object(self):
        def test_impl(n):
            B = 0
            f = np.negative
            for i in range(1):
                this_matters = f(1.0)
                B += f(n)

            return B
        self.prange_tester(test_impl, 1.0)

    def test_argument_alias_recarray_field(self):
        # Test for issue4007.
        def test_impl(n):
            for i in range(len(n)):
                n.x[i] = 7.0
            return n
        X1 = np.zeros(10, dtype=[('x', float), ('y', int), ])
        X2 = np.zeros(10, dtype=[('x', float), ('y', int), ])
        X3 = np.zeros(10, dtype=[('x', float), ('y', int), ])
        v1 = X1.view(np.recarray)
        v2 = X2.view(np.recarray)
        v3 = X3.view(np.recarray)

        # Numpy doesn't seem to support almost equal on recarray.
        # So, we convert to list and use assertEqual instead.
        python_res = list(test_impl(v1))
        njit_res = list(njit(test_impl)(v2))
        pa_func = njit(test_impl, parallel=True)
        pa_res = list(pa_func(v3))
        self.assertEqual(python_res, njit_res)
        self.assertEqual(python_res, pa_res)

    def test_mutable_list_param(self):
        """ issue3699: test that mutable variable to call in loop
            is not hoisted.  The call in test_impl forces a manual
            check here rather than using prange_tester.
        """
        @njit
        def list_check(X):
            """ If the variable X is hoisted in the test_impl prange
                then subsequent list_check calls would return increasing
                values.
            """
            ret = X[-1]
            a = X[-1] + 1
            X.append(a)
            return ret
        def test_impl(n):
            for i in prange(n):
                X = [100]
                a = list_check(X)
            return a
        python_res = test_impl(10)
        njit_res = njit(test_impl)(10)
        pa_func = njit(test_impl, parallel=True)
        pa_res = pa_func(10)
        self.assertEqual(python_res, njit_res)
        self.assertEqual(python_res, pa_res)

    def test_list_comprehension_prange(self):
        # issue4569
        def test_impl(x):
            return np.array([len(x[i]) for i in range(len(x))])
        x = [np.array([1,2,3], dtype=int),np.array([1,2], dtype=int)]
        self.prange_tester(test_impl, x)

    def test_ssa_false_reduction(self):
        # issue5698
        # SSA for h creates assignments to h that make it look like a
        # reduction variable except that it lacks an associated
        # reduction operator.  Test here that h is excluded as a
        # reduction variable.
        def test_impl(image, a, b):
            empty = np.zeros(image.shape)
            for i in range(image.shape[0]):
                r = image[i][0] / 255.0
                if a == 0:
                    h = 0
                if b == 0:
                    h = 0
                empty[i] = [h, h, h]
            return empty

        image = np.zeros((3, 3), dtype=np.int32)
        self.prange_tester(test_impl, image, 0, 0)

    def test_list_setitem_hoisting(self):
        # issue5979
        # Don't hoist list initialization if list item set.
        def test_impl():
            n = 5
            a = np.empty(n, dtype=np.int64)
            for k in range(5):
                X = [0]
                X[0] = 1
                a[k] = X[0]
            return a

        self.prange_tester(test_impl)

    def test_record_array_setitem(self):
        # issue6704
        state_dtype = np.dtype([('var', np.int32)])

        def test_impl(states):
            for i in range(1):
                states[i]['var'] = 1

        def comparer(a, b):
            assert(a[0]['var'] == b[0]['var'])

        self.prange_tester(test_impl,
                           np.zeros(shape=1, dtype=state_dtype),
                           check_arg_equality=[comparer])

    def test_record_array_setitem_yield_array(self):
        state_dtype = np.dtype([('x', np.intp)])

        def test_impl(states):
            n = states.size
            for i in range(states.size):
                states["x"][i] = 7 + i
            return states

        states = np.zeros(10, dtype=state_dtype)

        def comparer(a, b):
            np.testing.assert_equal(a, b)

        self.prange_tester(test_impl,
                           states,
                           check_arg_equality=[comparer])

    def test_issue7501(self):
        def test_impl(size, case):
            result = np.zeros((size,))
            if case == 1:
                for i in range(size):
                    result[i] += 1
            else:
                for i in range(size):
                    result[i] += 2
            return result[0]

        self.prange_tester(test_impl, 3, 1)

    def test_kde_example(self):
        def test_impl(X):
            # KDE example
            b = 0.5
            points = np.array([-1.0, 2.0, 5.0])
            N = points.shape[0]
            n = X.shape[0]
            exps = 0
            for i in range(n):
                p = X[i]
                d = (-(p - points)**2) / (2 * b**2)
                m = np.min(d)
                exps += m - np.log(b * N) + np.log(np.sum(np.exp(d - m)))
            return exps

        n = 128
        X = np.random.ranf(n)
        self.prange_tester(test_impl, X)

    @skip_parfors_unsupported
    def test_issue_due_to_max_label(self):
        # Run the actual test in a new process since it can only reproduce in
        # a fresh state.
        out = subp.check_output(
            [sys.executable, '-m', 'numba.tests.parfors_max_label_error'],
            timeout=30,
            stderr=subp.STDOUT, # redirect stderr to stdout
        )
        self.assertIn("TEST PASSED", out.decode())

    @skip_parfors_unsupported
    def test_issue7578(self):
        def test_impl(x):
            A = np.zeros_like(x)
            tmp = np.cos(x) # this can be any 1-arity ufunc

            for i in range(len(x)):
                A[i] = tmp.sum()

            return A

        x = np.arange(10.)
        self.prange_tester(test_impl, x)

@skip_parfors_unsupported
class TestParforChunksizing(TestCase):
    """
    Tests chunksize handling in ParallelAccelerator.
    """
    _numba_parallel_test_ = False

    def setUp(self):
        set_parallel_chunksize(0)

    def tearDown(self):
        set_parallel_chunksize(0)

    def test_python_parallel_chunksize_basic(self):
        # Test basic chunksize operations outside njit.
        self.assertEqual(get_parallel_chunksize(), 0)
        set_parallel_chunksize(8)
        self.assertEqual(get_parallel_chunksize(), 8)
        set_parallel_chunksize(0)
        self.assertEqual(get_parallel_chunksize(), 0)

    def test_python_with_chunksize(self):
        # Test "with parallel_chunksize" outside njit.
        self.assertEqual(get_parallel_chunksize(), 0)
        with parallel_chunksize(8):
            self.assertEqual(get_parallel_chunksize(), 8)
        self.assertEqual(get_parallel_chunksize(), 0)

    def test_njit_parallel_chunksize_basic(self):
        # Test basic chunksize operations inside njit.
        @njit
        def get_cs():
            return get_parallel_chunksize()

        @njit
        def set_cs(x):
            return set_parallel_chunksize(x)

        self.assertEqual(get_cs(), 0)
        set_cs(8)
        self.assertEqual(get_cs(), 8)
        set_cs(0)
        self.assertEqual(get_cs(), 0)

    def test_njit_with_chunksize(self):
        # Test "with parallel_chunksize" inside njit.
        @njit
        def test_impl(x):
            cs1 = get_parallel_chunksize()
            with parallel_chunksize(8):
                cs2 = get_parallel_chunksize()
            cs3 = get_parallel_chunksize()
            return cs1, cs2, cs3

        cs1, cs2, cs3 = test_impl(8)

        self.assertEqual(cs1, 0)
        self.assertEqual(cs2, 8)
        self.assertEqual(cs3, 0)

    def test_all_iterations_reset_chunksize(self):
        """ Test that all the iterations get run if you set the
            chunksize.  Also check that the chunksize that each
            worker thread sees has been reset to 0. """

        @njit(parallel=True)
        def test_impl(cs, n):
            res = np.zeros(n)
            inner_cs = np.full(n, -13)
            with numba.parallel_chunksize(cs):
                for i in numba.prange(n):
                    inner_cs[i] = numba.get_parallel_chunksize()
                    res[i] = 13
            return res, inner_cs

        # Test a variety of array and chunk sizes.
        # 1000 is a round number, 997 is prime, 943 is product of two
        # primes, 961 is square of a prime.
        for j in [1000, 997, 943, 961]:
            for i in range(15):
                res, inner_cs = test_impl(i+1, j)
                self.assertTrue(np.all(res == 13))
                self.assertTrue(np.all(inner_cs == 0))

    def test_njit_parallel_chunksize_negative(self):
        # Test negative set_parallel_chunksize inside njit.
        with self.assertRaises(ValueError) as raised:
            @njit
            def neg_test():
                set_parallel_chunksize(-1)

            neg_test()

        msg = "chunksize must be greater than or equal to zero"
        self.assertIn(msg, str(raised.exception))

    def test_python_parallel_chunksize_negative(self):
        # Test negative set_parallel_chunksize outside njit.
        with self.assertRaises(ValueError) as raised:
            set_parallel_chunksize(-1)

        msg = "chunksize must be greater than or equal to zero"
        self.assertIn(msg, str(raised.exception))

    def test_njit_parallel_chunksize_invalid_type(self):
        with self.assertRaises(errors.TypingError) as raised:
            @njit
            def impl():
                set_parallel_chunksize('invalid_type')

            impl()

        msg = "The parallel chunksize must be an integer"
        self.assertIn(msg, str(raised.exception))

    def test_python_parallel_chunksize_invalid_type(self):
        with self.assertRaises(TypeError) as raised:
            set_parallel_chunksize('invalid_type')

        msg = "The parallel chunksize must be an integer"
        self.assertIn(msg, str(raised.exception))


@skip_parfors_unsupported
@x86_only
class TestParforsVectorizer(TestPrangeBase):

    # env mutating test
    _numba_parallel_test_ = False

    def get_gufunc_asm(self, func, schedule_type, *args, **kwargs):

        fastmath = kwargs.pop('fastmath', False)
        cpu_name = kwargs.pop('cpu_name', 'skylake-avx512')
        assertions = kwargs.pop('assertions', True)
        # force LLVM to use zmm registers for vectorization
        # https://reviews.llvm.org/D67259
        cpu_features = kwargs.pop('cpu_features', '-prefer-256-bit')

        env_opts = {'NUMBA_CPU_NAME': cpu_name,
                    'NUMBA_CPU_FEATURES': cpu_features,
                    }

        overrides = []
        for k, v in env_opts.items():
            overrides.append(override_env_config(k, v))

        with overrides[0], overrides[1]:
            sig = tuple([numba.typeof(x) for x in args])
            pfunc_vectorizable = self.generate_prange_func(func, None)
            if fastmath == True:
                cres = self.compile_parallel_fastmath(pfunc_vectorizable, sig)
            else:
                cres = self.compile_parallel(pfunc_vectorizable, sig)

            # get the gufunc asm
            asm = self._get_gufunc_asm(cres)

            if assertions:
                schedty = re.compile('call\s+\w+\*\s+@do_scheduling_(\w+)\(')
                matches = schedty.findall(cres.library.get_llvm_str())
                self.assertGreaterEqual(len(matches), 1) # at least 1 parfor call
                self.assertEqual(matches[0], schedule_type)
                self.assertNotEqual(asm, {})

            return asm

    @linux_only
    def test_vectorizer_fastmath_asm(self):
        """ This checks that if fastmath is set and the underlying hardware
        is suitable, and the function supplied is amenable to fastmath based
        vectorization, that the vectorizer actually runs.
        """

        # This function will benefit from `fastmath` if run on a suitable
        # target. The vectorizer should unwind the loop and generate
        # packed dtype=double add and sqrt instructions.
        def will_vectorize(A):
            n = len(A)
            acc = 0
            for i in range(n):
                acc += np.sqrt(i)
            return acc

        arg = np.zeros(10)

        fast_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg,
                                       fastmath=True)
        slow_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg,
                                       fastmath=False)
        for v in fast_asm.values():
            # should unwind and call vector sqrt then vector add
            # all on packed doubles using zmm's
            self.assertTrue('vaddpd' in v)
            self.assertTrue('vsqrtpd' in v or '__svml_sqrt' in v)
            self.assertTrue('zmm' in v)

        for v in slow_asm.values():
            # vector variants should not be present
            self.assertTrue('vaddpd' not in v)
            self.assertTrue('vsqrtpd' not in v)
            # check scalar variant is present
            self.assertTrue('vsqrtsd' in v and '__svml_sqrt' not in v)
            self.assertTrue('vaddsd' in v)
            # check no zmm addressing is present
            self.assertTrue('zmm' not in v)

    @linux_only
    def test_unsigned_refusal_to_vectorize(self):
        """ This checks that if fastmath is set and the underlying hardware
        is suitable, and the function supplied is amenable to fastmath based
        vectorization, that the vectorizer actually runs.
        """

        def will_not_vectorize(A):
            n = len(A)
            for i in range(-n, 0):
                A[i] = np.sqrt(A[i])
            return A

        def will_vectorize(A):
            n = len(A)
            for i in range(n):
                A[i] = np.sqrt(A[i])
            return A

        arg = np.zeros(10)

        # Boundschecking breaks vectorization
        with override_env_config('NUMBA_BOUNDSCHECK', '0'):
            novec_asm = self.get_gufunc_asm(will_not_vectorize, 'signed', arg,
                                            fastmath=True)

            vec_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg,
                                          fastmath=True)

        for v in novec_asm.values():
            # vector variant should not be present
            self.assertTrue('vsqrtpd' not in v)
            # check scalar variant is present
            self.assertTrue('vsqrtsd' in v)
            # check no zmm addressing is present
            self.assertTrue('zmm' not in v)

        for v in vec_asm.values():
            # should unwind and call vector sqrt then vector mov
            # all on packed doubles using zmm's
            self.assertTrue('vsqrtpd' in v or '__svml_sqrt' in v)
            self.assertTrue('vmovupd' in v)
            self.assertTrue('zmm' in v)

    @linux_only
    # needed as 32bit doesn't have equivalent signed/unsigned instruction
    # generation for this function
    def test_signed_vs_unsigned_vec_asm(self):
        """ This checks vectorization for signed vs unsigned variants of a
        trivial accumulator, the only meaningful difference should be the
        presence of signed vs. unsigned unpack instructions (for the
        induction var).
        """
        def signed_variant():
            n = 4096
            A = 0.
            for i in range(-n, 0):
                A += i
            return A

        def unsigned_variant():
            n = 4096
            A = 0.
            for i in range(n):
                A += i
            return A

        # Boundschecking breaks the diff check below because of the pickled exception
        with override_env_config('NUMBA_BOUNDSCHECK', '0'):
            signed_asm = self.get_gufunc_asm(signed_variant, 'signed',
                                             fastmath=True)
            unsigned_asm = self.get_gufunc_asm(unsigned_variant, 'unsigned',
                                               fastmath=True)

        def strip_instrs(asm):
            acc = []
            for x in asm.splitlines():
                spd = x.strip()
                # filter out anything that isn't a trivial instruction
                # and anything with the gufunc id as it contains an address
                if spd != '' and not (spd.startswith('.')
                                     or spd.startswith('_')
                                     or spd.startswith('"')
                                     or '__numba_parfor_gufunc' in spd):
                        acc.append(re.sub('[\t]', '', spd))
            return acc

        for k, v in signed_asm.items():
            signed_instr = strip_instrs(v)
            break

        for k, v in unsigned_asm.items():
            unsigned_instr = strip_instrs(v)
            break

        from difflib import SequenceMatcher as sm
        # make sure that the only difference in instruction (if there is a
        # difference) is the char 'u'. For example:
        # vcvtsi2sdq vs. vcvtusi2sdq
        self.assertEqual(len(signed_instr), len(unsigned_instr))
        for a, b in zip(signed_instr, unsigned_instr):
            if a == b:
                continue
            else:
                s = sm(lambda x: x == '\t', a, b)
                ops = s.get_opcodes()
                for op in ops:
                    if op[0] == 'insert':
                        self.assertEqual(b[op[-2]:op[-1]], 'u')


@skip_parfors_unsupported
class TestParforReductionSetNumThreads(TestCase):
    """Test execution correctness on reductions with set_num_threads.
    """
    def test_add(self):
        N = config.NUMBA_NUM_THREADS
        M = 2 * N
        mask = N - 1

        @njit(parallel=True)
        def udt(nthreads):
            acc = 0
            set_num_threads(nthreads)
            for i in prange(M):
                local_mask = 1 + i % mask
                set_num_threads(local_mask)
                gnt = get_num_threads()
                acc += gnt
            return acc

        expect = udt.py_func(mask)
        got = udt(mask)
        self.assertPreciseEqual(expect, got)

    def test_mul(self):
        # This min will prevent larger thread counts from generating
        # overflow in the loop below.
        N = min(4, config.NUMBA_NUM_THREADS)
        M = 2 * N
        mask = N - 1

        @njit(parallel=True)
        def udt(nthreads):
            acc = 1
            set_num_threads(nthreads)
            for i in prange(M):
                local_mask = 1 + i % mask
                set_num_threads(local_mask)
                gnt = get_num_threads()
                acc *= gnt
            return acc

        expect = udt.py_func(mask)
        got = udt(mask)
        self.assertPreciseEqual(expect, got)

    def test_max(self):
        N = config.NUMBA_NUM_THREADS
        M = 2 * N
        mask = N - 1

        @njit(parallel=True)
        def udt(nthreads):
            acc = 1
            set_num_threads(nthreads)
            for i in prange(M):
                local_mask = 1 + i % mask
                set_num_threads(local_mask)
                gnt = get_num_threads()
                acc = max(acc, gnt)
            return acc

        expect = udt.py_func(mask)
        got = udt(mask)
        self.assertPreciseEqual(expect, got)


@skip_parfors_unsupported
class TestDiagnosticEnvVar(TestCase):
    @TestCase.run_test_in_subprocess()
    def test_diagnostics_env_var1(self):
        os.environ['NUMBA_PARALLEL_DIAGNOSTICS']='4'
        with captured_stdout() as stdout:
            @njit(parallel=True)
            def impl():
                n = 100
                b = np.zeros((n), dtype=np.float64)
                for i in prange(n):
                    b[i] = 1
                return b

            impl()
        the_output = stdout.getvalue()
        self.assertIn("Parallel Accelerator Optimizing", the_output)


if __name__ == "__main__":
    unittest.main()