#line 1 "numpy/core/src/_simd/_simd.dispatch.c.src"

/*
 *****************************************************************************
 **       This file was autogenerated from a template  DO NOT EDIT!!!!      **
 **       Changes should be made to the original source (.src) file         **
 *****************************************************************************
 */

#line 1
/*@targets #simd_test*/
#include "_simd.h"
#include "_simd_inc.h"

#if NPY_SIMD
#include "_simd_data.inc"
#include "_simd_convert.inc"
#include "_simd_vector.inc"
#include "_simd_arg.inc"
#include "_simd_easyintrin.inc"

//#########################################################################
//## Defining NPYV intrinsics as module functions
//#########################################################################
#line 35
#if 1
/***************************
 * Memory
 ***************************/
// Contiguous-load wrappers for the u8 suffix. Each invocation passes the
// intrinsic name followed by two data-type tags: `vu8` (vector) and `qu8`
// (sequence).
// NOTE(review): SIMD_IMPL_INTRIN_1 is not defined in this file; presumably it
// generates the simd__intrin_<name> wrapper with the return tag listed first
// -- confirm against _simd_easyintrin.inc.
#line 42
SIMD_IMPL_INTRIN_1(load_u8, vu8, qu8)

#line 42
SIMD_IMPL_INTRIN_1(loada_u8, vu8, qu8)

#line 42
SIMD_IMPL_INTRIN_1(loads_u8, vu8, qu8)

#line 42
SIMD_IMPL_INTRIN_1(loadl_u8, vu8, qu8)

#line 47
/*
 * Python-callable wrapper around npyv_store_u8; hand-written because a store
 * has to push its result back into the caller's object.
 *
 * Python arguments: (sequence, vector).
 * Returns None on success, or NULL when argument parsing fails or
 * simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_store_u8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu8};
    simd_arg src = {.dtype = simd_data_vu8};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:store_u8",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_u8(dst.data.qu8, src.data.vu8);
    // copy the updated buffer back into the object the caller passed in
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qu8, simd_data_qu8)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
/*
 * Python-callable wrapper around npyv_storea_u8; hand-written because a store
 * has to push its result back into the caller's object.
 *
 * Python arguments: (sequence, vector).
 * Returns None on success, or NULL when argument parsing fails or
 * simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_storea_u8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu8};
    simd_arg src = {.dtype = simd_data_vu8};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storea_u8",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storea_u8(dst.data.qu8, src.data.vu8);
    // copy the updated buffer back into the object the caller passed in
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qu8, simd_data_qu8)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
/*
 * Python-callable wrapper around npyv_stores_u8; hand-written because a store
 * has to push its result back into the caller's object.
 *
 * Python arguments: (sequence, vector).
 * Returns None on success, or NULL when argument parsing fails or
 * simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_stores_u8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu8};
    simd_arg src = {.dtype = simd_data_vu8};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:stores_u8",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_stores_u8(dst.data.qu8, src.data.vu8);
    // copy the updated buffer back into the object the caller passed in
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qu8, simd_data_qu8)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
/*
 * Python-callable wrapper around npyv_storel_u8; hand-written because a store
 * has to push its result back into the caller's object.
 *
 * Python arguments: (sequence, vector).
 * Returns None on success, or NULL when argument parsing fails or
 * simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_storel_u8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu8};
    simd_arg src = {.dtype = simd_data_vu8};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storel_u8",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storel_u8(dst.data.qu8, src.data.vu8);
    // copy the updated buffer back into the object the caller passed in
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qu8, simd_data_qu8)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
/*
 * Python-callable wrapper around npyv_storeh_u8; hand-written because a store
 * has to push its result back into the caller's object.
 *
 * Python arguments: (sequence, vector).
 * Returns None on success, or NULL when argument parsing fails or
 * simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_storeh_u8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu8};
    simd_arg src = {.dtype = simd_data_vu8};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storeh_u8",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storeh_u8(dst.data.qu8, src.data.vu8);
    // copy the updated buffer back into the object the caller passed in
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qu8, simd_data_qu8)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 0
// Partial Load
// These two invocations sit inside the enclosing `#if 0` region and are
// therefore not compiled for the u8 suffix.
// Tags after the name: vu8 (vector), qu8 (sequence), u32, and for load_till
// an extra u8 scalar.
SIMD_IMPL_INTRIN_3(load_till_u8, vu8, qu8, u32, u8)
SIMD_IMPL_INTRIN_2(load_tillz_u8, vu8, qu8, u32)

// Partial Store
/*
 * Python-callable wrapper around npyv_store_till_u8.
 *
 * Python arguments: (sequence, nlane, vector).
 * Returns None on success, or NULL when argument parsing fails or
 * simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_store_till_u8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu8};
    simd_arg count = {.dtype = simd_data_u32};
    simd_arg src = {.dtype = simd_data_vu8};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&O&:store_till_u8",
        simd_arg_converter, &dst,
        simd_arg_converter, &count,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_till_u8(dst.data.qu8, count.data.u32, src.data.vu8);
    // copy the updated buffer back into the object the caller passed in
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qu8, simd_data_qu8)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

// Non-contiguous Load
#line 113
/*
 * Python-callable wrapper around npyv_loadn_u8 (non-contiguous load).
 *
 * Python arguments: (sequence, stride).
 * Returns the loaded vector converted by simd_arg_to_obj().
 * Returns NULL when argument parsing fails, and NULL with ValueError set when
 * the sequence holds fewer than |stride| * npyv_nlanes_u8 items.
 */
static PyObject *
simd__intrin_loadn_u8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu8};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    if (!PyArg_ParseTuple(
        args, "O&O&:loadn_u8",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u8 *seq_ptr = seq_arg.data.qu8;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u8;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last item
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; these arguments may be wider than the int
        // that a plain %d would consume.
        PyErr_Format(PyExc_ValueError,
            "loadn_u8(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        simd_arg_free(&seq_arg);
        return NULL;
    }
    npyv_u8 rvec = npyv_loadn_u8(seq_ptr, stride);
    simd_arg ret = {
        .dtype = simd_data_vu8, .data = {.vu8=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
}

#line 113
/*
 * Python-callable wrapper around npyv_loadn_till_u8 (non-contiguous load).
 *
 * Python arguments: (sequence, stride, nlane, fill).
 * Returns the loaded vector converted by simd_arg_to_obj().
 * Returns NULL when argument parsing fails, and NULL with ValueError set when
 * the sequence holds fewer than |stride| * npyv_nlanes_u8 items.
 */
static PyObject *
simd__intrin_loadn_till_u8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu8};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    simd_arg fill_arg = {.dtype = simd_data_u8};
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:loadn_till_u8",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &fill_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u8 *seq_ptr = seq_arg.data.qu8;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u8;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last item
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; these arguments may be wider than the int
        // that a plain %d would consume.
        PyErr_Format(PyExc_ValueError,
            "loadn_till_u8(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        simd_arg_free(&seq_arg);
        return NULL;
    }
    npyv_u8 rvec = npyv_loadn_till_u8(
        seq_ptr, stride, nlane_arg.data.u32, fill_arg.data.u8
    );
    simd_arg ret = {
        .dtype = simd_data_vu8, .data = {.vu8=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
}

#line 113
/*
 * Python-callable wrapper around npyv_loadn_tillz_u8 (non-contiguous load).
 *
 * Python arguments: (sequence, stride, nlane).
 * Returns the loaded vector converted by simd_arg_to_obj().
 * Returns NULL when argument parsing fails, and NULL with ValueError set when
 * the sequence holds fewer than |stride| * npyv_nlanes_u8 items.
 */
static PyObject *
simd__intrin_loadn_tillz_u8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu8};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:loadn_tillz_u8",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u8 *seq_ptr = seq_arg.data.qu8;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u8;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last item
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; these arguments may be wider than the int
        // that a plain %d would consume.
        PyErr_Format(PyExc_ValueError,
            "loadn_tillz_u8(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        simd_arg_free(&seq_arg);
        return NULL;
    }
    npyv_u8 rvec = npyv_loadn_tillz_u8(seq_ptr, stride, nlane_arg.data.u32);
    simd_arg ret = {
        .dtype = simd_data_vu8, .data = {.vu8=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
}


// Non-contiguous Store
#line 179
/*
 * Python-callable wrapper around npyv_storen_u8 (non-contiguous store).
 *
 * Python arguments: (sequence, stride, vector).
 * Returns None on success. Returns NULL when argument parsing fails, when the
 * sequence holds fewer than |stride| * npyv_nlanes_u8 items (ValueError), or
 * when simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_storen_u8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu8};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vu8};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:storen_u8",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u8 *seq_ptr = seq_arg.data.qu8;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u8;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last item
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; the trailing space after "the" keeps the two
        // adjacent literals from fusing into "theminimum".
        PyErr_Format(PyExc_ValueError,
            "storen_u8(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_u8(seq_ptr, stride, vec_arg.data.vu8);
    // write-back (uses the buffer start, not the stride-adjusted seq_ptr)
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qu8, simd_data_qu8)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 179
/*
 * Python-callable wrapper around npyv_storen_till_u8 (non-contiguous store).
 *
 * Python arguments: (sequence, stride, nlane, vector).
 * Returns None on success. Returns NULL when argument parsing fails, when the
 * sequence holds fewer than |stride| * npyv_nlanes_u8 items (ValueError), or
 * when simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_storen_till_u8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu8};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vu8};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    // The text after ':' is the name reported in argument-parsing errors, so
    // it must be this function's own name.
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:storen_till_u8",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u8 *seq_ptr = seq_arg.data.qu8;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u8;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last item
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; the trailing space after "the" keeps the two
        // adjacent literals from fusing into "theminimum".
        PyErr_Format(PyExc_ValueError,
            "storen_till_u8(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_till_u8(
        seq_ptr, stride, nlane_arg.data.u32, vec_arg.data.vu8
    );
    // write-back (uses the buffer start, not the stride-adjusted seq_ptr)
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qu8, simd_data_qu8)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#endif // 0

/****************************
 * Lookup tables
 ****************************/
// Both guards below compare the literal 8 against 32 and 64, so they are
// always false and neither lookup-table wrapper is emitted for u8.
#if 8 == 32
SIMD_IMPL_INTRIN_2(lut32_u8, vu8, qu8, vu8)
#endif
#if 8 == 64
SIMD_IMPL_INTRIN_2(lut16_u8, vu8, qu8, vu8)
#endif
/***************************
 * Misc
 ***************************/
// The digit in the macro name matches the number of type tags that follow the
// first tag (0 for zero, 1 for setall, 3 for select).
SIMD_IMPL_INTRIN_0(zero_u8, vu8)
SIMD_IMPL_INTRIN_1(setall_u8, vu8, u8)
SIMD_IMPL_INTRIN_3(select_u8, vu8, vb8, vu8, vu8)

// reinterpret_<to>_u8 wrappers: the first type tag changes per target type
// while the second is always vu8. Every variant is guarded by `#if 1` except
// the f64 one, which depends on NPY_SIMD_F64.
#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u8_u8, vu8, vu8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s8_u8, vs8, vu8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u16_u8, vu16, vu8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s16_u8, vs16, vu8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u32_u8, vu32, vu8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s32_u8, vs32, vu8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u64_u8, vu64, vu8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s64_u8, vs64, vu8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_f32_u8, vf32, vu8)
#endif // simd_sup2

#line 256
#if NPY_SIMD_F64
SIMD_IMPL_INTRIN_1(reinterpret_f64_u8, vf64, vu8)
#endif // simd_sup2


/**
 * special definition due to the nature of intrinsics
 * npyv_setf_u8 and npyv_set_u8.
*/
#line 268
/*
 * Python-callable wrapper around npyv_setf_u8.
 *
 * Converts the positional-argument tuple into a qu8 lane buffer, forwards the
 * buffer items to npyv_setf_u8 and wraps the result with
 * PySIMDVector_FromData(). Returns NULL when the conversion yields no buffer.
 *
 * NOTE(review): indices 0..64 are listed even though the conversion is asked
 * for npyv_nlanes_u8 items; presumably npyv_setf_u8 is a macro that ignores
 * the arguments it does not need -- confirm.
 */
static PyObject *
simd__intrin_setf_u8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_u8 *lanes = simd_sequence_from_iterable(
        args, simd_data_qu8, npyv_nlanes_u8
    );
    if (!lanes) {
        return NULL;
    }
    simd_data packed = {.vu8 = npyv_setf_u8(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27], lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35], lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43], lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51], lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59], lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // extra trailing item, listed for setf
    )};
    simd_sequence_free(lanes);
    PyObject *result = (PyObject*)PySIMDVector_FromData(packed, simd_data_vu8);
    return result;
}

#line 268
/*
 * Python-callable wrapper around npyv_set_u8.
 *
 * Converts the positional-argument tuple into a qu8 lane buffer, forwards the
 * buffer items to npyv_set_u8 and wraps the result with
 * PySIMDVector_FromData(). Returns NULL when the conversion yields no buffer.
 *
 * NOTE(review): indices 0..64 are listed even though the conversion is asked
 * for npyv_nlanes_u8 items; presumably npyv_set_u8 is a macro that ignores
 * the arguments it does not need -- confirm.
 */
static PyObject *
simd__intrin_set_u8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_u8 *lanes = simd_sequence_from_iterable(
        args, simd_data_qu8, npyv_nlanes_u8
    );
    if (!lanes) {
        return NULL;
    }
    simd_data packed = {.vu8 = npyv_set_u8(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27], lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35], lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43], lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51], lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59], lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // extra trailing item, listed for setf
    )};
    simd_sequence_free(lanes);
    PyObject *result = (PyObject*)PySIMDVector_FromData(packed, simd_data_vu8);
    return result;
}


/***************************
 * Reorder
 ***************************/
// combinel/combineh list a single-vector first tag (vu8); combine/zip list
// the vu8x2 tag instead. All of them take two vu8 tags after that.
#line 297
SIMD_IMPL_INTRIN_2(combinel_u8, vu8, vu8, vu8)

#line 297
SIMD_IMPL_INTRIN_2(combineh_u8, vu8, vu8, vu8)


#line 303
SIMD_IMPL_INTRIN_2(combine_u8, vu8x2, vu8, vu8)

#line 303
SIMD_IMPL_INTRIN_2(zip_u8, vu8x2, vu8, vu8)


#if 1
SIMD_IMPL_INTRIN_1(rev64_u8, vu8, vu8)
#endif

/***************************
 * Operators
 ***************************/
// `0 > 0` is always false, so the four shift wrappers in this guard are not
// emitted for u8.
#if 0 > 0
SIMD_IMPL_INTRIN_2(shl_u8, vu8, vu8, u8)
SIMD_IMPL_INTRIN_2(shr_u8, vu8, vu8, u8)
// immediate constant
SIMD_IMPL_INTRIN_2IMM(shli_u8, vu8, vu8, 0)
SIMD_IMPL_INTRIN_2IMM(shri_u8, vu8, vu8, 0)
#endif // shl_imm

#line 324
SIMD_IMPL_INTRIN_2(and_u8, vu8, vu8, vu8)

#line 324
SIMD_IMPL_INTRIN_2(or_u8, vu8, vu8, vu8)

#line 324
SIMD_IMPL_INTRIN_2(xor_u8, vu8, vu8, vu8)


SIMD_IMPL_INTRIN_1(not_u8, vu8, vu8)

// The comparison wrappers below list vb8 as their first tag instead of vu8.
#line 332
SIMD_IMPL_INTRIN_2(cmpeq_u8, vb8, vu8, vu8)

#line 332
SIMD_IMPL_INTRIN_2(cmpneq_u8, vb8, vu8, vu8)

#line 332
SIMD_IMPL_INTRIN_2(cmpgt_u8, vb8, vu8, vu8)

#line 332
SIMD_IMPL_INTRIN_2(cmpge_u8, vb8, vu8, vu8)

#line 332
SIMD_IMPL_INTRIN_2(cmplt_u8, vb8, vu8, vu8)

#line 332
SIMD_IMPL_INTRIN_2(cmple_u8, vb8, vu8, vu8)


/***************************
 * Conversion
 ***************************/
// cvt_u8_b8 / cvt_b8_u8 swap the vu8 and vb8 tags; expand_u16_u8 lists the
// vu16x2 tag first.
SIMD_IMPL_INTRIN_1(cvt_u8_b8, vu8,  vb8)
SIMD_IMPL_INTRIN_1(cvt_b8_u8, vb8, vu8)
#if 1
SIMD_IMPL_INTRIN_1(expand_u16_u8, vu16x2, vu8)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
// Arithmetic wrappers for u8. The `#if 0` guards below leave div, the fused
// multiply variants and sum out of the build; add/sub, adds/subs, mul,
// divisor/divc and sumup are emitted.
#line 349
SIMD_IMPL_INTRIN_2(add_u8, vu8, vu8, vu8)

#line 349
SIMD_IMPL_INTRIN_2(sub_u8, vu8, vu8, vu8)


#if 1
#line 356
SIMD_IMPL_INTRIN_2(adds_u8, vu8, vu8, vu8)

#line 356
SIMD_IMPL_INTRIN_2(subs_u8, vu8, vu8, vu8)

#endif // sat_sup

#if 1
SIMD_IMPL_INTRIN_2(mul_u8, vu8, vu8, vu8)
#endif // mul_sup

#if 0
SIMD_IMPL_INTRIN_2(div_u8, vu8, vu8, vu8)
#endif // div_sup

#if 1
SIMD_IMPL_INTRIN_1(divisor_u8, vu8x3, u8)
SIMD_IMPL_INTRIN_2(divc_u8, vu8, vu8, vu8x3)
#endif // intdiv_sup

#if 0
#line 377
SIMD_IMPL_INTRIN_3(muladd_u8, vu8, vu8, vu8, vu8)

#line 377
SIMD_IMPL_INTRIN_3(mulsub_u8, vu8, vu8, vu8, vu8)

#line 377
SIMD_IMPL_INTRIN_3(nmuladd_u8, vu8, vu8, vu8, vu8)

#line 377
SIMD_IMPL_INTRIN_3(nmulsub_u8, vu8, vu8, vu8, vu8)

#endif // fused_sup

#if 0
SIMD_IMPL_INTRIN_1(sum_u8, u8, vu8)
#endif // sum_sup

#if 1
SIMD_IMPL_INTRIN_1(sumup_u8, u16, vu8)
#endif // sumup_sup

/***************************
 * Math
 ***************************/
// Math wrappers for u8: everything in the first `#if 0` group (sqrt .. floor)
// and the maxp/minp pair in the last `#if 0` group is compiled out; only max
// and min are emitted.
#if 0
#line 396
SIMD_IMPL_INTRIN_1(sqrt_u8, vu8, vu8)

#line 396
SIMD_IMPL_INTRIN_1(recip_u8, vu8, vu8)

#line 396
SIMD_IMPL_INTRIN_1(abs_u8, vu8, vu8)

#line 396
SIMD_IMPL_INTRIN_1(square_u8, vu8, vu8)

#line 396
SIMD_IMPL_INTRIN_1(rint_u8, vu8, vu8)

#line 396
SIMD_IMPL_INTRIN_1(ceil_u8, vu8, vu8)

#line 396
SIMD_IMPL_INTRIN_1(trunc_u8, vu8, vu8)

#line 396
SIMD_IMPL_INTRIN_1(floor_u8, vu8, vu8)

#endif

#line 403
SIMD_IMPL_INTRIN_2(max_u8, vu8, vu8, vu8)

#line 403
SIMD_IMPL_INTRIN_2(min_u8, vu8, vu8, vu8)


#if 0
#line 410
SIMD_IMPL_INTRIN_2(maxp_u8, vu8, vu8, vu8)

#line 410
SIMD_IMPL_INTRIN_2(minp_u8, vu8, vu8, vu8)

#endif

/***************************
 * Mask operations
 ***************************/
// Masked add/sub wrappers: four tags follow the first one, starting with the
// vb8 tag and then three vu8 tags.
#line 420
 SIMD_IMPL_INTRIN_4(ifadd_u8, vu8, vb8, vu8, vu8, vu8)

#line 420
 SIMD_IMPL_INTRIN_4(ifsub_u8, vu8, vb8, vu8, vu8, vu8)


#endif // simd_sup

#line 35
#if 1
/***************************
 * Memory
 ***************************/
// Contiguous-load wrappers for the s8 suffix. Each invocation passes the
// intrinsic name followed by two data-type tags: `vs8` (vector) and `qs8`
// (sequence).
// NOTE(review): SIMD_IMPL_INTRIN_1 is not defined in this file; presumably it
// generates the simd__intrin_<name> wrapper with the return tag listed first
// -- confirm against _simd_easyintrin.inc.
#line 42
SIMD_IMPL_INTRIN_1(load_s8, vs8, qs8)

#line 42
SIMD_IMPL_INTRIN_1(loada_s8, vs8, qs8)

#line 42
SIMD_IMPL_INTRIN_1(loads_s8, vs8, qs8)

#line 42
SIMD_IMPL_INTRIN_1(loadl_s8, vs8, qs8)

#line 47
/*
 * Python-callable wrapper around npyv_store_s8; hand-written because a store
 * has to push its result back into the caller's object.
 *
 * Python arguments: (sequence, vector).
 * Returns None on success, or NULL when argument parsing fails or
 * simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_store_s8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs8};
    simd_arg src = {.dtype = simd_data_vs8};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:store_s8",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_s8(dst.data.qs8, src.data.vs8);
    // copy the updated buffer back into the object the caller passed in
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qs8, simd_data_qs8)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
/*
 * Python-callable wrapper around npyv_storea_s8; hand-written because a store
 * has to push its result back into the caller's object.
 *
 * Python arguments: (sequence, vector).
 * Returns None on success, or NULL when argument parsing fails or
 * simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_storea_s8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs8};
    simd_arg src = {.dtype = simd_data_vs8};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storea_s8",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storea_s8(dst.data.qs8, src.data.vs8);
    // copy the updated buffer back into the object the caller passed in
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qs8, simd_data_qs8)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
/*
 * Python-callable wrapper around npyv_stores_s8; hand-written because a store
 * has to push its result back into the caller's object.
 *
 * Python arguments: (sequence, vector).
 * Returns None on success, or NULL when argument parsing fails or
 * simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_stores_s8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs8};
    simd_arg src = {.dtype = simd_data_vs8};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:stores_s8",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_stores_s8(dst.data.qs8, src.data.vs8);
    // copy the updated buffer back into the object the caller passed in
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qs8, simd_data_qs8)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
/*
 * Python-callable wrapper around npyv_storel_s8; hand-written because a store
 * has to push its result back into the caller's object.
 *
 * Python arguments: (sequence, vector).
 * Returns None on success, or NULL when argument parsing fails or
 * simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_storel_s8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs8};
    simd_arg src = {.dtype = simd_data_vs8};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storel_s8",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storel_s8(dst.data.qs8, src.data.vs8);
    // copy the updated buffer back into the object the caller passed in
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qs8, simd_data_qs8)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
/*
 * Python-callable wrapper around npyv_storeh_s8; hand-written because a store
 * has to push its result back into the caller's object.
 *
 * Python arguments: (sequence, vector).
 * Returns None on success, or NULL when argument parsing fails or
 * simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_storeh_s8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs8};
    simd_arg src = {.dtype = simd_data_vs8};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storeh_s8",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storeh_s8(dst.data.qs8, src.data.vs8);
    // copy the updated buffer back into the object the caller passed in
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qs8, simd_data_qs8)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 0
// Partial Load
// These two invocations sit inside the enclosing `#if 0` region and are
// therefore not compiled for the s8 suffix.
// Tags after the name: vs8 (vector), qs8 (sequence), u32, and for load_till
// an extra s8 scalar.
SIMD_IMPL_INTRIN_3(load_till_s8, vs8, qs8, u32, s8)
SIMD_IMPL_INTRIN_2(load_tillz_s8, vs8, qs8, u32)

// Partial Store
/*
 * Python-callable wrapper around npyv_store_till_s8.
 *
 * Python arguments: (sequence, nlane, vector).
 * Returns None on success, or NULL when argument parsing fails or
 * simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_store_till_s8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs8};
    simd_arg count = {.dtype = simd_data_u32};
    simd_arg src = {.dtype = simd_data_vs8};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&O&:store_till_s8",
        simd_arg_converter, &dst,
        simd_arg_converter, &count,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_till_s8(dst.data.qs8, count.data.u32, src.data.vs8);
    // copy the updated buffer back into the object the caller passed in
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qs8, simd_data_qs8)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

// Non-contiguous Load
#line 113
/*
 * Python-callable wrapper around npyv_loadn_s8 (non-contiguous load).
 *
 * Python arguments: (sequence, stride).
 * Returns the loaded vector converted by simd_arg_to_obj().
 * Returns NULL when argument parsing fails, and NULL with ValueError set when
 * the sequence holds fewer than |stride| * npyv_nlanes_s8 items.
 */
static PyObject *
simd__intrin_loadn_s8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs8};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    if (!PyArg_ParseTuple(
        args, "O&O&:loadn_s8",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s8 *seq_ptr = seq_arg.data.qs8;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s8;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last item
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; these arguments may be wider than the int
        // that a plain %d would consume.
        PyErr_Format(PyExc_ValueError,
            "loadn_s8(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        simd_arg_free(&seq_arg);
        return NULL;
    }
    npyv_s8 rvec = npyv_loadn_s8(seq_ptr, stride);
    simd_arg ret = {
        .dtype = simd_data_vs8, .data = {.vs8=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
}

#line 113
/*
 * Python-callable wrapper around npyv_loadn_till_s8 (non-contiguous load).
 *
 * Python arguments: (sequence, stride, nlane, fill).
 * Returns the loaded vector converted by simd_arg_to_obj().
 * Returns NULL when argument parsing fails, and NULL with ValueError set when
 * the sequence holds fewer than |stride| * npyv_nlanes_s8 items.
 */
static PyObject *
simd__intrin_loadn_till_s8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs8};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    simd_arg fill_arg = {.dtype = simd_data_s8};
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:loadn_till_s8",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &fill_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s8 *seq_ptr = seq_arg.data.qs8;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s8;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last item
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; these arguments may be wider than the int
        // that a plain %d would consume.
        PyErr_Format(PyExc_ValueError,
            "loadn_till_s8(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        simd_arg_free(&seq_arg);
        return NULL;
    }
    npyv_s8 rvec = npyv_loadn_till_s8(
        seq_ptr, stride, nlane_arg.data.u32, fill_arg.data.s8
    );
    simd_arg ret = {
        .dtype = simd_data_vs8, .data = {.vs8=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
}

#line 113
/*
 * Python-callable wrapper around npyv_loadn_tillz_s8 (non-contiguous load).
 *
 * Python arguments: (sequence, stride, nlane).
 * Returns the loaded vector converted by simd_arg_to_obj().
 * Returns NULL when argument parsing fails, and NULL with ValueError set when
 * the sequence holds fewer than |stride| * npyv_nlanes_s8 items.
 */
static PyObject *
simd__intrin_loadn_tillz_s8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs8};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:loadn_tillz_s8",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s8 *seq_ptr = seq_arg.data.qs8;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s8;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last item
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; these arguments may be wider than the int
        // that a plain %d would consume.
        PyErr_Format(PyExc_ValueError,
            "loadn_tillz_s8(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        simd_arg_free(&seq_arg);
        return NULL;
    }
    npyv_s8 rvec = npyv_loadn_tillz_s8(seq_ptr, stride, nlane_arg.data.u32);
    simd_arg ret = {
        .dtype = simd_data_vs8, .data = {.vs8=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
}


// Non-contiguous Store
#line 179
/*
 * Python-callable wrapper around npyv_storen_s8 (non-contiguous store).
 *
 * Python arguments: (sequence, stride, vector).
 * Returns None on success. Returns NULL when argument parsing fails, when the
 * sequence holds fewer than |stride| * npyv_nlanes_s8 items (ValueError), or
 * when simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_storen_s8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs8};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vs8};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:storen_s8",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s8 *seq_ptr = seq_arg.data.qs8;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s8;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last item
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; the trailing space after "the" keeps the two
        // adjacent literals from fusing into "theminimum".
        PyErr_Format(PyExc_ValueError,
            "storen_s8(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_s8(seq_ptr, stride, vec_arg.data.vs8);
    // write-back (uses the buffer start, not the stride-adjusted seq_ptr)
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qs8, simd_data_qs8)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 179
/*
 * Python-callable wrapper around npyv_storen_till_s8 (non-contiguous store).
 *
 * Python arguments: (sequence, stride, nlane, vector).
 * Returns None on success. Returns NULL when argument parsing fails, when the
 * sequence holds fewer than |stride| * npyv_nlanes_s8 items (ValueError), or
 * when simd_sequence_fill_iterable() reports a non-zero result.
 */
static PyObject *
simd__intrin_storen_till_s8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs8};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vs8};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    // The text after ':' is the name reported in argument-parsing errors, so
    // it must be this function's own name.
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:storen_till_s8",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s8 *seq_ptr = seq_arg.data.qs8;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s8;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last item
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; the trailing space after "the" keeps the two
        // adjacent literals from fusing into "theminimum".
        PyErr_Format(PyExc_ValueError,
            "storen_till_s8(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_till_s8(
        seq_ptr, stride, nlane_arg.data.u32, vec_arg.data.vs8
    );
    // write-back (uses the buffer start, not the stride-adjusted seq_ptr)
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qs8, simd_data_qs8)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#endif // 0

/****************************
 * Lookup tables
 *
 * Both guards compare the literal 8 with 32 and 64, so they are always
 * false: neither lut32_s8 nor lut16_s8 is compiled for the s8 suffix.
 ****************************/
#if 8 == 32
SIMD_IMPL_INTRIN_2(lut32_s8, vs8, qs8, vu8)
#endif
#if 8 == 64
SIMD_IMPL_INTRIN_2(lut16_s8, vs8, qs8, vu8)
#endif
/***************************
 * Misc
 *
 * In each SIMD_IMPL_INTRIN_<N>() call the first tag after the intrinsic
 * name is followed by exactly N further tags (presumably return type
 * followed by the argument types -- confirm in _simd_easyintrin.inc).
 ***************************/
SIMD_IMPL_INTRIN_0(zero_s8, vs8)
SIMD_IMPL_INTRIN_1(setall_s8, vs8, s8)
SIMD_IMPL_INTRIN_3(select_s8, vs8, vb8, vs8, vs8)

/*
 * reinterpret_<dst>_s8 wrappers: one SIMD_IMPL_INTRIN_1() per destination
 * tag, all taking a vs8 operand.  Every guard is the literal 1 except the
 * f64 variant, which is only compiled when NPY_SIMD_F64 is non-zero.
 */
#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u8_s8, vu8, vs8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s8_s8, vs8, vs8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u16_s8, vu16, vs8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s16_s8, vs16, vs8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u32_s8, vu32, vs8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s32_s8, vs32, vs8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u64_s8, vu64, vs8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s64_s8, vs64, vs8)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_f32_s8, vf32, vs8)
#endif // simd_sup2

#line 256
#if NPY_SIMD_F64
SIMD_IMPL_INTRIN_1(reinterpret_f64_s8, vf64, vs8)
#endif // simd_sup2


/**
 * Special definitions due to the nature of the intrinsics
 * npyv_setf_s8 and npyv_set_s8.
 */
#line 268
/*
 * Python wrapper for npyv_setf_s8(): converts the positional arguments into
 * a native lane buffer and packs them into a vs8 vector object.
 * Returns NULL when the arguments cannot be converted.
 *
 * NOTE(review): 65 elements are spelled out below although only
 * npyv_nlanes_s8 is passed to simd_sequence_from_iterable(); presumably
 * npyv_setf_s8 is a macro that consumes only the leading arguments it
 * needs -- confirm, and confirm whether setf needs one element more than
 * the lane count.
 */
static PyObject *
simd__intrin_setf_s8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_s8 *lanes = simd_sequence_from_iterable(
        args, simd_data_qs8, npyv_nlanes_s8
    );
    if (!lanes) {
        return NULL;
    }
    simd_data packed = {.vs8 = npyv_setf_s8(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],
        lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11],
        lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19],
        lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27],
        lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35],
        lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43],
        lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51],
        lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59],
        lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // extra element listed for setf
    )};
    // the vector holds its own copy, the temporary buffer can go
    simd_sequence_free(lanes);
    return (PyObject*)PySIMDVector_FromData(packed, simd_data_vs8);
}

#line 268
/*
 * Python wrapper for npyv_set_s8(): converts the positional arguments into
 * a native lane buffer and packs them into a vs8 vector object.
 * Returns NULL when the arguments cannot be converted.
 *
 * NOTE(review): the argument list is shared with the setf variant and
 * names 65 elements; presumably npyv_set_s8 is a macro that consumes only
 * the leading arguments it needs -- confirm.
 */
static PyObject *
simd__intrin_set_s8(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_s8 *lanes = simd_sequence_from_iterable(
        args, simd_data_qs8, npyv_nlanes_s8
    );
    if (!lanes) {
        return NULL;
    }
    simd_data packed = {.vs8 = npyv_set_s8(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],
        lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11],
        lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19],
        lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27],
        lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35],
        lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43],
        lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51],
        lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59],
        lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // extra element listed for setf
    )};
    // the vector holds its own copy, the temporary buffer can go
    simd_sequence_free(lanes);
    return (PyObject*)PySIMDVector_FromData(packed, simd_data_vs8);
}


/***************************
 * Reorder
 *
 * combinel/combineh are declared with the single-vector tag vs8 in first
 * position, combine/zip with the paired tag vs8x2.  The rev64_s8 guard is
 * the literal 1, so it is always compiled.
 ***************************/
#line 297
SIMD_IMPL_INTRIN_2(combinel_s8, vs8, vs8, vs8)

#line 297
SIMD_IMPL_INTRIN_2(combineh_s8, vs8, vs8, vs8)


#line 303
SIMD_IMPL_INTRIN_2(combine_s8, vs8x2, vs8, vs8)

#line 303
SIMD_IMPL_INTRIN_2(zip_s8, vs8x2, vs8, vs8)


#if 1
SIMD_IMPL_INTRIN_1(rev64_s8, vs8, vs8)
#endif

/***************************
 * Operators
 *
 * The shift guard `0 > 0` is always false, so shl/shr/shli/shri are not
 * compiled for s8.  The bitwise wrappers are unconditional; the
 * comparison wrappers are declared with the boolean tag vb8 in first
 * position.
 ***************************/
#if 0 > 0
SIMD_IMPL_INTRIN_2(shl_s8, vs8, vs8, u8)
SIMD_IMPL_INTRIN_2(shr_s8, vs8, vs8, u8)
// immediate constant
SIMD_IMPL_INTRIN_2IMM(shli_s8, vs8, vs8, 0)
SIMD_IMPL_INTRIN_2IMM(shri_s8, vs8, vs8, 0)
#endif // shl_imm

#line 324
SIMD_IMPL_INTRIN_2(and_s8, vs8, vs8, vs8)

#line 324
SIMD_IMPL_INTRIN_2(or_s8, vs8, vs8, vs8)

#line 324
SIMD_IMPL_INTRIN_2(xor_s8, vs8, vs8, vs8)


SIMD_IMPL_INTRIN_1(not_s8, vs8, vs8)

#line 332
SIMD_IMPL_INTRIN_2(cmpeq_s8, vb8, vs8, vs8)

#line 332
SIMD_IMPL_INTRIN_2(cmpneq_s8, vb8, vs8, vs8)

#line 332
SIMD_IMPL_INTRIN_2(cmpgt_s8, vb8, vs8, vs8)

#line 332
SIMD_IMPL_INTRIN_2(cmpge_s8, vb8, vs8, vs8)

#line 332
SIMD_IMPL_INTRIN_2(cmplt_s8, vb8, vs8, vs8)

#line 332
SIMD_IMPL_INTRIN_2(cmple_s8, vb8, vs8, vs8)


/***************************
 * Conversion
 *
 * cvt_* map between the vs8 and vb8 tags.  expand_s8_s8 sits behind a
 * literal `#if 0` and is not compiled.
 ***************************/
SIMD_IMPL_INTRIN_1(cvt_s8_b8, vs8,  vb8)
SIMD_IMPL_INTRIN_1(cvt_b8_s8, vb8, vs8)
#if 0
SIMD_IMPL_INTRIN_1(expand_s8_s8, vs8x2, vs8)
#endif // expand_sup
/***************************
 * Arithmetic
 *
 * add/sub are unconditional.  Compiled for s8 (guard 1): adds/subs, mul,
 * divisor/divc.  Not compiled for s8 (guard 0): div, the four fused
 * multiply variants, sum and sumup.
 ***************************/
#line 349
SIMD_IMPL_INTRIN_2(add_s8, vs8, vs8, vs8)

#line 349
SIMD_IMPL_INTRIN_2(sub_s8, vs8, vs8, vs8)


#if 1
#line 356
SIMD_IMPL_INTRIN_2(adds_s8, vs8, vs8, vs8)

#line 356
SIMD_IMPL_INTRIN_2(subs_s8, vs8, vs8, vs8)

#endif // sat_sup

#if 1
SIMD_IMPL_INTRIN_2(mul_s8, vs8, vs8, vs8)
#endif // mul_sup

#if 0
SIMD_IMPL_INTRIN_2(div_s8, vs8, vs8, vs8)
#endif // div_sup

#if 1
SIMD_IMPL_INTRIN_1(divisor_s8, vs8x3, s8)
SIMD_IMPL_INTRIN_2(divc_s8, vs8, vs8, vs8x3)
#endif // intdiv_sup

#if 0
#line 377
SIMD_IMPL_INTRIN_3(muladd_s8, vs8, vs8, vs8, vs8)

#line 377
SIMD_IMPL_INTRIN_3(mulsub_s8, vs8, vs8, vs8, vs8)

#line 377
SIMD_IMPL_INTRIN_3(nmuladd_s8, vs8, vs8, vs8, vs8)

#line 377
SIMD_IMPL_INTRIN_3(nmulsub_s8, vs8, vs8, vs8, vs8)

#endif // fused_sup

#if 0
SIMD_IMPL_INTRIN_1(sum_s8, s8, vs8)
#endif // sum_sup

#if 0
SIMD_IMPL_INTRIN_1(sumup_s8, s8, vs8)
#endif // sumup_sup

/***************************
 * Math
 *
 * sqrt/recip/abs/square/rint/ceil/trunc/floor and maxp/minp sit behind
 * literal `#if 0` guards and are not compiled for s8; max/min are
 * unconditional.
 ***************************/
#if 0
#line 396
SIMD_IMPL_INTRIN_1(sqrt_s8, vs8, vs8)

#line 396
SIMD_IMPL_INTRIN_1(recip_s8, vs8, vs8)

#line 396
SIMD_IMPL_INTRIN_1(abs_s8, vs8, vs8)

#line 396
SIMD_IMPL_INTRIN_1(square_s8, vs8, vs8)

#line 396
SIMD_IMPL_INTRIN_1(rint_s8, vs8, vs8)

#line 396
SIMD_IMPL_INTRIN_1(ceil_s8, vs8, vs8)

#line 396
SIMD_IMPL_INTRIN_1(trunc_s8, vs8, vs8)

#line 396
SIMD_IMPL_INTRIN_1(floor_s8, vs8, vs8)

#endif

#line 403
SIMD_IMPL_INTRIN_2(max_s8, vs8, vs8, vs8)

#line 403
SIMD_IMPL_INTRIN_2(min_s8, vs8, vs8, vs8)


#if 0
#line 410
SIMD_IMPL_INTRIN_2(maxp_s8, vs8, vs8, vs8)

#line 410
SIMD_IMPL_INTRIN_2(minp_s8, vs8, vs8, vs8)

#endif

/***************************
 * Mask operations
 *
 * ifadd/ifsub list a boolean tag (vb8) followed by three vs8 tags after
 * the leading vs8 tag.
 ***************************/
#line 420
 SIMD_IMPL_INTRIN_4(ifadd_s8, vs8, vb8, vs8, vs8, vs8)

#line 420
 SIMD_IMPL_INTRIN_4(ifsub_s8, vs8, vb8, vs8, vs8, vs8)


#endif // simd_sup

#line 35
#if 1
/***************************
 * Memory
 *
 * The four load variants are declared with the vector tag vu16 followed
 * by the sequence tag qu16.
 ***************************/
#line 42
SIMD_IMPL_INTRIN_1(load_u16, vu16, qu16)

#line 42
SIMD_IMPL_INTRIN_1(loada_u16, vu16, qu16)

#line 42
SIMD_IMPL_INTRIN_1(loads_u16, vu16, qu16)

#line 42
SIMD_IMPL_INTRIN_1(loadl_u16, vu16, qu16)

#line 47
// Hand-written wrapper for npyv_store_u16(): after the store the native
// buffer has to be copied back into the Python sequence it came from.
static PyObject *
simd__intrin_store_u16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu16};
    simd_arg src = {.dtype = simd_data_vu16};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:store_u16",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_u16(dst.data.qu16, src.data.vu16);
    // propagate the stored lanes to the caller's object, then release
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qu16, simd_data_qu16) == 0) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// Hand-written wrapper for npyv_storea_u16(): after the store the native
// buffer has to be copied back into the Python sequence it came from.
static PyObject *
simd__intrin_storea_u16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu16};
    simd_arg src = {.dtype = simd_data_vu16};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storea_u16",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storea_u16(dst.data.qu16, src.data.vu16);
    // propagate the stored lanes to the caller's object, then release
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qu16, simd_data_qu16) == 0) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// Hand-written wrapper for npyv_stores_u16(): after the store the native
// buffer has to be copied back into the Python sequence it came from.
static PyObject *
simd__intrin_stores_u16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu16};
    simd_arg src = {.dtype = simd_data_vu16};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:stores_u16",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_stores_u16(dst.data.qu16, src.data.vu16);
    // propagate the stored lanes to the caller's object, then release
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qu16, simd_data_qu16) == 0) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// Hand-written wrapper for npyv_storel_u16(): after the store the native
// buffer has to be copied back into the Python sequence it came from.
static PyObject *
simd__intrin_storel_u16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu16};
    simd_arg src = {.dtype = simd_data_vu16};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storel_u16",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storel_u16(dst.data.qu16, src.data.vu16);
    // propagate the stored lanes to the caller's object, then release
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qu16, simd_data_qu16) == 0) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// Hand-written wrapper for npyv_storeh_u16(): after the store the native
// buffer has to be copied back into the Python sequence it came from.
static PyObject *
simd__intrin_storeh_u16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu16};
    simd_arg src = {.dtype = simd_data_vu16};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storeh_u16",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storeh_u16(dst.data.qu16, src.data.vu16);
    // propagate the stored lanes to the caller's object, then release
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qu16, simd_data_qu16) == 0) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}


/****************************************
 * Non-contiguous/Partial Memory access
 *
 * Guarded by a literal `#if 0`: everything up to the matching
 * `#endif // 0` is excluded from compilation for the u16 suffix.
 ****************************************/
#if 0
// Partial Load
SIMD_IMPL_INTRIN_3(load_till_u16, vu16, qu16, u32, u16)
SIMD_IMPL_INTRIN_2(load_tillz_u16, vu16, qu16, u32)

// Partial Store
// Wrapper for npyv_store_till_u16(): takes the destination sequence, a lane
// count and the vector, then copies the native buffer back into the
// Python sequence.
static PyObject *
simd__intrin_store_till_u16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu16};
    simd_arg count = {.dtype = simd_data_u32};
    simd_arg src = {.dtype = simd_data_vu16};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&O&:store_till_u16",
        simd_arg_converter, &dst,
        simd_arg_converter, &count,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_till_u16(dst.data.qu16, count.data.u32, src.data.vu16);
    // propagate the stored lanes to the caller's object, then release
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qu16, simd_data_qu16) == 0) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

// Non-contiguous Load
#line 113
/*
 * Python wrapper for npyv_loadn_u16(): non-contiguous (strided) load.
 *
 * Parses two arguments from `args`: the source sequence and the stride.
 * The sequence must hold at least |stride| * npyv_nlanes_u16 elements,
 * otherwise ValueError is raised.
 *
 * Returns the loaded vector converted with simd_arg_to_obj(), or NULL on
 * failure.
 */
static PyObject *
simd__intrin_loadn_u16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu16};
    simd_arg stride_arg = {.dtype = simd_data_s64};
#if 0
    simd_arg nlane_arg = {.dtype = simd_data_u32};
#endif // till
#if 0
    simd_arg fill_arg = {.dtype = simd_data_u16};
#endif
    if (!PyArg_ParseTuple(
        args, "O&O&:loadn_u16",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
#if 0
        ,simd_arg_converter, &nlane_arg
#endif
#if 0
        ,simd_arg_converter, &fill_arg
#endif
    )) {
        return NULL;
    }
    npyv_lanetype_u16 *seq_ptr = seq_arg.data.qu16;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u16;
    if (stride < 0) {
        // a negative stride walks backwards, so start from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; the stride is cast so all three agree
        PyErr_Format(PyExc_ValueError,
            "loadn_u16(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_u16 rvec = npyv_loadn_u16(
        seq_ptr, stride
    #if 0
        , nlane_arg.data.u32
    #endif
    #if 0
        , fill_arg.data.u16
    #endif
    );
    simd_arg ret = {
        .dtype = simd_data_vu16, .data = {.vu16=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
/*
 * Python wrapper for npyv_loadn_till_u16(): non-contiguous (strided)
 * partial load with an explicit fill value.
 *
 * Parses four arguments from `args`: the source sequence, the stride, the
 * lane count and the fill value.  The sequence must hold at least
 * |stride| * npyv_nlanes_u16 elements, otherwise ValueError is raised.
 *
 * Returns the loaded vector converted with simd_arg_to_obj(), or NULL on
 * failure.
 */
static PyObject *
simd__intrin_loadn_till_u16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu16};
    simd_arg stride_arg = {.dtype = simd_data_s64};
#if 1
    simd_arg nlane_arg = {.dtype = simd_data_u32};
#endif // till
#if 1
    simd_arg fill_arg = {.dtype = simd_data_u16};
#endif
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:loadn_till_u16",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
#if 1
        ,simd_arg_converter, &nlane_arg
#endif
#if 1
        ,simd_arg_converter, &fill_arg
#endif
    )) {
        return NULL;
    }
    npyv_lanetype_u16 *seq_ptr = seq_arg.data.qu16;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u16;
    if (stride < 0) {
        // a negative stride walks backwards, so start from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; the stride is cast so all three agree
        PyErr_Format(PyExc_ValueError,
            "loadn_till_u16(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_u16 rvec = npyv_loadn_till_u16(
        seq_ptr, stride
    #if 1
        , nlane_arg.data.u32
    #endif
    #if 1
        , fill_arg.data.u16
    #endif
    );
    simd_arg ret = {
        .dtype = simd_data_vu16, .data = {.vu16=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
/*
 * Python wrapper for npyv_loadn_tillz_u16(): non-contiguous (strided)
 * partial load without an explicit fill value.
 *
 * Parses three arguments from `args`: the source sequence, the stride and
 * the lane count.  The sequence must hold at least
 * |stride| * npyv_nlanes_u16 elements, otherwise ValueError is raised.
 *
 * Returns the loaded vector converted with simd_arg_to_obj(), or NULL on
 * failure.
 */
static PyObject *
simd__intrin_loadn_tillz_u16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu16};
    simd_arg stride_arg = {.dtype = simd_data_s64};
#if 1
    simd_arg nlane_arg = {.dtype = simd_data_u32};
#endif // till
#if 0
    simd_arg fill_arg = {.dtype = simd_data_u16};
#endif
    if (!PyArg_ParseTuple(
        args, "O&O&O&:loadn_tillz_u16",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
#if 1
        ,simd_arg_converter, &nlane_arg
#endif
#if 0
        ,simd_arg_converter, &fill_arg
#endif
    )) {
        return NULL;
    }
    npyv_lanetype_u16 *seq_ptr = seq_arg.data.qu16;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u16;
    if (stride < 0) {
        // a negative stride walks backwards, so start from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; the stride is cast so all three agree
        PyErr_Format(PyExc_ValueError,
            "loadn_tillz_u16(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_u16 rvec = npyv_loadn_tillz_u16(
        seq_ptr, stride
    #if 1
        , nlane_arg.data.u32
    #endif
    #if 0
        , fill_arg.data.u16
    #endif
    );
    simd_arg ret = {
        .dtype = simd_data_vu16, .data = {.vu16=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}


// Non-contiguous Store
#line 179
/*
 * Python wrapper for npyv_storen_u16(): non-contiguous (strided) store.
 *
 * Parses three arguments from `args`: the destination sequence, the stride
 * and the vector to store.  The sequence must hold at least
 * |stride| * npyv_nlanes_u16 elements, otherwise ValueError is raised.
 * After the store the native buffer is written back into the Python
 * sequence with simd_sequence_fill_iterable().
 *
 * Returns None on success and NULL on failure.
 */
static PyObject *
simd__intrin_storen_u16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu16};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vu16};
#if 0
    simd_arg nlane_arg = {.dtype = simd_data_u32};
#endif
    if (!PyArg_ParseTuple(
        args, "O&O&O&:storen_u16",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
#if 0
        ,simd_arg_converter, &nlane_arg
#endif
        ,simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u16 *seq_ptr = seq_arg.data.qu16;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u16;
    if (stride < 0) {
        // a negative stride walks backwards, so start from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; the stride is cast so all three agree
        PyErr_Format(PyExc_ValueError,
            "storen_u16(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_u16(
        seq_ptr, stride
    #if 0
        ,nlane_arg.data.u32
    #endif
        ,vec_arg.data.vu16
    );
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qu16, simd_data_qu16)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 179
/*
 * Python wrapper for npyv_storen_till_u16(): non-contiguous (strided)
 * partial store.
 *
 * Parses four arguments from `args`: the destination sequence, the stride,
 * the lane count and the vector to store.  The sequence must hold at least
 * |stride| * npyv_nlanes_u16 elements, otherwise ValueError is raised.
 * After the store the native buffer is written back into the Python
 * sequence with simd_sequence_fill_iterable().
 *
 * Returns None on success and NULL on failure.
 */
static PyObject *
simd__intrin_storen_till_u16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu16};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vu16};
#if 1
    simd_arg nlane_arg = {.dtype = simd_data_u32};
#endif
    // the name after ':' is what argument-parsing errors report
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:storen_till_u16",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
#if 1
        ,simd_arg_converter, &nlane_arg
#endif
        ,simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u16 *seq_ptr = seq_arg.data.qu16;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u16;
    if (stride < 0) {
        // a negative stride walks backwards, so start from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; the stride is cast so all three agree
        PyErr_Format(PyExc_ValueError,
            "storen_till_u16(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_till_u16(
        seq_ptr, stride
    #if 1
        ,nlane_arg.data.u32
    #endif
        ,vec_arg.data.vu16
    );
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qu16, simd_data_qu16)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#endif // 0

/****************************
 * Lookup tables
 *
 * Both guards compare the literal 16 with 32 and 64, so they are always
 * false: neither lut32_u16 nor lut16_u16 is compiled for the u16 suffix.
 ****************************/
#if 16 == 32
SIMD_IMPL_INTRIN_2(lut32_u16, vu16, qu16, vu16)
#endif
#if 16 == 64
SIMD_IMPL_INTRIN_2(lut16_u16, vu16, qu16, vu16)
#endif
/***************************
 * Misc
 *
 * In each SIMD_IMPL_INTRIN_<N>() call the first tag after the intrinsic
 * name is followed by exactly N further tags (presumably return type
 * followed by the argument types -- confirm in _simd_easyintrin.inc).
 ***************************/
SIMD_IMPL_INTRIN_0(zero_u16, vu16)
SIMD_IMPL_INTRIN_1(setall_u16, vu16, u16)
SIMD_IMPL_INTRIN_3(select_u16, vu16, vb16, vu16, vu16)

/*
 * reinterpret_<dst>_u16 wrappers: one SIMD_IMPL_INTRIN_1() per destination
 * tag, all taking a vu16 operand.  Every guard is the literal 1 except the
 * f64 variant, which is only compiled when NPY_SIMD_F64 is non-zero.
 */
#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u8_u16, vu8, vu16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s8_u16, vs8, vu16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u16_u16, vu16, vu16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s16_u16, vs16, vu16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u32_u16, vu32, vu16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s32_u16, vs32, vu16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u64_u16, vu64, vu16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s64_u16, vs64, vu16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_f32_u16, vf32, vu16)
#endif // simd_sup2

#line 256
#if NPY_SIMD_F64
SIMD_IMPL_INTRIN_1(reinterpret_f64_u16, vf64, vu16)
#endif // simd_sup2


/**
 * Special definitions due to the nature of the intrinsics
 * npyv_setf_u16 and npyv_set_u16.
 */
#line 268
/*
 * Python wrapper for npyv_setf_u16(): converts the positional arguments
 * into a native lane buffer and packs them into a vu16 vector object.
 * Returns NULL when the arguments cannot be converted.
 *
 * NOTE(review): 65 elements are spelled out below although only
 * npyv_nlanes_u16 is passed to simd_sequence_from_iterable(); presumably
 * npyv_setf_u16 is a macro that consumes only the leading arguments it
 * needs -- confirm, and confirm whether setf needs one element more than
 * the lane count.
 */
static PyObject *
simd__intrin_setf_u16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_u16 *lanes = simd_sequence_from_iterable(
        args, simd_data_qu16, npyv_nlanes_u16
    );
    if (!lanes) {
        return NULL;
    }
    simd_data packed = {.vu16 = npyv_setf_u16(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],
        lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11],
        lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19],
        lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27],
        lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35],
        lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43],
        lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51],
        lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59],
        lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // extra element listed for setf
    )};
    // the vector holds its own copy, the temporary buffer can go
    simd_sequence_free(lanes);
    return (PyObject*)PySIMDVector_FromData(packed, simd_data_vu16);
}

#line 268
/*
 * Python wrapper for npyv_set_u16(): converts the positional arguments
 * into a native lane buffer and packs them into a vu16 vector object.
 * Returns NULL when the arguments cannot be converted.
 *
 * NOTE(review): the argument list is shared with the setf variant and
 * names 65 elements; presumably npyv_set_u16 is a macro that consumes only
 * the leading arguments it needs -- confirm.
 */
static PyObject *
simd__intrin_set_u16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_u16 *lanes = simd_sequence_from_iterable(
        args, simd_data_qu16, npyv_nlanes_u16
    );
    if (!lanes) {
        return NULL;
    }
    simd_data packed = {.vu16 = npyv_set_u16(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],
        lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11],
        lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19],
        lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27],
        lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35],
        lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43],
        lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51],
        lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59],
        lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // extra element listed for setf
    )};
    // the vector holds its own copy, the temporary buffer can go
    simd_sequence_free(lanes);
    return (PyObject*)PySIMDVector_FromData(packed, simd_data_vu16);
}


/***************************
 * Reorder
 *
 * combinel/combineh are declared with the single-vector tag vu16 in first
 * position, combine/zip with the paired tag vu16x2.  The rev64_u16 guard
 * is the literal 1, so it is always compiled.
 ***************************/
#line 297
SIMD_IMPL_INTRIN_2(combinel_u16, vu16, vu16, vu16)

#line 297
SIMD_IMPL_INTRIN_2(combineh_u16, vu16, vu16, vu16)


#line 303
SIMD_IMPL_INTRIN_2(combine_u16, vu16x2, vu16, vu16)

#line 303
SIMD_IMPL_INTRIN_2(zip_u16, vu16x2, vu16, vu16)


#if 1
SIMD_IMPL_INTRIN_1(rev64_u16, vu16, vu16)
#endif

/***************************
 * Operators
 *
 * The shift guard `15 > 0` is always true, so shl/shr/shli/shri are
 * compiled for u16.  shli is given 15 and shri 16 as last argument
 * (presumably the largest immediate handled -- confirm against
 * SIMD_IMPL_INTRIN_2IMM).  The comparison wrappers are declared with the
 * boolean tag vb16 in first position.
 ***************************/
#if 15 > 0
SIMD_IMPL_INTRIN_2(shl_u16, vu16, vu16, u8)
SIMD_IMPL_INTRIN_2(shr_u16, vu16, vu16, u8)
// immediate constant
SIMD_IMPL_INTRIN_2IMM(shli_u16, vu16, vu16, 15)
SIMD_IMPL_INTRIN_2IMM(shri_u16, vu16, vu16, 16)
#endif // shl_imm

#line 324
SIMD_IMPL_INTRIN_2(and_u16, vu16, vu16, vu16)

#line 324
SIMD_IMPL_INTRIN_2(or_u16, vu16, vu16, vu16)

#line 324
SIMD_IMPL_INTRIN_2(xor_u16, vu16, vu16, vu16)


SIMD_IMPL_INTRIN_1(not_u16, vu16, vu16)

#line 332
SIMD_IMPL_INTRIN_2(cmpeq_u16, vb16, vu16, vu16)

#line 332
SIMD_IMPL_INTRIN_2(cmpneq_u16, vb16, vu16, vu16)

#line 332
SIMD_IMPL_INTRIN_2(cmpgt_u16, vb16, vu16, vu16)

#line 332
SIMD_IMPL_INTRIN_2(cmpge_u16, vb16, vu16, vu16)

#line 332
SIMD_IMPL_INTRIN_2(cmplt_u16, vb16, vu16, vu16)

#line 332
SIMD_IMPL_INTRIN_2(cmple_u16, vb16, vu16, vu16)


/***************************
 * Conversion
 *
 * cvt_* map between the vu16 and vb16 tags.  expand_u32_u16 is compiled
 * (guard 1) and is declared with the paired tag vu32x2 in first position.
 ***************************/
SIMD_IMPL_INTRIN_1(cvt_u16_b16, vu16,  vb16)
SIMD_IMPL_INTRIN_1(cvt_b16_u16, vb16, vu16)
#if 1
SIMD_IMPL_INTRIN_1(expand_u32_u16, vu32x2, vu16)
#endif // expand_sup
/***************************
 * Arithmetic
 *
 * add/sub are unconditional.  Compiled for u16 (guard 1): adds/subs, mul,
 * divisor/divc and sumup (declared with the u32 tag in first position).
 * Not compiled for u16 (guard 0): div, the four fused multiply variants
 * and sum.
 ***************************/
#line 349
SIMD_IMPL_INTRIN_2(add_u16, vu16, vu16, vu16)

#line 349
SIMD_IMPL_INTRIN_2(sub_u16, vu16, vu16, vu16)


#if 1
#line 356
SIMD_IMPL_INTRIN_2(adds_u16, vu16, vu16, vu16)

#line 356
SIMD_IMPL_INTRIN_2(subs_u16, vu16, vu16, vu16)

#endif // sat_sup

#if 1
SIMD_IMPL_INTRIN_2(mul_u16, vu16, vu16, vu16)
#endif // mul_sup

#if 0
SIMD_IMPL_INTRIN_2(div_u16, vu16, vu16, vu16)
#endif // div_sup

#if 1
SIMD_IMPL_INTRIN_1(divisor_u16, vu16x3, u16)
SIMD_IMPL_INTRIN_2(divc_u16, vu16, vu16, vu16x3)
#endif // intdiv_sup

#if 0
#line 377
SIMD_IMPL_INTRIN_3(muladd_u16, vu16, vu16, vu16, vu16)

#line 377
SIMD_IMPL_INTRIN_3(mulsub_u16, vu16, vu16, vu16, vu16)

#line 377
SIMD_IMPL_INTRIN_3(nmuladd_u16, vu16, vu16, vu16, vu16)

#line 377
SIMD_IMPL_INTRIN_3(nmulsub_u16, vu16, vu16, vu16, vu16)

#endif // fused_sup

#if 0
SIMD_IMPL_INTRIN_1(sum_u16, u16, vu16)
#endif // sum_sup

#if 1
SIMD_IMPL_INTRIN_1(sumup_u16, u32, vu16)
#endif // sumup_sup

/***************************
 * Math
 *
 * sqrt/recip/abs/square/rint/ceil/trunc/floor and maxp/minp sit behind
 * literal `#if 0` guards and are not compiled for u16; max/min are
 * unconditional.
 ***************************/
#if 0
#line 396
SIMD_IMPL_INTRIN_1(sqrt_u16, vu16, vu16)

#line 396
SIMD_IMPL_INTRIN_1(recip_u16, vu16, vu16)

#line 396
SIMD_IMPL_INTRIN_1(abs_u16, vu16, vu16)

#line 396
SIMD_IMPL_INTRIN_1(square_u16, vu16, vu16)

#line 396
SIMD_IMPL_INTRIN_1(rint_u16, vu16, vu16)

#line 396
SIMD_IMPL_INTRIN_1(ceil_u16, vu16, vu16)

#line 396
SIMD_IMPL_INTRIN_1(trunc_u16, vu16, vu16)

#line 396
SIMD_IMPL_INTRIN_1(floor_u16, vu16, vu16)

#endif

#line 403
SIMD_IMPL_INTRIN_2(max_u16, vu16, vu16, vu16)

#line 403
SIMD_IMPL_INTRIN_2(min_u16, vu16, vu16, vu16)


#if 0
#line 410
SIMD_IMPL_INTRIN_2(maxp_u16, vu16, vu16, vu16)

#line 410
SIMD_IMPL_INTRIN_2(minp_u16, vu16, vu16, vu16)

#endif

/***************************
 * Mask operations
 *
 * ifadd/ifsub list a boolean tag (vb16) followed by three vu16 tags after
 * the leading vu16 tag.
 ***************************/
#line 420
 SIMD_IMPL_INTRIN_4(ifadd_u16, vu16, vb16, vu16, vu16, vu16)

#line 420
 SIMD_IMPL_INTRIN_4(ifsub_u16, vu16, vb16, vu16, vu16, vu16)


#endif // simd_sup

#line 35
#if 1
/***************************
 * Memory
 *
 * The four load variants are declared with the vector tag vs16 followed
 * by the sequence tag qs16.
 ***************************/
#line 42
SIMD_IMPL_INTRIN_1(load_s16, vs16, qs16)

#line 42
SIMD_IMPL_INTRIN_1(loada_s16, vs16, qs16)

#line 42
SIMD_IMPL_INTRIN_1(loads_s16, vs16, qs16)

#line 42
SIMD_IMPL_INTRIN_1(loadl_s16, vs16, qs16)

#line 47
// Hand-written wrapper for npyv_store_s16(): after the store the native
// buffer has to be copied back into the Python sequence it came from.
static PyObject *
simd__intrin_store_s16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs16};
    simd_arg src = {.dtype = simd_data_vs16};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:store_s16",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_s16(dst.data.qs16, src.data.vs16);
    // propagate the stored lanes to the caller's object, then release
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qs16, simd_data_qs16) == 0) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// Hand-written wrapper for npyv_storea_s16(): after the store the native
// buffer has to be copied back into the Python sequence it came from.
static PyObject *
simd__intrin_storea_s16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs16};
    simd_arg src = {.dtype = simd_data_vs16};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storea_s16",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storea_s16(dst.data.qs16, src.data.vs16);
    // propagate the stored lanes to the caller's object, then release
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qs16, simd_data_qs16) == 0) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// Hand-written wrapper for npyv_stores_s16(): after the store the native
// buffer has to be copied back into the Python sequence it came from.
static PyObject *
simd__intrin_stores_s16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs16};
    simd_arg src = {.dtype = simd_data_vs16};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:stores_s16",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_stores_s16(dst.data.qs16, src.data.vs16);
    // propagate the stored lanes to the caller's object, then release
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qs16, simd_data_qs16) == 0) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// Hand-written wrapper for npyv_storel_s16(): after the store the native
// buffer has to be copied back into the Python sequence it came from.
static PyObject *
simd__intrin_storel_s16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs16};
    simd_arg src = {.dtype = simd_data_vs16};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storel_s16",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storel_s16(dst.data.qs16, src.data.vs16);
    // propagate the stored lanes to the caller's object, then release
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qs16, simd_data_qs16) == 0) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// Hand-written wrapper for npyv_storeh_s16(): after the store the native
// buffer has to be copied back into the Python sequence it came from.
static PyObject *
simd__intrin_storeh_s16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs16};
    simd_arg src = {.dtype = simd_data_vs16};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storeh_s16",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storeh_s16(dst.data.qs16, src.data.vs16);
    // propagate the stored lanes to the caller's object, then release
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qs16, simd_data_qs16) == 0) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 0
// Partial Load
// Compiled out for s16 by the enclosing `#if 0`.
// NOTE(review): SIMD_IMPL_INTRIN_N is defined outside this chunk (presumably
// in _simd_easyintrin.inc); its arguments appear to be
// (intrinsic name, return type tag, argument type tags...) — confirm there.
SIMD_IMPL_INTRIN_3(load_till_s16, vs16, qs16, u32, s16)
SIMD_IMPL_INTRIN_2(load_tillz_s16, vs16, qs16, u32)

// Partial Store
// store_till_s16(seq, nlane, vec) -> None
// Calls npyv_store_till_s16 on the converted buffer and then copies the
// buffer back into the Python sequence. Returns NULL when argument parsing or
// the write-back fails.
static PyObject *
simd__intrin_store_till_s16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs16};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    simd_arg vec_arg = {.dtype = simd_data_vs16};
    const int parsed_ok = PyArg_ParseTuple(
        args, "O&O&O&:store_till_s16",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &vec_arg
    );
    if (!parsed_ok) {
        return NULL;
    }
    npyv_lanetype_s16 *dst = seq_arg.data.qs16;
    npyv_store_till_s16(dst, nlane_arg.data.u32, vec_arg.data.vs16);
    // mirror the stored lanes into the caller's sequence object
    if (simd_sequence_fill_iterable(seq_arg.obj, dst, simd_data_qs16)) {
        goto fail;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
fail:
    simd_arg_free(&seq_arg);
    return NULL;
}

// Non-contiguous Load
#line 113
// loadn_s16(seq, stride) -> vector
// Checks that the sequence holds at least |stride| * npyv_nlanes_s16 elements
// before calling npyv_loadn_s16; for a negative stride the base pointer is
// first moved to the last element. Raises ValueError when the sequence is too
// short; returns NULL on any failure.
static PyObject *
simd__intrin_loadn_s16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs16};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    if (!PyArg_ParseTuple(
        args, "O&O&:loadn_s16",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s16 *seq_ptr = seq_arg.data.qs16;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not guarded against signed overflow; an
    // extreme stride could wrap and slip past the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s16;
    if (stride < 0) {
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd is the Py_ssize_t conversion; %d would read these as int.
        PyErr_Format(PyExc_ValueError,
            "loadn_s16(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_s16 rvec = npyv_loadn_s16(seq_ptr, stride);
    simd_arg ret = {
        .dtype = simd_data_vs16, .data = {.vs16=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
// loadn_till_s16(seq, stride, nlane, fill) -> vector
// Checks that the sequence holds at least |stride| * npyv_nlanes_s16 elements
// before calling npyv_loadn_till_s16; for a negative stride the base pointer
// is first moved to the last element. Raises ValueError when the sequence is
// too short; returns NULL on any failure.
static PyObject *
simd__intrin_loadn_till_s16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs16};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    simd_arg fill_arg = {.dtype = simd_data_s16};
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:loadn_till_s16",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &fill_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s16 *seq_ptr = seq_arg.data.qs16;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not guarded against signed overflow; an
    // extreme stride could wrap and slip past the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s16;
    if (stride < 0) {
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd is the Py_ssize_t conversion; %d would read these as int.
        PyErr_Format(PyExc_ValueError,
            "loadn_till_s16(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_s16 rvec = npyv_loadn_till_s16(
        seq_ptr, stride, nlane_arg.data.u32, fill_arg.data.s16
    );
    simd_arg ret = {
        .dtype = simd_data_vs16, .data = {.vs16=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
// loadn_tillz_s16(seq, stride, nlane) -> vector
// Checks that the sequence holds at least |stride| * npyv_nlanes_s16 elements
// before calling npyv_loadn_tillz_s16; for a negative stride the base pointer
// is first moved to the last element. Raises ValueError when the sequence is
// too short; returns NULL on any failure.
static PyObject *
simd__intrin_loadn_tillz_s16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs16};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:loadn_tillz_s16",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s16 *seq_ptr = seq_arg.data.qs16;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not guarded against signed overflow; an
    // extreme stride could wrap and slip past the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s16;
    if (stride < 0) {
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd is the Py_ssize_t conversion; %d would read these as int.
        PyErr_Format(PyExc_ValueError,
            "loadn_tillz_s16(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_s16 rvec = npyv_loadn_tillz_s16(
        seq_ptr, stride, nlane_arg.data.u32
    );
    simd_arg ret = {
        .dtype = simd_data_vs16, .data = {.vs16=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}


// Non-contiguous Store
#line 179
// storen_s16(seq, stride, vec) -> None
// Checks that the sequence holds at least |stride| * npyv_nlanes_s16 elements,
// calls npyv_storen_s16 (starting from the last element for a negative
// stride) and copies the buffer back into the Python sequence. Raises
// ValueError when the sequence is too short; returns NULL on any failure.
static PyObject *
simd__intrin_storen_s16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs16};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vs16};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:storen_s16",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s16 *seq_ptr = seq_arg.data.qs16;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not guarded against signed overflow; an
    // extreme stride could wrap and slip past the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s16;
    if (stride < 0) {
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    // refuse to write past the end of the sequence buffer
    if (cur_seq_len < min_seq_len) {
        // %zd is the Py_ssize_t conversion; %d would read these as int.
        PyErr_Format(PyExc_ValueError,
            "storen_s16(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_s16(seq_ptr, stride, vec_arg.data.vs16);
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qs16, simd_data_qs16)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 179
// storen_till_s16(seq, stride, nlane, vec) -> None
// Checks that the sequence holds at least |stride| * npyv_nlanes_s16 elements,
// calls npyv_storen_till_s16 (starting from the last element for a negative
// stride) and copies the buffer back into the Python sequence. Raises
// ValueError when the sequence is too short; returns NULL on any failure.
static PyObject *
simd__intrin_storen_till_s16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs16};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vs16};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    // The text after ':' names the function in parse errors, so it must be
    // this wrapper's own name.
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:storen_till_s16",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s16 *seq_ptr = seq_arg.data.qs16;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not guarded against signed overflow; an
    // extreme stride could wrap and slip past the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s16;
    if (stride < 0) {
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    // refuse to write past the end of the sequence buffer
    if (cur_seq_len < min_seq_len) {
        // %zd is the Py_ssize_t conversion; %d would read these as int.
        PyErr_Format(PyExc_ValueError,
            "storen_till_s16(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_till_s16(
        seq_ptr, stride, nlane_arg.data.u32, vec_arg.data.vs16
    );
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qs16, simd_data_qs16)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#endif // 0

/****************************
 * Lookup tables
 ****************************/
// Both conditions are template-expanded constants that evaluate to false
// here, so no lookup-table wrapper is emitted for s16.
#if 16 == 32
SIMD_IMPL_INTRIN_2(lut32_s16, vs16, qs16, vu16)
#endif
#if 16 == 64
SIMD_IMPL_INTRIN_2(lut16_s16, vs16, qs16, vu16)
#endif
/***************************
 * Misc
 ***************************/
SIMD_IMPL_INTRIN_0(zero_s16, vs16)
SIMD_IMPL_INTRIN_1(setall_s16, vs16, s16)
SIMD_IMPL_INTRIN_3(select_s16, vs16, vb16, vs16, vs16)

// reinterpret_<dst>_s16: one wrapper per destination lane type, each taking a
// vs16 argument. Only the f64 variant is conditional (on NPY_SIMD_F64).
#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u8_s16, vu8, vs16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s8_s16, vs8, vs16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u16_s16, vu16, vs16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s16_s16, vs16, vs16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u32_s16, vu32, vs16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s32_s16, vs32, vs16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u64_s16, vu64, vs16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s64_s16, vs64, vs16)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_f32_s16, vf32, vs16)
#endif // simd_sup2

#line 256
#if NPY_SIMD_F64
SIMD_IMPL_INTRIN_1(reinterpret_f64_s16, vf64, vs16)
#endif // simd_sup2


/**
 * special definition due to the nature of intrinsics
 * npyv_setf_s16 and npyv_set_s16.
*/
#line 268
// setf_s16(*values) -> vector
// Converts the positional arguments into a lane buffer, forwards its elements
// to npyv_setf_s16 and wraps the result in a vector object.
// NOTE(review): 65 elements are spelled out although only npyv_nlanes_s16 are
// requested from the converter; presumably npyv_setf_s16 is a macro that keeps
// just the leading arguments it needs. Confirm that, and confirm the requested
// minimum covers the one extra value setf appears to take.
static PyObject *
simd__intrin_setf_s16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_s16 *lanes = simd_sequence_from_iterable(
        args, simd_data_qs16, npyv_nlanes_s16
    );
    if (!lanes) {
        return NULL;
    }
    simd_data packed = {.vs16 = npyv_setf_s16(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27], lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35], lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43], lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51], lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59], lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // extra slot needed by setf
    )};
    // the buffer is no longer needed once the vector value has been built
    simd_sequence_free(lanes);
    return (PyObject*)PySIMDVector_FromData(packed, simd_data_vs16);
}

#line 268
// set_s16(*values) -> vector
// Converts the positional arguments into a lane buffer, forwards its elements
// to npyv_set_s16 and wraps the result in a vector object.
// NOTE(review): 65 elements are spelled out although only npyv_nlanes_s16 are
// requested from the converter; presumably npyv_set_s16 is a macro that keeps
// just the leading arguments it needs — confirm.
static PyObject *
simd__intrin_set_s16(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_s16 *lanes = simd_sequence_from_iterable(
        args, simd_data_qs16, npyv_nlanes_s16
    );
    if (!lanes) {
        return NULL;
    }
    simd_data packed = {.vs16 = npyv_set_s16(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27], lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35], lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43], lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51], lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59], lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // slot shared with the setf expansion of this template
    )};
    // the buffer is no longer needed once the vector value has been built
    simd_sequence_free(lanes);
    return (PyObject*)PySIMDVector_FromData(packed, simd_data_vs16);
}


/***************************
 * Reorder
 ***************************/
// combinel/combineh are declared with a single-vector return tag (vs16);
// combine/zip are declared with the vs16x2 return tag.
#line 297
SIMD_IMPL_INTRIN_2(combinel_s16, vs16, vs16, vs16)

#line 297
SIMD_IMPL_INTRIN_2(combineh_s16, vs16, vs16, vs16)


#line 303
SIMD_IMPL_INTRIN_2(combine_s16, vs16x2, vs16, vs16)

#line 303
SIMD_IMPL_INTRIN_2(zip_s16, vs16x2, vs16, vs16)


#if 1
SIMD_IMPL_INTRIN_1(rev64_s16, vs16, vs16)
#endif

/***************************
 * Operators
 ***************************/
// `15 > 0` is a template-expanded constant (always true for s16).
// shl/shr take the shift count as a u8 argument; shli/shri go through the
// 2IMM macro with a trailing integer.
// NOTE(review): the trailing integer is presumably the largest accepted
// immediate — confirm against SIMD_IMPL_INTRIN_2IMM.
#if 15 > 0
SIMD_IMPL_INTRIN_2(shl_s16, vs16, vs16, u8)
SIMD_IMPL_INTRIN_2(shr_s16, vs16, vs16, u8)
// immediate constant
SIMD_IMPL_INTRIN_2IMM(shli_s16, vs16, vs16, 15)
SIMD_IMPL_INTRIN_2IMM(shri_s16, vs16, vs16, 16)
#endif // shl_imm

#line 324
SIMD_IMPL_INTRIN_2(and_s16, vs16, vs16, vs16)

#line 324
SIMD_IMPL_INTRIN_2(or_s16, vs16, vs16, vs16)

#line 324
SIMD_IMPL_INTRIN_2(xor_s16, vs16, vs16, vs16)


SIMD_IMPL_INTRIN_1(not_s16, vs16, vs16)

// Comparisons are declared with the boolean vector tag vb16 as return type.
#line 332
SIMD_IMPL_INTRIN_2(cmpeq_s16, vb16, vs16, vs16)

#line 332
SIMD_IMPL_INTRIN_2(cmpneq_s16, vb16, vs16, vs16)

#line 332
SIMD_IMPL_INTRIN_2(cmpgt_s16, vb16, vs16, vs16)

#line 332
SIMD_IMPL_INTRIN_2(cmpge_s16, vb16, vs16, vs16)

#line 332
SIMD_IMPL_INTRIN_2(cmplt_s16, vb16, vs16, vs16)

#line 332
SIMD_IMPL_INTRIN_2(cmple_s16, vb16, vs16, vs16)


/***************************
 * Conversion
 ***************************/
SIMD_IMPL_INTRIN_1(cvt_s16_b16, vs16,  vb16)
SIMD_IMPL_INTRIN_1(cvt_b16_s16, vb16, vs16)
// expand is switched off in this expansion (expand_sup is 0 for s16).
#if 0
SIMD_IMPL_INTRIN_1(expand_s16_s16, vs16x2, vs16)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
// The `#if 0` / `#if 1` guards below are template-expanded feature switches;
// the name of each switch is given in the trailing comment on its #endif.
// For s16: sat, mul and intdiv are on; div, fused, sum and sumup are off.
#line 349
SIMD_IMPL_INTRIN_2(add_s16, vs16, vs16, vs16)

#line 349
SIMD_IMPL_INTRIN_2(sub_s16, vs16, vs16, vs16)


#if 1
#line 356
SIMD_IMPL_INTRIN_2(adds_s16, vs16, vs16, vs16)

#line 356
SIMD_IMPL_INTRIN_2(subs_s16, vs16, vs16, vs16)

#endif // sat_sup

#if 1
SIMD_IMPL_INTRIN_2(mul_s16, vs16, vs16, vs16)
#endif // mul_sup

#if 0
SIMD_IMPL_INTRIN_2(div_s16, vs16, vs16, vs16)
#endif // div_sup

// divisor_s16 is declared with the vs16x3 return tag, which divc_s16 takes as
// its second argument.
#if 1
SIMD_IMPL_INTRIN_1(divisor_s16, vs16x3, s16)
SIMD_IMPL_INTRIN_2(divc_s16, vs16, vs16, vs16x3)
#endif // intdiv_sup

#if 0
#line 377
SIMD_IMPL_INTRIN_3(muladd_s16, vs16, vs16, vs16, vs16)

#line 377
SIMD_IMPL_INTRIN_3(mulsub_s16, vs16, vs16, vs16, vs16)

#line 377
SIMD_IMPL_INTRIN_3(nmuladd_s16, vs16, vs16, vs16, vs16)

#line 377
SIMD_IMPL_INTRIN_3(nmulsub_s16, vs16, vs16, vs16, vs16)

#endif // fused_sup

#if 0
SIMD_IMPL_INTRIN_1(sum_s16, s16, vs16)
#endif // sum_sup

#if 0
SIMD_IMPL_INTRIN_1(sumup_s16, s16, vs16)
#endif // sumup_sup

/***************************
 * Math
 ***************************/
// The whole sqrt..floor group is compiled out for s16 (`#if 0`); only max/min
// are emitted. maxp/minp are compiled out as well.
#if 0
#line 396
SIMD_IMPL_INTRIN_1(sqrt_s16, vs16, vs16)

#line 396
SIMD_IMPL_INTRIN_1(recip_s16, vs16, vs16)

#line 396
SIMD_IMPL_INTRIN_1(abs_s16, vs16, vs16)

#line 396
SIMD_IMPL_INTRIN_1(square_s16, vs16, vs16)

#line 396
SIMD_IMPL_INTRIN_1(rint_s16, vs16, vs16)

#line 396
SIMD_IMPL_INTRIN_1(ceil_s16, vs16, vs16)

#line 396
SIMD_IMPL_INTRIN_1(trunc_s16, vs16, vs16)

#line 396
SIMD_IMPL_INTRIN_1(floor_s16, vs16, vs16)

#endif

#line 403
SIMD_IMPL_INTRIN_2(max_s16, vs16, vs16, vs16)

#line 403
SIMD_IMPL_INTRIN_2(min_s16, vs16, vs16, vs16)


#if 0
#line 410
SIMD_IMPL_INTRIN_2(maxp_s16, vs16, vs16, vs16)

#line 410
SIMD_IMPL_INTRIN_2(minp_s16, vs16, vs16, vs16)

#endif

/***************************
 * Mask operations
 ***************************/
// ifadd/ifsub are declared with a boolean vector (vb16) first argument
// followed by three vs16 arguments.
#line 420
 SIMD_IMPL_INTRIN_4(ifadd_s16, vs16, vb16, vs16, vs16, vs16)

#line 420
 SIMD_IMPL_INTRIN_4(ifsub_s16, vs16, vb16, vs16, vs16, vs16)

#endif // simd_sup

#line 35
#if 1
/***************************
 * Memory
 ***************************/
// Contiguous loads: each is declared with a qu32 sequence argument and a vu32
// return tag. The matching stores need hand-written wrappers (see below).
#line 42
SIMD_IMPL_INTRIN_1(load_u32, vu32, qu32)

#line 42
SIMD_IMPL_INTRIN_1(loada_u32, vu32, qu32)

#line 42
SIMD_IMPL_INTRIN_1(loads_u32, vu32, qu32)

#line 42
SIMD_IMPL_INTRIN_1(loadl_u32, vu32, qu32)

#line 47
// store_u32(seq, vec) -> None
// Hand-written wrapper: npyv_store_u32 writes through its first argument, so
// the converted buffer has to be copied back into the Python sequence after
// the call. Returns NULL when argument parsing or the write-back fails.
static PyObject *
simd__intrin_store_u32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu32};
    simd_arg vec_arg = {.dtype = simd_data_vu32};
    const int parsed_ok = PyArg_ParseTuple(
        args, "O&O&:store_u32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &vec_arg
    );
    if (!parsed_ok) {
        return NULL;
    }
    npyv_lanetype_u32 *dst = seq_arg.data.qu32;
    npyv_store_u32(dst, vec_arg.data.vu32);
    // mirror the stored lanes into the caller's sequence object
    if (simd_sequence_fill_iterable(seq_arg.obj, dst, simd_data_qu32)) {
        goto fail;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
fail:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 47
// storea_u32(seq, vec) -> None
// Hand-written wrapper: npyv_storea_u32 writes through its first argument, so
// the converted buffer has to be copied back into the Python sequence after
// the call. Returns NULL when argument parsing or the write-back fails.
static PyObject *
simd__intrin_storea_u32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu32};
    simd_arg vec_arg = {.dtype = simd_data_vu32};
    const int parsed_ok = PyArg_ParseTuple(
        args, "O&O&:storea_u32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &vec_arg
    );
    if (!parsed_ok) {
        return NULL;
    }
    npyv_lanetype_u32 *dst = seq_arg.data.qu32;
    npyv_storea_u32(dst, vec_arg.data.vu32);
    // mirror the stored lanes into the caller's sequence object
    if (simd_sequence_fill_iterable(seq_arg.obj, dst, simd_data_qu32)) {
        goto fail;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
fail:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 47
// stores_u32(seq, vec) -> None
// Hand-written wrapper: npyv_stores_u32 writes through its first argument, so
// the converted buffer has to be copied back into the Python sequence after
// the call. Returns NULL when argument parsing or the write-back fails.
static PyObject *
simd__intrin_stores_u32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu32};
    simd_arg vec_arg = {.dtype = simd_data_vu32};
    const int parsed_ok = PyArg_ParseTuple(
        args, "O&O&:stores_u32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &vec_arg
    );
    if (!parsed_ok) {
        return NULL;
    }
    npyv_lanetype_u32 *dst = seq_arg.data.qu32;
    npyv_stores_u32(dst, vec_arg.data.vu32);
    // mirror the stored lanes into the caller's sequence object
    if (simd_sequence_fill_iterable(seq_arg.obj, dst, simd_data_qu32)) {
        goto fail;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
fail:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 47
// storel_u32(seq, vec) -> None
// Hand-written wrapper: npyv_storel_u32 writes through its first argument, so
// the converted buffer has to be copied back into the Python sequence after
// the call. Returns NULL when argument parsing or the write-back fails.
static PyObject *
simd__intrin_storel_u32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu32};
    simd_arg vec_arg = {.dtype = simd_data_vu32};
    const int parsed_ok = PyArg_ParseTuple(
        args, "O&O&:storel_u32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &vec_arg
    );
    if (!parsed_ok) {
        return NULL;
    }
    npyv_lanetype_u32 *dst = seq_arg.data.qu32;
    npyv_storel_u32(dst, vec_arg.data.vu32);
    // mirror the stored lanes into the caller's sequence object
    if (simd_sequence_fill_iterable(seq_arg.obj, dst, simd_data_qu32)) {
        goto fail;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
fail:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 47
// storeh_u32(seq, vec) -> None
// Hand-written wrapper: npyv_storeh_u32 writes through its first argument, so
// the converted buffer has to be copied back into the Python sequence after
// the call. Returns NULL when argument parsing or the write-back fails.
static PyObject *
simd__intrin_storeh_u32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu32};
    simd_arg vec_arg = {.dtype = simd_data_vu32};
    const int parsed_ok = PyArg_ParseTuple(
        args, "O&O&:storeh_u32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &vec_arg
    );
    if (!parsed_ok) {
        return NULL;
    }
    npyv_lanetype_u32 *dst = seq_arg.data.qu32;
    npyv_storeh_u32(dst, vec_arg.data.vu32);
    // mirror the stored lanes into the caller's sequence object
    if (simd_sequence_fill_iterable(seq_arg.obj, dst, simd_data_qu32)) {
        goto fail;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
fail:
    simd_arg_free(&seq_arg);
    return NULL;
}


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 1
// Partial Load
// load_till_u32 is declared with (qu32, u32, u32) arguments and load_tillz_u32
// with (qu32, u32); both use the vu32 return tag.
// NOTE(review): SIMD_IMPL_INTRIN_N is defined outside this chunk (presumably
// in _simd_easyintrin.inc) — confirm the argument convention there.
SIMD_IMPL_INTRIN_3(load_till_u32, vu32, qu32, u32, u32)
SIMD_IMPL_INTRIN_2(load_tillz_u32, vu32, qu32, u32)

// Partial Store
// store_till_u32(seq, nlane, vec) -> None
// Calls npyv_store_till_u32 on the converted buffer and then copies the
// buffer back into the Python sequence. Returns NULL when argument parsing or
// the write-back fails.
static PyObject *
simd__intrin_store_till_u32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu32};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    simd_arg vec_arg = {.dtype = simd_data_vu32};
    const int parsed_ok = PyArg_ParseTuple(
        args, "O&O&O&:store_till_u32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &vec_arg
    );
    if (!parsed_ok) {
        return NULL;
    }
    npyv_lanetype_u32 *dst = seq_arg.data.qu32;
    npyv_store_till_u32(dst, nlane_arg.data.u32, vec_arg.data.vu32);
    // mirror the stored lanes into the caller's sequence object
    if (simd_sequence_fill_iterable(seq_arg.obj, dst, simd_data_qu32)) {
        goto fail;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
fail:
    simd_arg_free(&seq_arg);
    return NULL;
}

// Non-contiguous Load
#line 113
// loadn_u32(seq, stride) -> vector
// Checks that the sequence holds at least |stride| * npyv_nlanes_u32 elements
// before calling npyv_loadn_u32; for a negative stride the base pointer is
// first moved to the last element. Raises ValueError when the sequence is too
// short; returns NULL on any failure.
static PyObject *
simd__intrin_loadn_u32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    if (!PyArg_ParseTuple(
        args, "O&O&:loadn_u32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u32 *seq_ptr = seq_arg.data.qu32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not guarded against signed overflow; an
    // extreme stride could wrap and slip past the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u32;
    if (stride < 0) {
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd is the Py_ssize_t conversion; %d would read these as int.
        PyErr_Format(PyExc_ValueError,
            "loadn_u32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_u32 rvec = npyv_loadn_u32(seq_ptr, stride);
    simd_arg ret = {
        .dtype = simd_data_vu32, .data = {.vu32=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
// loadn_till_u32(seq, stride, nlane, fill) -> vector
// Checks that the sequence holds at least |stride| * npyv_nlanes_u32 elements
// before calling npyv_loadn_till_u32; for a negative stride the base pointer
// is first moved to the last element. Raises ValueError when the sequence is
// too short; returns NULL on any failure.
static PyObject *
simd__intrin_loadn_till_u32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    simd_arg fill_arg = {.dtype = simd_data_u32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:loadn_till_u32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &fill_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u32 *seq_ptr = seq_arg.data.qu32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not guarded against signed overflow; an
    // extreme stride could wrap and slip past the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u32;
    if (stride < 0) {
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd is the Py_ssize_t conversion; %d would read these as int.
        PyErr_Format(PyExc_ValueError,
            "loadn_till_u32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_u32 rvec = npyv_loadn_till_u32(
        seq_ptr, stride, nlane_arg.data.u32, fill_arg.data.u32
    );
    simd_arg ret = {
        .dtype = simd_data_vu32, .data = {.vu32=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
// loadn_tillz_u32(seq, stride, nlane) -> vector
// Checks that the sequence holds at least |stride| * npyv_nlanes_u32 elements
// before calling npyv_loadn_tillz_u32; for a negative stride the base pointer
// is first moved to the last element. Raises ValueError when the sequence is
// too short; returns NULL on any failure.
static PyObject *
simd__intrin_loadn_tillz_u32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:loadn_tillz_u32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u32 *seq_ptr = seq_arg.data.qu32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not guarded against signed overflow; an
    // extreme stride could wrap and slip past the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u32;
    if (stride < 0) {
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd is the Py_ssize_t conversion; %d would read these as int.
        PyErr_Format(PyExc_ValueError,
            "loadn_tillz_u32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_u32 rvec = npyv_loadn_tillz_u32(
        seq_ptr, stride, nlane_arg.data.u32
    );
    simd_arg ret = {
        .dtype = simd_data_vu32, .data = {.vu32=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}


// Non-contiguous Store
#line 179
// storen_u32(seq, stride, vec) -> None
// Checks that the sequence holds at least |stride| * npyv_nlanes_u32 elements,
// calls npyv_storen_u32 (starting from the last element for a negative
// stride) and copies the buffer back into the Python sequence. Raises
// ValueError when the sequence is too short; returns NULL on any failure.
static PyObject *
simd__intrin_storen_u32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vu32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:storen_u32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u32 *seq_ptr = seq_arg.data.qu32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not guarded against signed overflow; an
    // extreme stride could wrap and slip past the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u32;
    if (stride < 0) {
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    // refuse to write past the end of the sequence buffer
    if (cur_seq_len < min_seq_len) {
        // %zd is the Py_ssize_t conversion; %d would read these as int.
        PyErr_Format(PyExc_ValueError,
            "storen_u32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_u32(seq_ptr, stride, vec_arg.data.vu32);
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qu32, simd_data_qu32)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 179
// storen_till_u32(seq, stride, nlane, vec) -> None
// Checks that the sequence holds at least |stride| * npyv_nlanes_u32 elements,
// calls npyv_storen_till_u32 (starting from the last element for a negative
// stride) and copies the buffer back into the Python sequence. Raises
// ValueError when the sequence is too short; returns NULL on any failure.
static PyObject *
simd__intrin_storen_till_u32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vu32};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    // The text after ':' names the function in parse errors, so it must be
    // this wrapper's own name.
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:storen_till_u32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u32 *seq_ptr = seq_arg.data.qu32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not guarded against signed overflow; an
    // extreme stride could wrap and slip past the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u32;
    if (stride < 0) {
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    // refuse to write past the end of the sequence buffer
    if (cur_seq_len < min_seq_len) {
        // %zd is the Py_ssize_t conversion; %d would read these as int.
        PyErr_Format(PyExc_ValueError,
            "storen_till_u32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_till_u32(
        seq_ptr, stride, nlane_arg.data.u32, vec_arg.data.vu32
    );
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qu32, simd_data_qu32)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#endif // 1

/****************************
 * Lookup tables
 ****************************/
// The conditions are template-expanded constants: for u32 the first is true
// (lut32_u32 is emitted) and the second is false (lut16_u32 is not).
#if 32 == 32
SIMD_IMPL_INTRIN_2(lut32_u32, vu32, qu32, vu32)
#endif
#if 32 == 64
SIMD_IMPL_INTRIN_2(lut16_u32, vu32, qu32, vu32)
#endif
/***************************
 * Misc
 ***************************/
SIMD_IMPL_INTRIN_0(zero_u32, vu32)
SIMD_IMPL_INTRIN_1(setall_u32, vu32, u32)
SIMD_IMPL_INTRIN_3(select_u32, vu32, vb32, vu32, vu32)

// reinterpret_<dst>_u32: one wrapper per destination lane type, each taking a
// vu32 argument. Only the f64 variant is conditional (on NPY_SIMD_F64).
#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u8_u32, vu8, vu32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s8_u32, vs8, vu32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u16_u32, vu16, vu32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s16_u32, vs16, vu32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u32_u32, vu32, vu32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s32_u32, vs32, vu32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u64_u32, vu64, vu32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s64_u32, vs64, vu32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_f32_u32, vf32, vu32)
#endif // simd_sup2

#line 256
#if NPY_SIMD_F64
SIMD_IMPL_INTRIN_1(reinterpret_f64_u32, vf64, vu32)
#endif // simd_sup2


/**
 * special definition due to the nature of intrinsics
 * npyv_setf_u32 and npyv_set_u32.
*/
#line 268
// setf_u32(*values) -> vector
// Converts the positional arguments into a lane buffer, forwards its elements
// to npyv_setf_u32 and wraps the result in a vector object.
// NOTE(review): 65 elements are spelled out although only npyv_nlanes_u32 are
// requested from the converter; presumably npyv_setf_u32 is a macro that keeps
// just the leading arguments it needs. Confirm that, and confirm the requested
// minimum covers the one extra value setf appears to take.
static PyObject *
simd__intrin_setf_u32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_u32 *lanes = simd_sequence_from_iterable(
        args, simd_data_qu32, npyv_nlanes_u32
    );
    if (!lanes) {
        return NULL;
    }
    simd_data packed = {.vu32 = npyv_setf_u32(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27], lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35], lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43], lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51], lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59], lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // extra slot needed by setf
    )};
    // the buffer is no longer needed once the vector value has been built
    simd_sequence_free(lanes);
    return (PyObject*)PySIMDVector_FromData(packed, simd_data_vu32);
}

#line 268
/**
 * Python-facing wrapper that builds a vu32 vector through npyv_set_u32().
 *
 * `args` is handed directly to simd_sequence_from_iterable() to obtain a C
 * buffer of u32 lanes. Returns the vector object produced by
 * PySIMDVector_FromData(), or NULL when the conversion returns NULL.
 */
static PyObject *
simd__intrin_set_u32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    // NOTE(review): the third argument (npyv_nlanes_u32) is presumably the
    // minimum accepted number of elements - confirm against the helper.
    npyv_lanetype_u32 *data = simd_sequence_from_iterable(args, simd_data_qu32, npyv_nlanes_u32);
    if (data == NULL) {
        return NULL;
    }
    // NOTE(review): the argument list is shared with the setf variant (hence
    // the trailing data[64]); presumably npyv_set_u32 is a macro that ignores
    // the arguments beyond the lane count - confirm.
    simd_data r = {.vu32 = npyv_set_u32(
        data[0],  data[1],  data[2],  data[3],  data[4],  data[5],  data[6],  data[7],
        data[8],  data[9],  data[10], data[11], data[12], data[13], data[14], data[15],
        data[16], data[17], data[18], data[19], data[20], data[21], data[22], data[23],
        data[24], data[25], data[26], data[27], data[28], data[29], data[30], data[31],
        data[32], data[33], data[34], data[35], data[36], data[37], data[38], data[39],
        data[40], data[41], data[42], data[43], data[44], data[45], data[46], data[47],
        data[48], data[49], data[50], data[51], data[52], data[53], data[54], data[55],
        data[56], data[57], data[58], data[59], data[60], data[61], data[62], data[63],
        data[64] // for setf
    )};
    // the lane buffer is released before the result object is created
    simd_sequence_free(data);
    return (PyObject*)PySIMDVector_FromData(r, simd_data_vu32);
}


/***************************
 * Reorder
 ***************************/
// Reorder wrappers for u32: combinel/combineh list vu32 as their first type
// argument, combine/zip list vu32x2; all take two vu32 operands.
// NOTE(review): SIMD_IMPL_INTRIN_* are defined outside this chunk; arguments
// presumably are (intrinsic name, return type, argument types...) - confirm.
#line 297
SIMD_IMPL_INTRIN_2(combinel_u32, vu32, vu32, vu32)

#line 297
SIMD_IMPL_INTRIN_2(combineh_u32, vu32, vu32, vu32)


#line 303
SIMD_IMPL_INTRIN_2(combine_u32, vu32x2, vu32, vu32)

#line 303
SIMD_IMPL_INTRIN_2(zip_u32, vu32x2, vu32, vu32)


#if 1
SIMD_IMPL_INTRIN_1(rev64_u32, vu32, vu32)
#endif

/***************************
 * Operators
 ***************************/
// `31 > 0` is always true, so the shift wrappers are always compiled for u32.
#if 31 > 0
SIMD_IMPL_INTRIN_2(shl_u32, vu32, vu32, u8)
SIMD_IMPL_INTRIN_2(shr_u32, vu32, vu32, u8)
// immediate constant
// NOTE(review): the trailing 31 / 32 presumably bound the accepted immediate
// shift count - confirm against SIMD_IMPL_INTRIN_2IMM (defined elsewhere).
SIMD_IMPL_INTRIN_2IMM(shli_u32, vu32, vu32, 31)
SIMD_IMPL_INTRIN_2IMM(shri_u32, vu32, vu32, 32)
#endif // shl_imm

// bitwise operators: vu32 operands, vu32 listed as the first type argument
#line 324
SIMD_IMPL_INTRIN_2(and_u32, vu32, vu32, vu32)

#line 324
SIMD_IMPL_INTRIN_2(or_u32, vu32, vu32, vu32)

#line 324
SIMD_IMPL_INTRIN_2(xor_u32, vu32, vu32, vu32)


SIMD_IMPL_INTRIN_1(not_u32, vu32, vu32)

// comparisons: vu32 operands, vb32 listed as the first type argument
#line 332
SIMD_IMPL_INTRIN_2(cmpeq_u32, vb32, vu32, vu32)

#line 332
SIMD_IMPL_INTRIN_2(cmpneq_u32, vb32, vu32, vu32)

#line 332
SIMD_IMPL_INTRIN_2(cmpgt_u32, vb32, vu32, vu32)

#line 332
SIMD_IMPL_INTRIN_2(cmpge_u32, vb32, vu32, vu32)

#line 332
SIMD_IMPL_INTRIN_2(cmplt_u32, vb32, vu32, vu32)

#line 332
SIMD_IMPL_INTRIN_2(cmple_u32, vb32, vu32, vu32)


/***************************
 * Conversion
 ***************************/
// Conversions between vb32 and vu32 in both directions. The expand wrapper
// is compiled out for u32 (`#if 0`).
SIMD_IMPL_INTRIN_1(cvt_u32_b32, vu32,  vb32)
SIMD_IMPL_INTRIN_1(cvt_b32_u32, vb32, vu32)
#if 0
SIMD_IMPL_INTRIN_1(expand_u32_u32, vu32x2, vu32)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
// Arithmetic wrappers for u32. Compiled: add, sub, mul, divisor/divc, sum.
// Compiled out via `#if 0`: adds/subs, div, the muladd family and sumup.
#line 349
SIMD_IMPL_INTRIN_2(add_u32, vu32, vu32, vu32)

#line 349
SIMD_IMPL_INTRIN_2(sub_u32, vu32, vu32, vu32)


#if 0
#line 356
SIMD_IMPL_INTRIN_2(adds_u32, vu32, vu32, vu32)

#line 356
SIMD_IMPL_INTRIN_2(subs_u32, vu32, vu32, vu32)

#endif // sat_sup

#if 1
SIMD_IMPL_INTRIN_2(mul_u32, vu32, vu32, vu32)
#endif // mul_sup

#if 0
SIMD_IMPL_INTRIN_2(div_u32, vu32, vu32, vu32)
#endif // div_sup

// divisor_u32 lists a vu32x3 result from a scalar u32; divc_u32 takes that
// vu32x3 as its second operand.
#if 1
SIMD_IMPL_INTRIN_1(divisor_u32, vu32x3, u32)
SIMD_IMPL_INTRIN_2(divc_u32, vu32, vu32, vu32x3)
#endif // intdiv_sup

#if 0
#line 377
SIMD_IMPL_INTRIN_3(muladd_u32, vu32, vu32, vu32, vu32)

#line 377
SIMD_IMPL_INTRIN_3(mulsub_u32, vu32, vu32, vu32, vu32)

#line 377
SIMD_IMPL_INTRIN_3(nmuladd_u32, vu32, vu32, vu32, vu32)

#line 377
SIMD_IMPL_INTRIN_3(nmulsub_u32, vu32, vu32, vu32, vu32)

#endif // fused_sup

#if 1
SIMD_IMPL_INTRIN_1(sum_u32, u32, vu32)
#endif // sum_sup

#if 0
SIMD_IMPL_INTRIN_1(sumup_u32, u32, vu32)
#endif // sumup_sup

/***************************
 * Math
 ***************************/
// Math wrappers for u32: sqrt .. floor and maxp/minp are compiled out
// (`#if 0`); only max and min are instantiated.
#if 0
#line 396
SIMD_IMPL_INTRIN_1(sqrt_u32, vu32, vu32)

#line 396
SIMD_IMPL_INTRIN_1(recip_u32, vu32, vu32)

#line 396
SIMD_IMPL_INTRIN_1(abs_u32, vu32, vu32)

#line 396
SIMD_IMPL_INTRIN_1(square_u32, vu32, vu32)

#line 396
SIMD_IMPL_INTRIN_1(rint_u32, vu32, vu32)

#line 396
SIMD_IMPL_INTRIN_1(ceil_u32, vu32, vu32)

#line 396
SIMD_IMPL_INTRIN_1(trunc_u32, vu32, vu32)

#line 396
SIMD_IMPL_INTRIN_1(floor_u32, vu32, vu32)

#endif

#line 403
SIMD_IMPL_INTRIN_2(max_u32, vu32, vu32, vu32)

#line 403
SIMD_IMPL_INTRIN_2(min_u32, vu32, vu32, vu32)


#if 0
#line 410
SIMD_IMPL_INTRIN_2(maxp_u32, vu32, vu32, vu32)

#line 410
SIMD_IMPL_INTRIN_2(minp_u32, vu32, vu32, vu32)

#endif

/***************************
 * Mask operations
 ***************************/
// Masked add/sub wrappers: the type list is (vu32, vb32, vu32, vu32, vu32).
// NOTE(review): presumably result, mask, two operands and a fallback vector;
// SIMD_IMPL_INTRIN_4 is defined outside this chunk - confirm.
#line 420
 SIMD_IMPL_INTRIN_4(ifadd_u32, vu32, vb32, vu32, vu32, vu32)

#line 420
 SIMD_IMPL_INTRIN_4(ifsub_u32, vu32, vb32, vu32, vu32, vu32)


#endif // simd_sup

#line 35
#if 1
/***************************
 * Memory
 ***************************/
// Contiguous load wrappers for s32: each lists vs32 and qs32 as its types.
// NOTE(review): SIMD_IMPL_INTRIN_1 is defined outside this chunk; arguments
// presumably are (intrinsic name, return type, argument type) - confirm.
#line 42
SIMD_IMPL_INTRIN_1(load_s32, vs32, qs32)

#line 42
SIMD_IMPL_INTRIN_1(loada_s32, vs32, qs32)

#line 42
SIMD_IMPL_INTRIN_1(loads_s32, vs32, qs32)

#line 42
SIMD_IMPL_INTRIN_1(loadl_s32, vs32, qs32)

#line 47
// special definition due to the nature of store
/**
 * Python wrapper for npyv_store_s32(): takes a qs32 sequence and a vs32
 * vector, runs the store on the sequence buffer and writes the buffer back
 * into the sequence object. Returns None, or NULL on failure.
 */
static PyObject *
simd__intrin_store_s32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs32};
    simd_arg src = {.dtype = simd_data_vs32};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:store_s32",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_s32(dst.data.qs32, src.data.vs32);
    // propagate the modified buffer to the caller's sequence object
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qs32, simd_data_qs32)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// special definition due to the nature of storea
/**
 * Python wrapper for npyv_storea_s32(): takes a qs32 sequence and a vs32
 * vector, runs the store on the sequence buffer and writes the buffer back
 * into the sequence object. Returns None, or NULL on failure.
 */
static PyObject *
simd__intrin_storea_s32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs32};
    simd_arg src = {.dtype = simd_data_vs32};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storea_s32",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storea_s32(dst.data.qs32, src.data.vs32);
    // propagate the modified buffer to the caller's sequence object
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qs32, simd_data_qs32)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// special definition due to the nature of stores
/**
 * Python wrapper for npyv_stores_s32(): takes a qs32 sequence and a vs32
 * vector, runs the store on the sequence buffer and writes the buffer back
 * into the sequence object. Returns None, or NULL on failure.
 */
static PyObject *
simd__intrin_stores_s32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs32};
    simd_arg src = {.dtype = simd_data_vs32};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:stores_s32",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_stores_s32(dst.data.qs32, src.data.vs32);
    // propagate the modified buffer to the caller's sequence object
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qs32, simd_data_qs32)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// special definition due to the nature of storel
/**
 * Python wrapper for npyv_storel_s32(): takes a qs32 sequence and a vs32
 * vector, runs the store on the sequence buffer and writes the buffer back
 * into the sequence object. Returns None, or NULL on failure.
 */
static PyObject *
simd__intrin_storel_s32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs32};
    simd_arg src = {.dtype = simd_data_vs32};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storel_s32",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storel_s32(dst.data.qs32, src.data.vs32);
    // propagate the modified buffer to the caller's sequence object
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qs32, simd_data_qs32)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// special definition due to the nature of storeh
/**
 * Python wrapper for npyv_storeh_s32(): takes a qs32 sequence and a vs32
 * vector, runs the store on the sequence buffer and writes the buffer back
 * into the sequence object. Returns None, or NULL on failure.
 */
static PyObject *
simd__intrin_storeh_s32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs32};
    simd_arg src = {.dtype = simd_data_vs32};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storeh_s32",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storeh_s32(dst.data.qs32, src.data.vs32);
    // propagate the modified buffer to the caller's sequence object
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qs32, simd_data_qs32)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 1
// Partial Load
// load_till_s32 lists (qs32, u32, s32) after the vs32 type; load_tillz_s32
// drops the trailing s32.
// NOTE(review): presumably sequence, lane count and fill value, mirroring the
// hand-written loadn_till_s32 wrapper in this file - confirm.
SIMD_IMPL_INTRIN_3(load_till_s32, vs32, qs32, u32, s32)
SIMD_IMPL_INTRIN_2(load_tillz_s32, vs32, qs32, u32)

// Partial Store
/**
 * Python wrapper for npyv_store_till_s32(): takes a qs32 sequence, a u32 lane
 * count and a vs32 vector, runs the partial store on the sequence buffer and
 * writes the buffer back into the sequence object.
 * Returns None, or NULL on failure.
 */
static PyObject *
simd__intrin_store_till_s32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs32};
    simd_arg count = {.dtype = simd_data_u32};
    simd_arg src = {.dtype = simd_data_vs32};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&O&:store_till_s32",
        simd_arg_converter, &dst,
        simd_arg_converter, &count,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_till_s32(dst.data.qs32, count.data.u32, src.data.vs32);
    // propagate the modified buffer to the caller's sequence object
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qs32, simd_data_qs32)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

// Non-contiguous Load
#line 113
/**
 * Python wrapper for npyv_loadn_s32() (non-contiguous load).
 *
 * Arguments parsed from `args`: a qs32 sequence and an s64 stride.
 * Raises ValueError when the sequence holds fewer than
 * |stride| * npyv_nlanes_s32 elements. For a negative stride the load starts
 * at the last element of the sequence.
 *
 * Returns the result of simd_arg_to_obj() for the loaded vs32 vector, or NULL
 * with an exception set.
 */
static PyObject *
simd__intrin_loadn_s32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    if (!PyArg_ParseTuple(
        args, "O&O&:loadn_s32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s32 *seq_ptr = seq_arg.data.qs32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s32;
    if (stride < 0) {
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; plain %d would fetch the varargs as int
        PyErr_Format(PyExc_ValueError,
            "loadn_s32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    if (stride < 0) {
        // walk backwards from the last element; done only after the length
        // check so the pointer never leaves the buffer
        seq_ptr += cur_seq_len - 1;
    }
    npyv_s32 rvec = npyv_loadn_s32(seq_ptr, stride);
    simd_arg ret = {
        .dtype = simd_data_vs32, .data = {.vs32=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
/**
 * Python wrapper for npyv_loadn_till_s32() (non-contiguous partial load).
 *
 * Arguments parsed from `args`: a qs32 sequence, an s64 stride, a u32 lane
 * count and an s32 fill value.
 * Raises ValueError when the sequence holds fewer than
 * |stride| * npyv_nlanes_s32 elements. For a negative stride the load starts
 * at the last element of the sequence.
 *
 * Returns the result of simd_arg_to_obj() for the loaded vs32 vector, or NULL
 * with an exception set.
 */
static PyObject *
simd__intrin_loadn_till_s32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    simd_arg fill_arg = {.dtype = simd_data_s32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:loadn_till_s32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &fill_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s32 *seq_ptr = seq_arg.data.qs32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s32;
    if (stride < 0) {
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; plain %d would fetch the varargs as int
        PyErr_Format(PyExc_ValueError,
            "loadn_till_s32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    if (stride < 0) {
        // walk backwards from the last element; done only after the length
        // check so the pointer never leaves the buffer
        seq_ptr += cur_seq_len - 1;
    }
    npyv_s32 rvec = npyv_loadn_till_s32(
        seq_ptr, stride, nlane_arg.data.u32, fill_arg.data.s32
    );
    simd_arg ret = {
        .dtype = simd_data_vs32, .data = {.vs32=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
/**
 * Python wrapper for npyv_loadn_tillz_s32() (non-contiguous partial load
 * without an explicit fill value).
 *
 * Arguments parsed from `args`: a qs32 sequence, an s64 stride and a u32 lane
 * count.
 * Raises ValueError when the sequence holds fewer than
 * |stride| * npyv_nlanes_s32 elements. For a negative stride the load starts
 * at the last element of the sequence.
 *
 * Returns the result of simd_arg_to_obj() for the loaded vs32 vector, or NULL
 * with an exception set.
 */
static PyObject *
simd__intrin_loadn_tillz_s32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:loadn_tillz_s32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s32 *seq_ptr = seq_arg.data.qs32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s32;
    if (stride < 0) {
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; plain %d would fetch the varargs as int
        PyErr_Format(PyExc_ValueError,
            "loadn_tillz_s32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    if (stride < 0) {
        // walk backwards from the last element; done only after the length
        // check so the pointer never leaves the buffer
        seq_ptr += cur_seq_len - 1;
    }
    npyv_s32 rvec = npyv_loadn_tillz_s32(
        seq_ptr, stride, nlane_arg.data.u32
    );
    simd_arg ret = {
        .dtype = simd_data_vs32, .data = {.vs32=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}


// Non-contiguous Store
#line 179
/**
 * Python wrapper for npyv_storen_s32() (non-contiguous store).
 *
 * Arguments parsed from `args`: a qs32 sequence, an s64 stride and a vs32
 * vector.
 * Raises ValueError when the sequence holds fewer than
 * |stride| * npyv_nlanes_s32 elements. For a negative stride the store starts
 * at the last element of the sequence. After the store the buffer is written
 * back into the sequence object.
 *
 * Returns None, or NULL with an exception set.
 */
static PyObject *
simd__intrin_storen_s32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vs32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:storen_s32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s32 *seq_ptr = seq_arg.data.qs32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s32;
    if (stride < 0) {
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; plain %d would fetch the varargs as int
        PyErr_Format(PyExc_ValueError,
            "storen_s32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    if (stride < 0) {
        // walk backwards from the last element; done only after the length
        // check so the pointer never leaves the buffer
        seq_ptr += cur_seq_len - 1;
    }
    npyv_storen_s32(seq_ptr, stride, vec_arg.data.vs32);
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qs32, simd_data_qs32)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 179
/**
 * Python wrapper for npyv_storen_till_s32() (non-contiguous partial store).
 *
 * Arguments parsed from `args`: a qs32 sequence, an s64 stride, a u32 lane
 * count and a vs32 vector.
 * Raises ValueError when the sequence holds fewer than
 * |stride| * npyv_nlanes_s32 elements. For a negative stride the store starts
 * at the last element of the sequence. After the store the buffer is written
 * back into the sequence object.
 *
 * Returns None, or NULL with an exception set.
 */
static PyObject *
simd__intrin_storen_till_s32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vs32};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    // the name after ':' is what Python reports in argument errors, so it
    // must be this function's own name
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:storen_till_s32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s32 *seq_ptr = seq_arg.data.qs32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s32;
    if (stride < 0) {
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; plain %d would fetch the varargs as int
        PyErr_Format(PyExc_ValueError,
            "storen_till_s32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    if (stride < 0) {
        // walk backwards from the last element; done only after the length
        // check so the pointer never leaves the buffer
        seq_ptr += cur_seq_len - 1;
    }
    npyv_storen_till_s32(
        seq_ptr, stride, nlane_arg.data.u32, vec_arg.data.vs32
    );
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qs32, simd_data_qs32)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#endif // 1

/****************************
 * Lookup tables
 ****************************/
// `32 == 32` is true and `32 == 64` is false, so only lut32_s32 is
// instantiated for this lane type; lut16_s32 is compiled out.
#if 32 == 32
SIMD_IMPL_INTRIN_2(lut32_s32, vs32, qs32, vu32)
#endif
#if 32 == 64
SIMD_IMPL_INTRIN_2(lut16_s32, vs32, qs32, vu32)
#endif
/***************************
 * Misc
 ***************************/
// zero_s32 lists only the vs32 type; setall_s32 adds a scalar s32;
// select_s32 adds a vb32 and two vs32.
SIMD_IMPL_INTRIN_0(zero_s32, vs32)
SIMD_IMPL_INTRIN_1(setall_s32, vs32, s32)
SIMD_IMPL_INTRIN_3(select_s32, vs32, vb32, vs32, vs32)

// Instantiates the reinterpret_<sfx>_s32 wrappers (second macro argument
// vu8 .. vf64, third argument vs32). All are compiled unconditionally
// (`#if 1`) except the f64 variant, which is only compiled when NPY_SIMD_F64
// is non-zero.
// NOTE(review): SIMD_IMPL_INTRIN_1 is defined outside this chunk; its
// arguments presumably are (intrinsic name, return type, argument type) - confirm.
#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u8_s32, vu8, vs32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s8_s32, vs8, vs32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u16_s32, vu16, vs32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s16_s32, vs16, vs32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u32_s32, vu32, vs32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s32_s32, vs32, vs32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u64_s32, vu64, vs32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s64_s32, vs64, vs32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_f32_s32, vf32, vs32)
#endif // simd_sup2

#line 256
#if NPY_SIMD_F64
SIMD_IMPL_INTRIN_1(reinterpret_f64_s32, vf64, vs32)
#endif // simd_sup2


/**
 * Special definitions due to the nature of the intrinsics
 * npyv_setf_s32 and npyv_set_s32.
 */
#line 268
/**
 * Python-facing wrapper that builds a vs32 vector through npyv_setf_s32().
 *
 * `args` is handed directly to simd_sequence_from_iterable() to obtain a C
 * buffer of s32 lanes. Returns the vector object produced by
 * PySIMDVector_FromData(), or NULL when the conversion returns NULL.
 */
static PyObject *
simd__intrin_setf_s32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    // NOTE(review): the third argument (npyv_nlanes_s32) is presumably the
    // minimum accepted number of elements - confirm against the helper.
    npyv_lanetype_s32 *data = simd_sequence_from_iterable(args, simd_data_qs32, npyv_nlanes_s32);
    if (data == NULL) {
        return NULL;
    }
    // NOTE(review): 65 elements are listed regardless of the lane count;
    // presumably npyv_setf_s32 is a macro that drops the arguments it does not
    // need, so the surplus data[i] are never evaluated - confirm.
    simd_data r = {.vs32 = npyv_setf_s32(
        data[0],  data[1],  data[2],  data[3],  data[4],  data[5],  data[6],  data[7],
        data[8],  data[9],  data[10], data[11], data[12], data[13], data[14], data[15],
        data[16], data[17], data[18], data[19], data[20], data[21], data[22], data[23],
        data[24], data[25], data[26], data[27], data[28], data[29], data[30], data[31],
        data[32], data[33], data[34], data[35], data[36], data[37], data[38], data[39],
        data[40], data[41], data[42], data[43], data[44], data[45], data[46], data[47],
        data[48], data[49], data[50], data[51], data[52], data[53], data[54], data[55],
        data[56], data[57], data[58], data[59], data[60], data[61], data[62], data[63],
        data[64] // for setf
    )};
    // the lane buffer is released before the result object is created
    simd_sequence_free(data);
    return (PyObject*)PySIMDVector_FromData(r, simd_data_vs32);
}

#line 268
/**
 * Python-facing wrapper that builds a vs32 vector through npyv_set_s32().
 *
 * `args` is handed directly to simd_sequence_from_iterable() to obtain a C
 * buffer of s32 lanes. Returns the vector object produced by
 * PySIMDVector_FromData(), or NULL when the conversion returns NULL.
 */
static PyObject *
simd__intrin_set_s32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    // NOTE(review): the third argument (npyv_nlanes_s32) is presumably the
    // minimum accepted number of elements - confirm against the helper.
    npyv_lanetype_s32 *data = simd_sequence_from_iterable(args, simd_data_qs32, npyv_nlanes_s32);
    if (data == NULL) {
        return NULL;
    }
    // NOTE(review): the argument list is shared with the setf variant (hence
    // the trailing data[64]); presumably npyv_set_s32 is a macro that ignores
    // the arguments beyond the lane count - confirm.
    simd_data r = {.vs32 = npyv_set_s32(
        data[0],  data[1],  data[2],  data[3],  data[4],  data[5],  data[6],  data[7],
        data[8],  data[9],  data[10], data[11], data[12], data[13], data[14], data[15],
        data[16], data[17], data[18], data[19], data[20], data[21], data[22], data[23],
        data[24], data[25], data[26], data[27], data[28], data[29], data[30], data[31],
        data[32], data[33], data[34], data[35], data[36], data[37], data[38], data[39],
        data[40], data[41], data[42], data[43], data[44], data[45], data[46], data[47],
        data[48], data[49], data[50], data[51], data[52], data[53], data[54], data[55],
        data[56], data[57], data[58], data[59], data[60], data[61], data[62], data[63],
        data[64] // for setf
    )};
    // the lane buffer is released before the result object is created
    simd_sequence_free(data);
    return (PyObject*)PySIMDVector_FromData(r, simd_data_vs32);
}


/***************************
 * Reorder
 ***************************/
// Reorder wrappers for s32: combinel/combineh list vs32 as their first type
// argument, combine/zip list vs32x2; all take two vs32 operands.
// NOTE(review): SIMD_IMPL_INTRIN_* are defined outside this chunk; arguments
// presumably are (intrinsic name, return type, argument types...) - confirm.
#line 297
SIMD_IMPL_INTRIN_2(combinel_s32, vs32, vs32, vs32)

#line 297
SIMD_IMPL_INTRIN_2(combineh_s32, vs32, vs32, vs32)


#line 303
SIMD_IMPL_INTRIN_2(combine_s32, vs32x2, vs32, vs32)

#line 303
SIMD_IMPL_INTRIN_2(zip_s32, vs32x2, vs32, vs32)


#if 1
SIMD_IMPL_INTRIN_1(rev64_s32, vs32, vs32)
#endif

/***************************
 * Operators
 ***************************/
// `31 > 0` is always true, so the shift wrappers are always compiled for s32.
#if 31 > 0
SIMD_IMPL_INTRIN_2(shl_s32, vs32, vs32, u8)
SIMD_IMPL_INTRIN_2(shr_s32, vs32, vs32, u8)
// immediate constant
// NOTE(review): the trailing 31 / 32 presumably bound the accepted immediate
// shift count - confirm against SIMD_IMPL_INTRIN_2IMM (defined elsewhere).
SIMD_IMPL_INTRIN_2IMM(shli_s32, vs32, vs32, 31)
SIMD_IMPL_INTRIN_2IMM(shri_s32, vs32, vs32, 32)
#endif // shl_imm

// bitwise operators: vs32 operands, vs32 listed as the first type argument
#line 324
SIMD_IMPL_INTRIN_2(and_s32, vs32, vs32, vs32)

#line 324
SIMD_IMPL_INTRIN_2(or_s32, vs32, vs32, vs32)

#line 324
SIMD_IMPL_INTRIN_2(xor_s32, vs32, vs32, vs32)


SIMD_IMPL_INTRIN_1(not_s32, vs32, vs32)

// comparisons: vs32 operands, vb32 listed as the first type argument
#line 332
SIMD_IMPL_INTRIN_2(cmpeq_s32, vb32, vs32, vs32)

#line 332
SIMD_IMPL_INTRIN_2(cmpneq_s32, vb32, vs32, vs32)

#line 332
SIMD_IMPL_INTRIN_2(cmpgt_s32, vb32, vs32, vs32)

#line 332
SIMD_IMPL_INTRIN_2(cmpge_s32, vb32, vs32, vs32)

#line 332
SIMD_IMPL_INTRIN_2(cmplt_s32, vb32, vs32, vs32)

#line 332
SIMD_IMPL_INTRIN_2(cmple_s32, vb32, vs32, vs32)


/***************************
 * Conversion
 ***************************/
// Conversions between vb32 and vs32 in both directions. The expand wrapper
// is compiled out for s32 (`#if 0`).
SIMD_IMPL_INTRIN_1(cvt_s32_b32, vs32,  vb32)
SIMD_IMPL_INTRIN_1(cvt_b32_s32, vb32, vs32)
#if 0
SIMD_IMPL_INTRIN_1(expand_s32_s32, vs32x2, vs32)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
// Arithmetic wrappers for s32. Compiled: add, sub, mul, divisor/divc.
// Compiled out via `#if 0`: adds/subs, div, the muladd family, sum and sumup.
#line 349
SIMD_IMPL_INTRIN_2(add_s32, vs32, vs32, vs32)

#line 349
SIMD_IMPL_INTRIN_2(sub_s32, vs32, vs32, vs32)


#if 0
#line 356
SIMD_IMPL_INTRIN_2(adds_s32, vs32, vs32, vs32)

#line 356
SIMD_IMPL_INTRIN_2(subs_s32, vs32, vs32, vs32)

#endif // sat_sup

#if 1
SIMD_IMPL_INTRIN_2(mul_s32, vs32, vs32, vs32)
#endif // mul_sup

#if 0
SIMD_IMPL_INTRIN_2(div_s32, vs32, vs32, vs32)
#endif // div_sup

// divisor_s32 lists a vs32x3 result from a scalar s32; divc_s32 takes that
// vs32x3 as its second operand.
#if 1
SIMD_IMPL_INTRIN_1(divisor_s32, vs32x3, s32)
SIMD_IMPL_INTRIN_2(divc_s32, vs32, vs32, vs32x3)
#endif // intdiv_sup

#if 0
#line 377
SIMD_IMPL_INTRIN_3(muladd_s32, vs32, vs32, vs32, vs32)

#line 377
SIMD_IMPL_INTRIN_3(mulsub_s32, vs32, vs32, vs32, vs32)

#line 377
SIMD_IMPL_INTRIN_3(nmuladd_s32, vs32, vs32, vs32, vs32)

#line 377
SIMD_IMPL_INTRIN_3(nmulsub_s32, vs32, vs32, vs32, vs32)

#endif // fused_sup

#if 0
SIMD_IMPL_INTRIN_1(sum_s32, s32, vs32)
#endif // sum_sup

#if 0
SIMD_IMPL_INTRIN_1(sumup_s32, s32, vs32)
#endif // sumup_sup

/***************************
 * Math
 ***************************/
// Math wrappers for s32: sqrt .. floor and maxp/minp are compiled out
// (`#if 0`); only max and min are instantiated.
#if 0
#line 396
SIMD_IMPL_INTRIN_1(sqrt_s32, vs32, vs32)

#line 396
SIMD_IMPL_INTRIN_1(recip_s32, vs32, vs32)

#line 396
SIMD_IMPL_INTRIN_1(abs_s32, vs32, vs32)

#line 396
SIMD_IMPL_INTRIN_1(square_s32, vs32, vs32)

#line 396
SIMD_IMPL_INTRIN_1(rint_s32, vs32, vs32)

#line 396
SIMD_IMPL_INTRIN_1(ceil_s32, vs32, vs32)

#line 396
SIMD_IMPL_INTRIN_1(trunc_s32, vs32, vs32)

#line 396
SIMD_IMPL_INTRIN_1(floor_s32, vs32, vs32)

#endif

#line 403
SIMD_IMPL_INTRIN_2(max_s32, vs32, vs32, vs32)

#line 403
SIMD_IMPL_INTRIN_2(min_s32, vs32, vs32, vs32)


#if 0
#line 410
SIMD_IMPL_INTRIN_2(maxp_s32, vs32, vs32, vs32)

#line 410
SIMD_IMPL_INTRIN_2(minp_s32, vs32, vs32, vs32)

#endif

/***************************
 * Mask operations
 ***************************/
// Masked add/sub wrappers: the type list is (vs32, vb32, vs32, vs32, vs32).
// NOTE(review): presumably result, mask, two operands and a fallback vector;
// SIMD_IMPL_INTRIN_4 is defined outside this chunk - confirm.
#line 420
 SIMD_IMPL_INTRIN_4(ifadd_s32, vs32, vb32, vs32, vs32, vs32)

#line 420
 SIMD_IMPL_INTRIN_4(ifsub_s32, vs32, vb32, vs32, vs32, vs32)


#endif // simd_sup

#line 35
#if 1
/***************************
 * Memory
 ***************************/
// Contiguous load wrappers for u64: each lists vu64 and qu64 as its types.
// NOTE(review): SIMD_IMPL_INTRIN_1 is defined outside this chunk; arguments
// presumably are (intrinsic name, return type, argument type) - confirm.
#line 42
SIMD_IMPL_INTRIN_1(load_u64, vu64, qu64)

#line 42
SIMD_IMPL_INTRIN_1(loada_u64, vu64, qu64)

#line 42
SIMD_IMPL_INTRIN_1(loads_u64, vu64, qu64)

#line 42
SIMD_IMPL_INTRIN_1(loadl_u64, vu64, qu64)

#line 47
// special definition due to the nature of store
/**
 * Python wrapper for npyv_store_u64(): takes a qu64 sequence and a vu64
 * vector, runs the store on the sequence buffer and writes the buffer back
 * into the sequence object. Returns None, or NULL on failure.
 */
static PyObject *
simd__intrin_store_u64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu64};
    simd_arg src = {.dtype = simd_data_vu64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:store_u64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_u64(dst.data.qu64, src.data.vu64);
    // propagate the modified buffer to the caller's sequence object
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qu64, simd_data_qu64)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// special definition due to the nature of storea
/**
 * Python wrapper for npyv_storea_u64(): takes a qu64 sequence and a vu64
 * vector, runs the store on the sequence buffer and writes the buffer back
 * into the sequence object. Returns None, or NULL on failure.
 */
static PyObject *
simd__intrin_storea_u64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu64};
    simd_arg src = {.dtype = simd_data_vu64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storea_u64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storea_u64(dst.data.qu64, src.data.vu64);
    // propagate the modified buffer to the caller's sequence object
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qu64, simd_data_qu64)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// special definition due to the nature of stores
/**
 * Python wrapper for npyv_stores_u64(): takes a qu64 sequence and a vu64
 * vector, runs the store on the sequence buffer and writes the buffer back
 * into the sequence object. Returns None, or NULL on failure.
 */
static PyObject *
simd__intrin_stores_u64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu64};
    simd_arg src = {.dtype = simd_data_vu64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:stores_u64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_stores_u64(dst.data.qu64, src.data.vu64);
    // propagate the modified buffer to the caller's sequence object
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qu64, simd_data_qu64)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// special definition due to the nature of storel
/**
 * Python wrapper for npyv_storel_u64(): takes a qu64 sequence and a vu64
 * vector, runs the store on the sequence buffer and writes the buffer back
 * into the sequence object. Returns None, or NULL on failure.
 */
static PyObject *
simd__intrin_storel_u64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu64};
    simd_arg src = {.dtype = simd_data_vu64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storel_u64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storel_u64(dst.data.qu64, src.data.vu64);
    // propagate the modified buffer to the caller's sequence object
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qu64, simd_data_qu64)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// special definition due to the nature of storeh
/**
 * Python wrapper for npyv_storeh_u64(): takes a qu64 sequence and a vu64
 * vector, runs the store on the sequence buffer and writes the buffer back
 * into the sequence object. Returns None, or NULL on failure.
 */
static PyObject *
simd__intrin_storeh_u64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu64};
    simd_arg src = {.dtype = simd_data_vu64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storeh_u64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storeh_u64(dst.data.qu64, src.data.vu64);
    // propagate the modified buffer to the caller's sequence object
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qu64, simd_data_qu64)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 1
// Partial Load
// load_till_u64 lists (qu64, u32, u64) after the vu64 type; load_tillz_u64
// drops the trailing u64.
// NOTE(review): presumably sequence, lane count and fill value, mirroring the
// hand-written loadn_till_u64 wrapper in this file - confirm.
SIMD_IMPL_INTRIN_3(load_till_u64, vu64, qu64, u32, u64)
SIMD_IMPL_INTRIN_2(load_tillz_u64, vu64, qu64, u32)

// Partial Store
/**
 * Python wrapper for npyv_store_till_u64(): takes a qu64 sequence, a u32 lane
 * count and a vu64 vector, runs the partial store on the sequence buffer and
 * writes the buffer back into the sequence object.
 * Returns None, or NULL on failure.
 */
static PyObject *
simd__intrin_store_till_u64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qu64};
    simd_arg count = {.dtype = simd_data_u32};
    simd_arg src = {.dtype = simd_data_vu64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&O&:store_till_u64",
        simd_arg_converter, &dst,
        simd_arg_converter, &count,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_till_u64(dst.data.qu64, count.data.u32, src.data.vu64);
    // propagate the modified buffer to the caller's sequence object
    if (!simd_sequence_fill_iterable(dst.obj, dst.data.qu64, simd_data_qu64)) {
        simd_arg_free(&dst);
        Py_RETURN_NONE;
    }
    simd_arg_free(&dst);
    return NULL;
}

// Non-contiguous Load
#line 113
/**
 * Python wrapper for npyv_loadn_u64() (non-contiguous load).
 *
 * Arguments parsed from `args`: a qu64 sequence and an s64 stride.
 * Raises ValueError when the sequence holds fewer than
 * |stride| * npyv_nlanes_u64 elements. For a negative stride the load starts
 * at the last element of the sequence.
 *
 * Returns the result of simd_arg_to_obj() for the loaded vu64 vector, or NULL
 * with an exception set.
 */
static PyObject *
simd__intrin_loadn_u64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    if (!PyArg_ParseTuple(
        args, "O&O&:loadn_u64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u64 *seq_ptr = seq_arg.data.qu64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u64;
    if (stride < 0) {
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; plain %d would fetch the varargs as int
        PyErr_Format(PyExc_ValueError,
            "loadn_u64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    if (stride < 0) {
        // walk backwards from the last element; done only after the length
        // check so the pointer never leaves the buffer
        seq_ptr += cur_seq_len - 1;
    }
    npyv_u64 rvec = npyv_loadn_u64(seq_ptr, stride);
    simd_arg ret = {
        .dtype = simd_data_vu64, .data = {.vu64=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
/**
 * Python wrapper for npyv_loadn_till_u64() (non-contiguous partial load).
 *
 * Arguments parsed from `args`: a qu64 sequence, an s64 stride, a u32 lane
 * count and a u64 fill value.
 * Raises ValueError when the sequence holds fewer than
 * |stride| * npyv_nlanes_u64 elements. For a negative stride the load starts
 * at the last element of the sequence.
 *
 * Returns the result of simd_arg_to_obj() for the loaded vu64 vector, or NULL
 * with an exception set.
 */
static PyObject *
simd__intrin_loadn_till_u64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    simd_arg fill_arg = {.dtype = simd_data_u64};
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:loadn_till_u64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &fill_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u64 *seq_ptr = seq_arg.data.qu64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u64;
    if (stride < 0) {
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; plain %d would fetch the varargs as int
        PyErr_Format(PyExc_ValueError,
            "loadn_till_u64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    if (stride < 0) {
        // walk backwards from the last element; done only after the length
        // check so the pointer never leaves the buffer
        seq_ptr += cur_seq_len - 1;
    }
    npyv_u64 rvec = npyv_loadn_till_u64(
        seq_ptr, stride, nlane_arg.data.u32, fill_arg.data.u64
    );
    simd_arg ret = {
        .dtype = simd_data_vu64, .data = {.vu64=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
/*
 * Python binding for npyv_loadn_tillz_u64().
 *
 * Python arguments, in order: source sequence, stride, number of lanes.
 *
 * Returns a new vector object. Returns NULL with an exception set when the
 * arguments cannot be converted, or with ValueError when the sequence holds
 * fewer than |stride| * npyv_nlanes_u64 elements.
 */
static PyObject *
simd__intrin_loadn_tillz_u64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:loadn_tillz_u64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u64 *seq_ptr = seq_arg.data.qu64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow; an
    // extreme stride could defeat the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u64;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last element
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // all three values are pointer-sized, hence %zd rather than %d
        PyErr_Format(PyExc_ValueError,
            "loadn_tillz_u64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_u64 rvec = npyv_loadn_tillz_u64(
        seq_ptr, stride, nlane_arg.data.u32
    );
    simd_arg ret = {
        .dtype = simd_data_vu64, .data = {.vu64=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}


// Non-contiguous Store
#line 179
/*
 * Python binding for npyv_storen_u64().
 *
 * Python arguments, in order: destination sequence, stride, vector.
 *
 * The store is performed on the converted copy of the sequence and then
 * written back into the Python object that was passed in.
 *
 * Returns None. Returns NULL with an exception set when the arguments cannot
 * be converted, when the write-back fails, or with ValueError when the
 * sequence holds fewer than |stride| * npyv_nlanes_u64 elements.
 */
static PyObject *
simd__intrin_storen_u64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vu64};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:storen_u64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u64 *seq_ptr = seq_arg.data.qu64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow; an
    // extreme stride could defeat the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u64;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last element
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // all three values are pointer-sized, hence %zd rather than %d
        PyErr_Format(PyExc_ValueError,
            "storen_u64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_u64(seq_ptr, stride, vec_arg.data.vu64);
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qu64, simd_data_qu64)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 179
/*
 * Python binding for npyv_storen_till_u64().
 *
 * Python arguments, in order: destination sequence, stride, number of lanes,
 * vector.
 *
 * The store is performed on the converted copy of the sequence and then
 * written back into the Python object that was passed in.
 *
 * Returns None. Returns NULL with an exception set when the arguments cannot
 * be converted, when the write-back fails, or with ValueError when the
 * sequence holds fewer than |stride| * npyv_nlanes_u64 elements.
 */
static PyObject *
simd__intrin_storen_till_u64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qu64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vu64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    // the name after ':' is what Python shows in argument errors, so it must
    // be this function's name rather than storen_u64
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:storen_till_u64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_u64 *seq_ptr = seq_arg.data.qu64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow; an
    // extreme stride could defeat the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_u64;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last element
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // all three values are pointer-sized, hence %zd rather than %d
        PyErr_Format(PyExc_ValueError,
            "storen_till_u64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_till_u64(
        seq_ptr, stride, nlane_arg.data.u32, vec_arg.data.vu64
    );
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qu64, simd_data_qu64)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#endif // 1

/****************************
 * Lookup tables
 ****************************/
// Both conditions are literal constants left by the template expansion:
// "64 == 32" is false, so lut32_u64 is not compiled; "64 == 64" is true,
// so lut16_u64 is.
#if 64 == 32
SIMD_IMPL_INTRIN_2(lut32_u64, vu64, qu64, vu64)
#endif
#if 64 == 64
SIMD_IMPL_INTRIN_2(lut16_u64, vu64, qu64, vu64)
#endif
/***************************
 * Misc
 ***************************/
// NOTE(review): SIMD_IMPL_INTRIN_<N> appears to take (intrinsic name,
// return dtype, N argument dtypes) -- confirm against its definition.
SIMD_IMPL_INTRIN_0(zero_u64, vu64)
SIMD_IMPL_INTRIN_1(setall_u64, vu64, u64)
SIMD_IMPL_INTRIN_3(select_u64, vu64, vb64, vu64, vu64)

// reinterpret_<to>_u64 wrappers: one instantiation per target lane type,
// each taking a vu64 argument. Every target is compiled unconditionally
// except f64, which depends on NPY_SIMD_F64.
#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u8_u64, vu8, vu64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s8_u64, vs8, vu64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u16_u64, vu16, vu64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s16_u64, vs16, vu64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u32_u64, vu32, vu64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s32_u64, vs32, vu64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u64_u64, vu64, vu64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s64_u64, vs64, vu64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_f32_u64, vf32, vu64)
#endif // simd_sup2

// only built when double-precision vectors are available
#line 256
#if NPY_SIMD_F64
SIMD_IMPL_INTRIN_1(reinterpret_f64_u64, vf64, vu64)
#endif // simd_sup2


/**
 * special definition due to the nature of intrinsics
 * npyv_setf_u64 and npyv_set_u64.
*/
#line 268
/*
 * Python binding for npyv_setf_u64().
 *
 * The whole positional-argument tuple is converted to a u64 lane sequence
 * (at least npyv_nlanes_u64 elements are requested) and forwarded to
 * npyv_setf_u64(). Returns a new vector object, or NULL when the conversion
 * fails.
 *
 * NOTE(review): 65 lane expressions are passed; presumably npyv_setf_u64
 * only consumes as many as the vector width needs -- confirm that nothing
 * beyond the converted sequence is actually read.
 */
static PyObject *
simd__intrin_setf_u64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_u64 *lanes = simd_sequence_from_iterable(
        args, simd_data_qu64, npyv_nlanes_u64
    );
    if (!lanes) {
        return NULL;
    }
    simd_data vec = {.vu64 = npyv_setf_u64(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27], lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35], lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43], lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51], lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59], lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // extra slot needed by setf
    )};
    // the lane buffer is no longer needed once the vector has been built
    simd_sequence_free(lanes);
    return (PyObject*)PySIMDVector_FromData(vec, simd_data_vu64);
}

#line 268
/*
 * Python binding for npyv_set_u64().
 *
 * The whole positional-argument tuple is converted to a u64 lane sequence
 * (at least npyv_nlanes_u64 elements are requested) and forwarded to
 * npyv_set_u64(). Returns a new vector object, or NULL when the conversion
 * fails.
 *
 * NOTE(review): 65 lane expressions are passed; presumably npyv_set_u64
 * only consumes as many as the vector width needs -- confirm that nothing
 * beyond the converted sequence is actually read.
 */
static PyObject *
simd__intrin_set_u64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_u64 *lanes = simd_sequence_from_iterable(
        args, simd_data_qu64, npyv_nlanes_u64
    );
    if (!lanes) {
        return NULL;
    }
    simd_data vec = {.vu64 = npyv_set_u64(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27], lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35], lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43], lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51], lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59], lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // slot shared with the setf variant of this template
    )};
    // the lane buffer is no longer needed once the vector has been built
    simd_sequence_free(lanes);
    return (PyObject*)PySIMDVector_FromData(vec, simd_data_vu64);
}


/***************************
 * Reorder
 ***************************/
// combinel/combineh are declared with a single-vector dtype (vu64) first,
// combine/zip with a vector pair (vu64x2); all four take two vu64 operands.
#line 297
SIMD_IMPL_INTRIN_2(combinel_u64, vu64, vu64, vu64)

#line 297
SIMD_IMPL_INTRIN_2(combineh_u64, vu64, vu64, vu64)


#line 303
SIMD_IMPL_INTRIN_2(combine_u64, vu64x2, vu64, vu64)

#line 303
SIMD_IMPL_INTRIN_2(zip_u64, vu64x2, vu64, vu64)


// rev64 is switched off for this lane type (constant-false condition)
#if 0
SIMD_IMPL_INTRIN_1(rev64_u64, vu64, vu64)
#endif

/***************************
 * Operators
 ***************************/
// "63 > 0" is a literal condition left by the template expansion; it is
// true, so the shift wrappers below are compiled.
#if 63 > 0
SIMD_IMPL_INTRIN_2(shl_u64, vu64, vu64, u8)
SIMD_IMPL_INTRIN_2(shr_u64, vu64, vu64, u8)
// immediate constant
// NOTE(review): the trailing 63 / 64 are presumably the largest immediate
// each wrapper accepts -- confirm against SIMD_IMPL_INTRIN_2IMM.
SIMD_IMPL_INTRIN_2IMM(shli_u64, vu64, vu64, 63)
SIMD_IMPL_INTRIN_2IMM(shri_u64, vu64, vu64, 64)
#endif // shl_imm

// bitwise operators on two vu64 operands
#line 324
SIMD_IMPL_INTRIN_2(and_u64, vu64, vu64, vu64)

#line 324
SIMD_IMPL_INTRIN_2(or_u64, vu64, vu64, vu64)

#line 324
SIMD_IMPL_INTRIN_2(xor_u64, vu64, vu64, vu64)


SIMD_IMPL_INTRIN_1(not_u64, vu64, vu64)

// comparisons: declared with the boolean vector dtype vb64 first
#line 332
SIMD_IMPL_INTRIN_2(cmpeq_u64, vb64, vu64, vu64)

#line 332
SIMD_IMPL_INTRIN_2(cmpneq_u64, vb64, vu64, vu64)

#line 332
SIMD_IMPL_INTRIN_2(cmpgt_u64, vb64, vu64, vu64)

#line 332
SIMD_IMPL_INTRIN_2(cmpge_u64, vb64, vu64, vu64)

#line 332
SIMD_IMPL_INTRIN_2(cmplt_u64, vb64, vu64, vu64)

#line 332
SIMD_IMPL_INTRIN_2(cmple_u64, vb64, vu64, vu64)


/***************************
 * Conversion
 ***************************/
// conversions between the vu64 and vb64 vector dtypes, one per direction
SIMD_IMPL_INTRIN_1(cvt_u64_b64, vu64,  vb64)
SIMD_IMPL_INTRIN_1(cvt_b64_u64, vb64, vu64)
// expand is switched off for this lane type (constant-false condition)
#if 0
SIMD_IMPL_INTRIN_1(expand_u64_u64, vu64x2, vu64)
#endif // expand_sup
/***************************
 * Arithmetic
 *
 * Each "#if 0" / "#if 1" below is a literal switch left by the template
 * expansion; the trailing "// <name>_sup" comment names the switch.
 * For u64 only add, sub, divisor/divc and sum are compiled.
 ***************************/
#line 349
SIMD_IMPL_INTRIN_2(add_u64, vu64, vu64, vu64)

#line 349
SIMD_IMPL_INTRIN_2(sub_u64, vu64, vu64, vu64)


#if 0
#line 356
SIMD_IMPL_INTRIN_2(adds_u64, vu64, vu64, vu64)

#line 356
SIMD_IMPL_INTRIN_2(subs_u64, vu64, vu64, vu64)

#endif // sat_sup

#if 0
SIMD_IMPL_INTRIN_2(mul_u64, vu64, vu64, vu64)
#endif // mul_sup

#if 0
SIMD_IMPL_INTRIN_2(div_u64, vu64, vu64, vu64)
#endif // div_sup

// divisor_u64 is declared with the triple dtype vu64x3, which divc_u64
// takes as its second operand dtype
#if 1
SIMD_IMPL_INTRIN_1(divisor_u64, vu64x3, u64)
SIMD_IMPL_INTRIN_2(divc_u64, vu64, vu64, vu64x3)
#endif // intdiv_sup

#if 0
#line 377
SIMD_IMPL_INTRIN_3(muladd_u64, vu64, vu64, vu64, vu64)

#line 377
SIMD_IMPL_INTRIN_3(mulsub_u64, vu64, vu64, vu64, vu64)

#line 377
SIMD_IMPL_INTRIN_3(nmuladd_u64, vu64, vu64, vu64, vu64)

#line 377
SIMD_IMPL_INTRIN_3(nmulsub_u64, vu64, vu64, vu64, vu64)

#endif // fused_sup

#if 1
SIMD_IMPL_INTRIN_1(sum_u64, u64, vu64)
#endif // sum_sup

#if 0
SIMD_IMPL_INTRIN_1(sumup_u64, u64, vu64)
#endif // sumup_sup

/***************************
 * Math
 *
 * For u64 only max and min are compiled; the two "#if 0" groups are
 * switched off by constant-false conditions.
 ***************************/
#if 0
#line 396
SIMD_IMPL_INTRIN_1(sqrt_u64, vu64, vu64)

#line 396
SIMD_IMPL_INTRIN_1(recip_u64, vu64, vu64)

#line 396
SIMD_IMPL_INTRIN_1(abs_u64, vu64, vu64)

#line 396
SIMD_IMPL_INTRIN_1(square_u64, vu64, vu64)

#line 396
SIMD_IMPL_INTRIN_1(rint_u64, vu64, vu64)

#line 396
SIMD_IMPL_INTRIN_1(ceil_u64, vu64, vu64)

#line 396
SIMD_IMPL_INTRIN_1(trunc_u64, vu64, vu64)

#line 396
SIMD_IMPL_INTRIN_1(floor_u64, vu64, vu64)

#endif

#line 403
SIMD_IMPL_INTRIN_2(max_u64, vu64, vu64, vu64)

#line 403
SIMD_IMPL_INTRIN_2(min_u64, vu64, vu64, vu64)


#if 0
#line 410
SIMD_IMPL_INTRIN_2(maxp_u64, vu64, vu64, vu64)

#line 410
SIMD_IMPL_INTRIN_2(minp_u64, vu64, vu64, vu64)

#endif

/***************************
 * Mask operations
 *
 * ifadd/ifsub list one boolean vector dtype (vb64) followed by three vu64
 * operand dtypes.
 ***************************/
#line 420
 SIMD_IMPL_INTRIN_4(ifadd_u64, vu64, vb64, vu64, vu64, vu64)

#line 420
 SIMD_IMPL_INTRIN_4(ifsub_u64, vu64, vb64, vu64, vu64, vu64)


#endif // simd_sup

#line 35
#if 1
/***************************
 * Memory
 *
 * The four load variants each take a sequence dtype (qs64) and are declared
 * with the vector dtype vs64.
 ***************************/
#line 42
SIMD_IMPL_INTRIN_1(load_s64, vs64, qs64)

#line 42
SIMD_IMPL_INTRIN_1(loada_s64, vs64, qs64)

#line 42
SIMD_IMPL_INTRIN_1(loads_s64, vs64, qs64)

#line 42
SIMD_IMPL_INTRIN_1(loadl_s64, vs64, qs64)

#line 47
// Hand-written binding for npyv_store_s64(): the intrinsic writes into the
// converted sequence buffer, so the result has to be copied back into the
// Python object the caller passed. Returns None, or NULL on failure.
static PyObject *
simd__intrin_store_s64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs64};
    simd_arg src = {.dtype = simd_data_vs64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:store_s64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_s64(dst.data.qs64, src.data.vs64);
    // copy the stored lanes back into the caller's object
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qs64, simd_data_qs64)) {
        goto fail;
    }
    simd_arg_free(&dst);
    Py_RETURN_NONE;
fail:
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// Hand-written binding for npyv_storea_s64(): the intrinsic writes into the
// converted sequence buffer, so the result has to be copied back into the
// Python object the caller passed. Returns None, or NULL on failure.
static PyObject *
simd__intrin_storea_s64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs64};
    simd_arg src = {.dtype = simd_data_vs64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storea_s64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storea_s64(dst.data.qs64, src.data.vs64);
    // copy the stored lanes back into the caller's object
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qs64, simd_data_qs64)) {
        goto fail;
    }
    simd_arg_free(&dst);
    Py_RETURN_NONE;
fail:
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// Hand-written binding for npyv_stores_s64(): the intrinsic writes into the
// converted sequence buffer, so the result has to be copied back into the
// Python object the caller passed. Returns None, or NULL on failure.
static PyObject *
simd__intrin_stores_s64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs64};
    simd_arg src = {.dtype = simd_data_vs64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:stores_s64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_stores_s64(dst.data.qs64, src.data.vs64);
    // copy the stored lanes back into the caller's object
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qs64, simd_data_qs64)) {
        goto fail;
    }
    simd_arg_free(&dst);
    Py_RETURN_NONE;
fail:
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// Hand-written binding for npyv_storel_s64(): the intrinsic writes into the
// converted sequence buffer, so the result has to be copied back into the
// Python object the caller passed. Returns None, or NULL on failure.
static PyObject *
simd__intrin_storel_s64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs64};
    simd_arg src = {.dtype = simd_data_vs64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storel_s64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storel_s64(dst.data.qs64, src.data.vs64);
    // copy the stored lanes back into the caller's object
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qs64, simd_data_qs64)) {
        goto fail;
    }
    simd_arg_free(&dst);
    Py_RETURN_NONE;
fail:
    simd_arg_free(&dst);
    return NULL;
}

#line 47
// Hand-written binding for npyv_storeh_s64(): the intrinsic writes into the
// converted sequence buffer, so the result has to be copied back into the
// Python object the caller passed. Returns None, or NULL on failure.
static PyObject *
simd__intrin_storeh_s64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs64};
    simd_arg src = {.dtype = simd_data_vs64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storeh_s64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storeh_s64(dst.data.qs64, src.data.vs64);
    // copy the stored lanes back into the caller's object
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qs64, simd_data_qs64)) {
        goto fail;
    }
    simd_arg_free(&dst);
    Py_RETURN_NONE;
fail:
    simd_arg_free(&dst);
    return NULL;
}


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 1
// Partial Load
// load_till lists dtypes (qs64, u32, s64) after vs64; load_tillz lists the
// same without the trailing s64.
SIMD_IMPL_INTRIN_3(load_till_s64, vs64, qs64, u32, s64)
SIMD_IMPL_INTRIN_2(load_tillz_s64, vs64, qs64, u32)

// Partial Store
/*
 * Python binding for npyv_store_till_s64().
 *
 * Python arguments, in order: destination sequence, number of lanes, vector.
 * The intrinsic writes into the converted sequence buffer, which is then
 * copied back into the Python object the caller passed.
 *
 * Returns None, or NULL when argument conversion or the write-back fails.
 */
static PyObject *
simd__intrin_store_till_s64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qs64};
    simd_arg count = {.dtype = simd_data_u32};
    simd_arg src = {.dtype = simd_data_vs64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&O&:store_till_s64",
        simd_arg_converter, &dst,
        simd_arg_converter, &count,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_till_s64(dst.data.qs64, count.data.u32, src.data.vs64);
    // copy the stored lanes back into the caller's object
    if (simd_sequence_fill_iterable(dst.obj, dst.data.qs64, simd_data_qs64)) {
        goto fail;
    }
    simd_arg_free(&dst);
    Py_RETURN_NONE;
fail:
    simd_arg_free(&dst);
    return NULL;
}

// Non-contiguous Load
#line 113
/*
 * Python binding for npyv_loadn_s64().
 *
 * Python arguments, in order: source sequence, stride.
 *
 * Returns a new vector object. Returns NULL with an exception set when the
 * arguments cannot be converted, or with ValueError when the sequence holds
 * fewer than |stride| * npyv_nlanes_s64 elements.
 */
static PyObject *
simd__intrin_loadn_s64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    if (!PyArg_ParseTuple(
        args, "O&O&:loadn_s64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s64 *seq_ptr = seq_arg.data.qs64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow; an
    // extreme stride could defeat the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s64;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last element
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // all three values are pointer-sized, hence %zd rather than %d
        PyErr_Format(PyExc_ValueError,
            "loadn_s64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_s64 rvec = npyv_loadn_s64(seq_ptr, stride);
    simd_arg ret = {
        .dtype = simd_data_vs64, .data = {.vs64=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
/*
 * Python binding for npyv_loadn_till_s64().
 *
 * Python arguments, in order: source sequence, stride, number of lanes,
 * fill value.
 *
 * Returns a new vector object. Returns NULL with an exception set when the
 * arguments cannot be converted, or with ValueError when the sequence holds
 * fewer than |stride| * npyv_nlanes_s64 elements.
 */
static PyObject *
simd__intrin_loadn_till_s64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    simd_arg fill_arg = {.dtype = simd_data_s64};
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:loadn_till_s64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &fill_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s64 *seq_ptr = seq_arg.data.qs64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow; an
    // extreme stride could defeat the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s64;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last element
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // all three values are pointer-sized, hence %zd rather than %d
        PyErr_Format(PyExc_ValueError,
            "loadn_till_s64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_s64 rvec = npyv_loadn_till_s64(
        seq_ptr, stride, nlane_arg.data.u32, fill_arg.data.s64
    );
    simd_arg ret = {
        .dtype = simd_data_vs64, .data = {.vs64=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
/*
 * Python binding for npyv_loadn_tillz_s64().
 *
 * Python arguments, in order: source sequence, stride, number of lanes.
 *
 * Returns a new vector object. Returns NULL with an exception set when the
 * arguments cannot be converted, or with ValueError when the sequence holds
 * fewer than |stride| * npyv_nlanes_s64 elements.
 */
static PyObject *
simd__intrin_loadn_tillz_s64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:loadn_tillz_s64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s64 *seq_ptr = seq_arg.data.qs64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow; an
    // extreme stride could defeat the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s64;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last element
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // all three values are pointer-sized, hence %zd rather than %d
        PyErr_Format(PyExc_ValueError,
            "loadn_tillz_s64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_s64 rvec = npyv_loadn_tillz_s64(
        seq_ptr, stride, nlane_arg.data.u32
    );
    simd_arg ret = {
        .dtype = simd_data_vs64, .data = {.vs64=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}


// Non-contiguous Store
#line 179
/*
 * Python binding for npyv_storen_s64().
 *
 * Python arguments, in order: destination sequence, stride, vector.
 *
 * The store is performed on the converted copy of the sequence and then
 * written back into the Python object that was passed in.
 *
 * Returns None. Returns NULL with an exception set when the arguments cannot
 * be converted, when the write-back fails, or with ValueError when the
 * sequence holds fewer than |stride| * npyv_nlanes_s64 elements.
 */
static PyObject *
simd__intrin_storen_s64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vs64};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:storen_s64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s64 *seq_ptr = seq_arg.data.qs64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow; an
    // extreme stride could defeat the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s64;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last element
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // all three values are pointer-sized, hence %zd rather than %d
        PyErr_Format(PyExc_ValueError,
            "storen_s64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_s64(seq_ptr, stride, vec_arg.data.vs64);
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qs64, simd_data_qs64)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 179
/*
 * Python binding for npyv_storen_till_s64().
 *
 * Python arguments, in order: destination sequence, stride, number of lanes,
 * vector.
 *
 * The store is performed on the converted copy of the sequence and then
 * written back into the Python object that was passed in.
 *
 * Returns None. Returns NULL with an exception set when the arguments cannot
 * be converted, when the write-back fails, or with ValueError when the
 * sequence holds fewer than |stride| * npyv_nlanes_s64 elements.
 */
static PyObject *
simd__intrin_storen_till_s64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qs64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vs64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    // the name after ':' is what Python shows in argument errors, so it must
    // be this function's name rather than storen_s64
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:storen_till_s64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_s64 *seq_ptr = seq_arg.data.qs64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    // NOTE(review): this product is not checked for signed overflow; an
    // extreme stride could defeat the length check below.
    Py_ssize_t min_seq_len = stride * npyv_nlanes_s64;
    if (stride < 0) {
        // a negative stride walks backwards, so start at the last element
        seq_ptr += cur_seq_len - 1;
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // all three values are pointer-sized, hence %zd rather than %d
        PyErr_Format(PyExc_ValueError,
            "storen_till_s64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_till_s64(
        seq_ptr, stride, nlane_arg.data.u32, vec_arg.data.vs64
    );
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qs64, simd_data_qs64)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#endif // 1

/****************************
 * Lookup tables
 ****************************/
// Both conditions are literal constants left by the template expansion:
// "64 == 32" is false, so lut32_s64 is not compiled; "64 == 64" is true,
// so lut16_s64 is. Note the last dtype listed is the unsigned vu64.
#if 64 == 32
SIMD_IMPL_INTRIN_2(lut32_s64, vs64, qs64, vu64)
#endif
#if 64 == 64
SIMD_IMPL_INTRIN_2(lut16_s64, vs64, qs64, vu64)
#endif
/***************************
 * Misc
 ***************************/
// NOTE(review): SIMD_IMPL_INTRIN_<N> appears to take (intrinsic name,
// return dtype, N argument dtypes) -- confirm against its definition.
SIMD_IMPL_INTRIN_0(zero_s64, vs64)
SIMD_IMPL_INTRIN_1(setall_s64, vs64, s64)
SIMD_IMPL_INTRIN_3(select_s64, vs64, vb64, vs64, vs64)

// reinterpret_<to>_s64 wrappers: one instantiation per target lane type,
// each taking a vs64 argument. Every target is compiled unconditionally
// except f64, which depends on NPY_SIMD_F64.
#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u8_s64, vu8, vs64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s8_s64, vs8, vs64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u16_s64, vu16, vs64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s16_s64, vs16, vs64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u32_s64, vu32, vs64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s32_s64, vs32, vs64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u64_s64, vu64, vs64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s64_s64, vs64, vs64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_f32_s64, vf32, vs64)
#endif // simd_sup2

// only built when double-precision vectors are available
#line 256
#if NPY_SIMD_F64
SIMD_IMPL_INTRIN_1(reinterpret_f64_s64, vf64, vs64)
#endif // simd_sup2


/**
 * special definition due to the nature of intrinsics
 * npyv_setf_s64 and npyv_set_s64.
*/
#line 268
/*
 * Python binding for npyv_setf_s64().
 *
 * The whole positional-argument tuple is converted to an s64 lane sequence
 * (at least npyv_nlanes_s64 elements are requested) and forwarded to
 * npyv_setf_s64(). Returns a new vector object, or NULL when the conversion
 * fails.
 *
 * NOTE(review): 65 lane expressions are passed; presumably npyv_setf_s64
 * only consumes as many as the vector width needs -- confirm that nothing
 * beyond the converted sequence is actually read.
 */
static PyObject *
simd__intrin_setf_s64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_s64 *lanes = simd_sequence_from_iterable(
        args, simd_data_qs64, npyv_nlanes_s64
    );
    if (!lanes) {
        return NULL;
    }
    simd_data vec = {.vs64 = npyv_setf_s64(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27], lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35], lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43], lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51], lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59], lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // extra slot needed by setf
    )};
    // the lane buffer is no longer needed once the vector has been built
    simd_sequence_free(lanes);
    return (PyObject*)PySIMDVector_FromData(vec, simd_data_vs64);
}

#line 268
/*
 * Python binding for npyv_set_s64().
 *
 * The whole positional-argument tuple is converted to an s64 lane sequence
 * (at least npyv_nlanes_s64 elements are requested) and forwarded to
 * npyv_set_s64(). Returns a new vector object, or NULL when the conversion
 * fails.
 *
 * NOTE(review): 65 lane expressions are passed; presumably npyv_set_s64
 * only consumes as many as the vector width needs -- confirm that nothing
 * beyond the converted sequence is actually read.
 */
static PyObject *
simd__intrin_set_s64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_s64 *lanes = simd_sequence_from_iterable(
        args, simd_data_qs64, npyv_nlanes_s64
    );
    if (!lanes) {
        return NULL;
    }
    simd_data vec = {.vs64 = npyv_set_s64(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27], lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35], lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43], lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51], lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59], lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // slot shared with the setf variant of this template
    )};
    // the lane buffer is no longer needed once the vector has been built
    simd_sequence_free(lanes);
    return (PyObject*)PySIMDVector_FromData(vec, simd_data_vs64);
}


/***************************
 * Reorder
 ***************************/
// combinel/combineh are declared with a single-vector dtype (vs64) first,
// combine/zip with a vector pair (vs64x2); all four take two vs64 operands.
#line 297
SIMD_IMPL_INTRIN_2(combinel_s64, vs64, vs64, vs64)

#line 297
SIMD_IMPL_INTRIN_2(combineh_s64, vs64, vs64, vs64)


#line 303
SIMD_IMPL_INTRIN_2(combine_s64, vs64x2, vs64, vs64)

#line 303
SIMD_IMPL_INTRIN_2(zip_s64, vs64x2, vs64, vs64)


// rev64 is switched off for this lane type (constant-false condition)
#if 0
SIMD_IMPL_INTRIN_1(rev64_s64, vs64, vs64)
#endif

/***************************
 * Operators
 ***************************/
// "63 > 0" is a literal condition left by the template expansion; it is
// true, so the shift wrappers below are compiled.
#if 63 > 0
SIMD_IMPL_INTRIN_2(shl_s64, vs64, vs64, u8)
SIMD_IMPL_INTRIN_2(shr_s64, vs64, vs64, u8)
// immediate constant
// NOTE(review): the trailing 63 / 64 are presumably the largest immediate
// each wrapper accepts -- confirm against SIMD_IMPL_INTRIN_2IMM.
SIMD_IMPL_INTRIN_2IMM(shli_s64, vs64, vs64, 63)
SIMD_IMPL_INTRIN_2IMM(shri_s64, vs64, vs64, 64)
#endif // shl_imm

// bitwise operators on two vs64 operands
#line 324
SIMD_IMPL_INTRIN_2(and_s64, vs64, vs64, vs64)

#line 324
SIMD_IMPL_INTRIN_2(or_s64, vs64, vs64, vs64)

#line 324
SIMD_IMPL_INTRIN_2(xor_s64, vs64, vs64, vs64)


SIMD_IMPL_INTRIN_1(not_s64, vs64, vs64)

// comparisons: declared with the boolean vector dtype vb64 first
#line 332
SIMD_IMPL_INTRIN_2(cmpeq_s64, vb64, vs64, vs64)

#line 332
SIMD_IMPL_INTRIN_2(cmpneq_s64, vb64, vs64, vs64)

#line 332
SIMD_IMPL_INTRIN_2(cmpgt_s64, vb64, vs64, vs64)

#line 332
SIMD_IMPL_INTRIN_2(cmpge_s64, vb64, vs64, vs64)

#line 332
SIMD_IMPL_INTRIN_2(cmplt_s64, vb64, vs64, vs64)

#line 332
SIMD_IMPL_INTRIN_2(cmple_s64, vb64, vs64, vs64)


/***************************
 * Conversion
 ***************************/
// conversions between the vs64 and vb64 vector dtypes, one per direction
SIMD_IMPL_INTRIN_1(cvt_s64_b64, vs64,  vb64)
SIMD_IMPL_INTRIN_1(cvt_b64_s64, vb64, vs64)
// expand is switched off for this lane type (constant-false condition)
#if 0
SIMD_IMPL_INTRIN_1(expand_s64_s64, vs64x2, vs64)
#endif // expand_sup
/***************************
 * Arithmetic
 *
 * Each "#if 0" / "#if 1" below is a literal switch left by the template
 * expansion; the trailing "// <name>_sup" comment names the switch.
 * For s64 only add, sub and divisor/divc are compiled (sum is off here).
 ***************************/
#line 349
SIMD_IMPL_INTRIN_2(add_s64, vs64, vs64, vs64)

#line 349
SIMD_IMPL_INTRIN_2(sub_s64, vs64, vs64, vs64)


#if 0
#line 356
SIMD_IMPL_INTRIN_2(adds_s64, vs64, vs64, vs64)

#line 356
SIMD_IMPL_INTRIN_2(subs_s64, vs64, vs64, vs64)

#endif // sat_sup

#if 0
SIMD_IMPL_INTRIN_2(mul_s64, vs64, vs64, vs64)
#endif // mul_sup

#if 0
SIMD_IMPL_INTRIN_2(div_s64, vs64, vs64, vs64)
#endif // div_sup

// divisor_s64 is declared with the triple dtype vs64x3, which divc_s64
// takes as its second operand dtype
#if 1
SIMD_IMPL_INTRIN_1(divisor_s64, vs64x3, s64)
SIMD_IMPL_INTRIN_2(divc_s64, vs64, vs64, vs64x3)
#endif // intdiv_sup

#if 0
#line 377
SIMD_IMPL_INTRIN_3(muladd_s64, vs64, vs64, vs64, vs64)

#line 377
SIMD_IMPL_INTRIN_3(mulsub_s64, vs64, vs64, vs64, vs64)

#line 377
SIMD_IMPL_INTRIN_3(nmuladd_s64, vs64, vs64, vs64, vs64)

#line 377
SIMD_IMPL_INTRIN_3(nmulsub_s64, vs64, vs64, vs64, vs64)

#endif // fused_sup

#if 0
SIMD_IMPL_INTRIN_1(sum_s64, s64, vs64)
#endif // sum_sup

#if 0
SIMD_IMPL_INTRIN_1(sumup_s64, s64, vs64)
#endif // sumup_sup

/***************************
 * Math
 *
 * For s64 the sqrt..floor group and the maxp/minp pair are guarded by
 * "#if 0" and compiled out; only max/min are emitted.
 ***************************/
#if 0
#line 396
SIMD_IMPL_INTRIN_1(sqrt_s64, vs64, vs64)

#line 396
SIMD_IMPL_INTRIN_1(recip_s64, vs64, vs64)

#line 396
SIMD_IMPL_INTRIN_1(abs_s64, vs64, vs64)

#line 396
SIMD_IMPL_INTRIN_1(square_s64, vs64, vs64)

#line 396
SIMD_IMPL_INTRIN_1(rint_s64, vs64, vs64)

#line 396
SIMD_IMPL_INTRIN_1(ceil_s64, vs64, vs64)

#line 396
SIMD_IMPL_INTRIN_1(trunc_s64, vs64, vs64)

#line 396
SIMD_IMPL_INTRIN_1(floor_s64, vs64, vs64)

#endif

#line 403
SIMD_IMPL_INTRIN_2(max_s64, vs64, vs64, vs64)

#line 403
SIMD_IMPL_INTRIN_2(min_s64, vs64, vs64, vs64)


#if 0
#line 410
SIMD_IMPL_INTRIN_2(maxp_s64, vs64, vs64, vs64)

#line 410
SIMD_IMPL_INTRIN_2(minp_s64, vs64, vs64, vs64)

#endif

/***************************
 * Mask operations
 *
 * ifadd/ifsub take a vb64 value followed by three vs64 values.
 ***************************/
#line 420
 SIMD_IMPL_INTRIN_4(ifadd_s64, vs64, vb64, vs64, vs64, vs64)

#line 420
 SIMD_IMPL_INTRIN_4(ifsub_s64, vs64, vb64, vs64, vs64, vs64)


#endif // simd_sup

#line 35
#if 1
/***************************
 * Memory
 *
 * Contiguous f32 loads (load, loada, loads, loadl): each takes a qf32
 * sequence and yields a vf32 value.
 * NOTE(review): SIMD_IMPL_INTRIN_1 is defined outside this chunk; the
 * argument order (name, result tag, argument tag) is inferred - confirm.
 ***************************/
#line 42
SIMD_IMPL_INTRIN_1(load_f32, vf32, qf32)

#line 42
SIMD_IMPL_INTRIN_1(loada_f32, vf32, qf32)

#line 42
SIMD_IMPL_INTRIN_1(loads_f32, vf32, qf32)

#line 42
SIMD_IMPL_INTRIN_1(loadl_f32, vf32, qf32)

#line 47
// Hand-written wrapper exposing npyv_store_f32 to Python.
// It cannot use the generic helpers because the intrinsic's output lands in
// the sequence argument, which then has to be written back to the object.
static PyObject *
simd__intrin_store_f32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qf32};
    simd_arg src = {.dtype = simd_data_vf32};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:store_f32",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_f32(dst.data.qf32, src.data.vf32);
    // write-back; the sequence argument is released on both outcomes
    const int write_back_failed =
        simd_sequence_fill_iterable(dst.obj, dst.data.qf32, simd_data_qf32) != 0;
    simd_arg_free(&dst);
    if (write_back_failed) {
        return NULL;
    }
    Py_RETURN_NONE;
}

#line 47
// Hand-written wrapper exposing npyv_storea_f32 to Python.
// It cannot use the generic helpers because the intrinsic's output lands in
// the sequence argument, which then has to be written back to the object.
static PyObject *
simd__intrin_storea_f32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qf32};
    simd_arg src = {.dtype = simd_data_vf32};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storea_f32",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storea_f32(dst.data.qf32, src.data.vf32);
    // write-back; the sequence argument is released on both outcomes
    const int write_back_failed =
        simd_sequence_fill_iterable(dst.obj, dst.data.qf32, simd_data_qf32) != 0;
    simd_arg_free(&dst);
    if (write_back_failed) {
        return NULL;
    }
    Py_RETURN_NONE;
}

#line 47
// Hand-written wrapper exposing npyv_stores_f32 to Python.
// It cannot use the generic helpers because the intrinsic's output lands in
// the sequence argument, which then has to be written back to the object.
static PyObject *
simd__intrin_stores_f32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qf32};
    simd_arg src = {.dtype = simd_data_vf32};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:stores_f32",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_stores_f32(dst.data.qf32, src.data.vf32);
    // write-back; the sequence argument is released on both outcomes
    const int write_back_failed =
        simd_sequence_fill_iterable(dst.obj, dst.data.qf32, simd_data_qf32) != 0;
    simd_arg_free(&dst);
    if (write_back_failed) {
        return NULL;
    }
    Py_RETURN_NONE;
}

#line 47
// Hand-written wrapper exposing npyv_storel_f32 to Python.
// It cannot use the generic helpers because the intrinsic's output lands in
// the sequence argument, which then has to be written back to the object.
static PyObject *
simd__intrin_storel_f32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qf32};
    simd_arg src = {.dtype = simd_data_vf32};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storel_f32",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storel_f32(dst.data.qf32, src.data.vf32);
    // write-back; the sequence argument is released on both outcomes
    const int write_back_failed =
        simd_sequence_fill_iterable(dst.obj, dst.data.qf32, simd_data_qf32) != 0;
    simd_arg_free(&dst);
    if (write_back_failed) {
        return NULL;
    }
    Py_RETURN_NONE;
}

#line 47
// Hand-written wrapper exposing npyv_storeh_f32 to Python.
// It cannot use the generic helpers because the intrinsic's output lands in
// the sequence argument, which then has to be written back to the object.
static PyObject *
simd__intrin_storeh_f32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qf32};
    simd_arg src = {.dtype = simd_data_vf32};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storeh_f32",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storeh_f32(dst.data.qf32, src.data.vf32);
    // write-back; the sequence argument is released on both outcomes
    const int write_back_failed =
        simd_sequence_fill_iterable(dst.obj, dst.data.qf32, simd_data_qf32) != 0;
    simd_arg_free(&dst);
    if (write_back_failed) {
        return NULL;
    }
    Py_RETURN_NONE;
}


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 1
// Partial Load
// load_till takes a qf32 sequence, a u32 and an f32; load_tillz takes only
// the sequence and the u32.
// NOTE(review): presumably the u32 is a lane count and the f32 a fill value,
// as in the hand-written loadn_till wrapper - confirm.
SIMD_IMPL_INTRIN_3(load_till_f32, vf32, qf32, u32, f32)
SIMD_IMPL_INTRIN_2(load_tillz_f32, vf32, qf32, u32)

// Partial Store
// Hand-written wrapper exposing npyv_store_till_f32 to Python. Arguments, in
// order: destination sequence (qf32), lane count (u32), source vector (vf32).
static PyObject *
simd__intrin_store_till_f32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qf32};
    simd_arg count = {.dtype = simd_data_u32};
    simd_arg src = {.dtype = simd_data_vf32};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&O&:store_till_f32",
        simd_arg_converter, &dst,
        simd_arg_converter, &count,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_till_f32(dst.data.qf32, count.data.u32, src.data.vf32);
    // write-back; the sequence argument is released on both outcomes
    const int write_back_failed =
        simd_sequence_fill_iterable(dst.obj, dst.data.qf32, simd_data_qf32) != 0;
    simd_arg_free(&dst);
    if (write_back_failed) {
        return NULL;
    }
    Py_RETURN_NONE;
}

// Non-contiguous Load
#line 113
/**
 * Python wrapper for npyv_loadn_f32 (non-contiguous load).
 *
 * Python arguments, in order: a sequence (converted as simd_data_qf32) and a
 * stride (converted as simd_data_s64). With a negative stride the load starts
 * at the last element of the sequence.
 *
 * Returns the loaded vector as a Python object, or NULL with an exception
 * set. ValueError is raised when the sequence holds fewer than
 * |stride| * npyv_nlanes_f32 elements.
 *
 * NOTE: this file is generated from a template; the format-specifier fix
 * below must also be applied to the .src template.
 */
static PyObject *
simd__intrin_loadn_f32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qf32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    if (!PyArg_ParseTuple(
        args, "O&O&:loadn_f32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
    )) {
        return NULL;
    }
    npyv_lanetype_f32 *seq_ptr = seq_arg.data.qf32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_f32;
    if (stride < 0) {
        // walk backwards from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; "%d" would read these pointer-sized values
        // as int, which is undefined behaviour for variadic arguments.
        PyErr_Format(PyExc_ValueError,
            "loadn_f32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_f32 rvec = npyv_loadn_f32(seq_ptr, stride);
    simd_arg ret = {
        .dtype = simd_data_vf32, .data = {.vf32=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
/**
 * Python wrapper for npyv_loadn_till_f32 (non-contiguous, partial load).
 *
 * Python arguments, in order: a sequence (simd_data_qf32), a stride
 * (simd_data_s64), a lane count (simd_data_u32) and a fill value
 * (simd_data_f32). With a negative stride the load starts at the last
 * element of the sequence.
 *
 * Returns the loaded vector as a Python object, or NULL with an exception
 * set. ValueError is raised when the sequence holds fewer than
 * |stride| * npyv_nlanes_f32 elements; the check uses the full lane count
 * regardless of the lane-count argument.
 *
 * NOTE: this file is generated from a template; the format-specifier fix
 * below must also be applied to the .src template.
 */
static PyObject *
simd__intrin_loadn_till_f32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qf32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    simd_arg fill_arg = {.dtype = simd_data_f32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:loadn_till_f32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &fill_arg
    )) {
        return NULL;
    }
    npyv_lanetype_f32 *seq_ptr = seq_arg.data.qf32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_f32;
    if (stride < 0) {
        // walk backwards from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; "%d" would read these pointer-sized values
        // as int, which is undefined behaviour for variadic arguments.
        PyErr_Format(PyExc_ValueError,
            "loadn_till_f32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_f32 rvec = npyv_loadn_till_f32(
        seq_ptr, stride, nlane_arg.data.u32, fill_arg.data.f32
    );
    simd_arg ret = {
        .dtype = simd_data_vf32, .data = {.vf32=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
/**
 * Python wrapper for npyv_loadn_tillz_f32 (non-contiguous, partial load).
 *
 * Python arguments, in order: a sequence (simd_data_qf32), a stride
 * (simd_data_s64) and a lane count (simd_data_u32). With a negative stride
 * the load starts at the last element of the sequence.
 *
 * Returns the loaded vector as a Python object, or NULL with an exception
 * set. ValueError is raised when the sequence holds fewer than
 * |stride| * npyv_nlanes_f32 elements; the check uses the full lane count
 * regardless of the lane-count argument.
 *
 * NOTE: this file is generated from a template; the format-specifier fix
 * below must also be applied to the .src template.
 */
static PyObject *
simd__intrin_loadn_tillz_f32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qf32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:loadn_tillz_f32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg
    )) {
        return NULL;
    }
    npyv_lanetype_f32 *seq_ptr = seq_arg.data.qf32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_f32;
    if (stride < 0) {
        // walk backwards from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; "%d" would read these pointer-sized values
        // as int, which is undefined behaviour for variadic arguments.
        PyErr_Format(PyExc_ValueError,
            "loadn_tillz_f32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_f32 rvec = npyv_loadn_tillz_f32(
        seq_ptr, stride, nlane_arg.data.u32
    );
    simd_arg ret = {
        .dtype = simd_data_vf32, .data = {.vf32=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}


// Non-contiguous Store
#line 179
/**
 * Python wrapper for npyv_storen_f32 (non-contiguous store).
 *
 * Python arguments, in order: a destination sequence (simd_data_qf32), a
 * stride (simd_data_s64) and the source vector (simd_data_vf32). With a
 * negative stride the store starts at the last element of the sequence.
 * After the store the buffer is written back to the Python object.
 *
 * Returns None, or NULL with an exception set. ValueError is raised when
 * the sequence holds fewer than |stride| * npyv_nlanes_f32 elements.
 *
 * NOTE: this file is generated from a template; the message fixes below
 * must also be applied to the .src template.
 */
static PyObject *
simd__intrin_storen_f32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qf32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vf32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:storen_f32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_f32 *seq_ptr = seq_arg.data.qf32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_f32;
    if (stride < 0) {
        // walk backwards from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t ("%d" is undefined behaviour for these
        // arguments); the first literal now ends with a space so the text
        // no longer reads "theminimum".
        PyErr_Format(PyExc_ValueError,
            "storen_f32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_f32(seq_ptr, stride, vec_arg.data.vf32);
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qf32, simd_data_qf32)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 179
/**
 * Python wrapper for npyv_storen_till_f32 (non-contiguous, partial store).
 *
 * Python arguments, in order: a destination sequence (simd_data_qf32), a
 * stride (simd_data_s64), a lane count (simd_data_u32) and the source vector
 * (simd_data_vf32). With a negative stride the store starts at the last
 * element of the sequence. After the store the buffer is written back to
 * the Python object.
 *
 * Returns None, or NULL with an exception set. ValueError is raised when
 * the sequence holds fewer than |stride| * npyv_nlanes_f32 elements; the
 * check uses the full lane count regardless of the lane-count argument.
 *
 * NOTE: this file is generated from a template; the message fixes below
 * must also be applied to the .src template.
 */
static PyObject *
simd__intrin_storen_till_f32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qf32};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vf32};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    // The name after ':' is what argument errors are reported under; it
    // previously said "storen_f32".
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:storen_till_f32",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_f32 *seq_ptr = seq_arg.data.qf32;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_f32;
    if (stride < 0) {
        // walk backwards from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t ("%d" is undefined behaviour for these
        // arguments); the first literal now ends with a space so the text
        // no longer reads "theminimum".
        PyErr_Format(PyExc_ValueError,
            "storen_till_f32(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_till_f32(
        seq_ptr, stride, nlane_arg.data.u32, vec_arg.data.vf32
    );
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qf32, simd_data_qf32)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#endif // 1

/****************************
 * Lookup tables
 *
 * The guards compare the lane width (32 for f32) against 32 and 64, so
 * only lut32_f32 is emitted here; lut16 is compiled out.
 ****************************/
#if 32 == 32
SIMD_IMPL_INTRIN_2(lut32_f32, vf32, qf32, vu32)
#endif
#if 32 == 64
SIMD_IMPL_INTRIN_2(lut16_f32, vf32, qf32, vu32)
#endif
/***************************
 * Misc
 *
 * zero/setall/select, followed by one reinterpret_<to>_f32 wrapper per
 * target type. Only the f64 target is conditional (NPY_SIMD_F64).
 ***************************/
SIMD_IMPL_INTRIN_0(zero_f32, vf32)
SIMD_IMPL_INTRIN_1(setall_f32, vf32, f32)
SIMD_IMPL_INTRIN_3(select_f32, vf32, vb32, vf32, vf32)

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u8_f32, vu8, vf32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s8_f32, vs8, vf32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u16_f32, vu16, vf32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s16_f32, vs16, vf32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u32_f32, vu32, vf32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s32_f32, vs32, vf32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u64_f32, vu64, vf32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s64_f32, vs64, vf32)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_f32_f32, vf32, vf32)
#endif // simd_sup2

#line 256
#if NPY_SIMD_F64
SIMD_IMPL_INTRIN_1(reinterpret_f64_f32, vf64, vf32)
#endif // simd_sup2

/**
 * Hand-written wrappers for npyv_setf_f32 and npyv_set_f32: the whole
 * Python argument tuple is converted to one lane buffer, whose elements
 * are then handed to the intrinsic one by one.
*/
#line 268
static PyObject *
simd__intrin_setf_f32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_f32 *lanes = simd_sequence_from_iterable(
        args, simd_data_qf32, npyv_nlanes_f32
    );
    if (!lanes) {
        return NULL;
    }
    simd_data vec = {.vf32 = npyv_setf_f32(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27], lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35], lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43], lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51], lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59], lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // for setf
    )};
    simd_sequence_free(lanes);
    return (PyObject*)PySIMDVector_FromData(vec, simd_data_vf32);
}

#line 268
// Hand-written wrapper for npyv_set_f32: the Python argument tuple is
// converted to a lane buffer whose elements are passed individually.
static PyObject *
simd__intrin_set_f32(PyObject* NPY_UNUSED(self), PyObject *args)
{
    npyv_lanetype_f32 *lanes = simd_sequence_from_iterable(
        args, simd_data_qf32, npyv_nlanes_f32
    );
    if (!lanes) {
        return NULL;
    }
    simd_data vec = {.vf32 = npyv_set_f32(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27], lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35], lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43], lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51], lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59], lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // for setf
    )};
    simd_sequence_free(lanes);
    return (PyObject*)PySIMDVector_FromData(vec, simd_data_vf32);
}


/***************************
 * Reorder
 *
 * combinel/combineh yield a single vf32; combine/zip yield a vf32x2 pair.
 ***************************/
#line 297
SIMD_IMPL_INTRIN_2(combinel_f32, vf32, vf32, vf32)

#line 297
SIMD_IMPL_INTRIN_2(combineh_f32, vf32, vf32, vf32)


#line 303
SIMD_IMPL_INTRIN_2(combine_f32, vf32x2, vf32, vf32)

#line 303
SIMD_IMPL_INTRIN_2(zip_f32, vf32x2, vf32, vf32)


#if 1
SIMD_IMPL_INTRIN_1(rev64_f32, vf32, vf32)
#endif

/***************************
 * Operators
 *
 * The shift group is guarded by "0 > 0" for f32 and is therefore compiled
 * out; the logical and comparison wrappers are always emitted.
 ***************************/
#if 0 > 0
SIMD_IMPL_INTRIN_2(shl_f32, vf32, vf32, u8)
SIMD_IMPL_INTRIN_2(shr_f32, vf32, vf32, u8)
// immediate constant
SIMD_IMPL_INTRIN_2IMM(shli_f32, vf32, vf32, 0)
SIMD_IMPL_INTRIN_2IMM(shri_f32, vf32, vf32, 0)
#endif // shl_imm

#line 324
SIMD_IMPL_INTRIN_2(and_f32, vf32, vf32, vf32)

#line 324
SIMD_IMPL_INTRIN_2(or_f32, vf32, vf32, vf32)

#line 324
SIMD_IMPL_INTRIN_2(xor_f32, vf32, vf32, vf32)


SIMD_IMPL_INTRIN_1(not_f32, vf32, vf32)

#line 332
SIMD_IMPL_INTRIN_2(cmpeq_f32, vb32, vf32, vf32)

#line 332
SIMD_IMPL_INTRIN_2(cmpneq_f32, vb32, vf32, vf32)

#line 332
SIMD_IMPL_INTRIN_2(cmpgt_f32, vb32, vf32, vf32)

#line 332
SIMD_IMPL_INTRIN_2(cmpge_f32, vb32, vf32, vf32)

#line 332
SIMD_IMPL_INTRIN_2(cmplt_f32, vb32, vf32, vf32)

#line 332
SIMD_IMPL_INTRIN_2(cmple_f32, vb32, vf32, vf32)


/***************************
 * Conversion
 *
 * cvt_* wrappers between vf32 and vb32 are always emitted; the expand
 * wrapper is compiled out for f32 (template flag "expand_sup" is 0).
 ***************************/
SIMD_IMPL_INTRIN_1(cvt_f32_b32, vf32,  vb32)
SIMD_IMPL_INTRIN_1(cvt_b32_f32, vb32, vf32)
#if 0
SIMD_IMPL_INTRIN_1(expand_f32_f32, vf32x2, vf32)
#endif // expand_sup
/***************************
 * Arithmetic
 *
 * For f32 add/sub, mul, div, the fused multiply group and sum are
 * emitted. The groups tagged sat_sup, intdiv_sup and sumup_sup are guarded
 * by "#if 0" and therefore compiled out.
 ***************************/
#line 349
SIMD_IMPL_INTRIN_2(add_f32, vf32, vf32, vf32)

#line 349
SIMD_IMPL_INTRIN_2(sub_f32, vf32, vf32, vf32)


#if 0
#line 356
SIMD_IMPL_INTRIN_2(adds_f32, vf32, vf32, vf32)

#line 356
SIMD_IMPL_INTRIN_2(subs_f32, vf32, vf32, vf32)

#endif // sat_sup

#if 1
SIMD_IMPL_INTRIN_2(mul_f32, vf32, vf32, vf32)
#endif // mul_sup

#if 1
SIMD_IMPL_INTRIN_2(div_f32, vf32, vf32, vf32)
#endif // div_sup

#if 0
SIMD_IMPL_INTRIN_1(divisor_f32, vf32x3, f32)
SIMD_IMPL_INTRIN_2(divc_f32, vf32, vf32, vf32x3)
#endif // intdiv_sup

#if 1
#line 377
SIMD_IMPL_INTRIN_3(muladd_f32, vf32, vf32, vf32, vf32)

#line 377
SIMD_IMPL_INTRIN_3(mulsub_f32, vf32, vf32, vf32, vf32)

#line 377
SIMD_IMPL_INTRIN_3(nmuladd_f32, vf32, vf32, vf32, vf32)

#line 377
SIMD_IMPL_INTRIN_3(nmulsub_f32, vf32, vf32, vf32, vf32)

#endif // fused_sup

#if 1
SIMD_IMPL_INTRIN_1(sum_f32, f32, vf32)
#endif // sum_sup

#if 0
SIMD_IMPL_INTRIN_1(sumup_f32, f32, vf32)
#endif // sumup_sup

/***************************
 * Math
 *
 * For f32 every group in this section is enabled: the sqrt..floor group,
 * max/min and maxp/minp.
 ***************************/
#if 1
#line 396
SIMD_IMPL_INTRIN_1(sqrt_f32, vf32, vf32)

#line 396
SIMD_IMPL_INTRIN_1(recip_f32, vf32, vf32)

#line 396
SIMD_IMPL_INTRIN_1(abs_f32, vf32, vf32)

#line 396
SIMD_IMPL_INTRIN_1(square_f32, vf32, vf32)

#line 396
SIMD_IMPL_INTRIN_1(rint_f32, vf32, vf32)

#line 396
SIMD_IMPL_INTRIN_1(ceil_f32, vf32, vf32)

#line 396
SIMD_IMPL_INTRIN_1(trunc_f32, vf32, vf32)

#line 396
SIMD_IMPL_INTRIN_1(floor_f32, vf32, vf32)

#endif

#line 403
SIMD_IMPL_INTRIN_2(max_f32, vf32, vf32, vf32)

#line 403
SIMD_IMPL_INTRIN_2(min_f32, vf32, vf32, vf32)


#if 1
#line 410
SIMD_IMPL_INTRIN_2(maxp_f32, vf32, vf32, vf32)

#line 410
SIMD_IMPL_INTRIN_2(minp_f32, vf32, vf32, vf32)

#endif

/***************************
 * Mask operations
 *
 * ifadd/ifsub take a vb32 value followed by three vf32 values.
 ***************************/
#line 420
 SIMD_IMPL_INTRIN_4(ifadd_f32, vf32, vb32, vf32, vf32, vf32)

#line 420
 SIMD_IMPL_INTRIN_4(ifsub_f32, vf32, vb32, vf32, vf32, vf32)


#endif // simd_sup

#line 35
#if NPY_SIMD_F64
/***************************
 * Memory
 *
 * Contiguous f64 loads (load, loada, loads, loadl): each takes a qf64
 * sequence and yields a vf64 value. The enclosing guard makes the whole
 * f64 section conditional on NPY_SIMD_F64.
 * NOTE(review): SIMD_IMPL_INTRIN_1 is defined outside this chunk; the
 * argument order (name, result tag, argument tag) is inferred - confirm.
 ***************************/
#line 42
SIMD_IMPL_INTRIN_1(load_f64, vf64, qf64)

#line 42
SIMD_IMPL_INTRIN_1(loada_f64, vf64, qf64)

#line 42
SIMD_IMPL_INTRIN_1(loads_f64, vf64, qf64)

#line 42
SIMD_IMPL_INTRIN_1(loadl_f64, vf64, qf64)

#line 47
// Hand-written wrapper exposing npyv_store_f64 to Python.
// It cannot use the generic helpers because the intrinsic's output lands in
// the sequence argument, which then has to be written back to the object.
static PyObject *
simd__intrin_store_f64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qf64};
    simd_arg src = {.dtype = simd_data_vf64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:store_f64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_f64(dst.data.qf64, src.data.vf64);
    // write-back; the sequence argument is released on both outcomes
    const int write_back_failed =
        simd_sequence_fill_iterable(dst.obj, dst.data.qf64, simd_data_qf64) != 0;
    simd_arg_free(&dst);
    if (write_back_failed) {
        return NULL;
    }
    Py_RETURN_NONE;
}

#line 47
// Hand-written wrapper exposing npyv_storea_f64 to Python.
// It cannot use the generic helpers because the intrinsic's output lands in
// the sequence argument, which then has to be written back to the object.
static PyObject *
simd__intrin_storea_f64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qf64};
    simd_arg src = {.dtype = simd_data_vf64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storea_f64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storea_f64(dst.data.qf64, src.data.vf64);
    // write-back; the sequence argument is released on both outcomes
    const int write_back_failed =
        simd_sequence_fill_iterable(dst.obj, dst.data.qf64, simd_data_qf64) != 0;
    simd_arg_free(&dst);
    if (write_back_failed) {
        return NULL;
    }
    Py_RETURN_NONE;
}

#line 47
// Hand-written wrapper exposing npyv_stores_f64 to Python.
// It cannot use the generic helpers because the intrinsic's output lands in
// the sequence argument, which then has to be written back to the object.
static PyObject *
simd__intrin_stores_f64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qf64};
    simd_arg src = {.dtype = simd_data_vf64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:stores_f64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_stores_f64(dst.data.qf64, src.data.vf64);
    // write-back; the sequence argument is released on both outcomes
    const int write_back_failed =
        simd_sequence_fill_iterable(dst.obj, dst.data.qf64, simd_data_qf64) != 0;
    simd_arg_free(&dst);
    if (write_back_failed) {
        return NULL;
    }
    Py_RETURN_NONE;
}

#line 47
// Hand-written wrapper exposing npyv_storel_f64 to Python.
// It cannot use the generic helpers because the intrinsic's output lands in
// the sequence argument, which then has to be written back to the object.
static PyObject *
simd__intrin_storel_f64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qf64};
    simd_arg src = {.dtype = simd_data_vf64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storel_f64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storel_f64(dst.data.qf64, src.data.vf64);
    // write-back; the sequence argument is released on both outcomes
    const int write_back_failed =
        simd_sequence_fill_iterable(dst.obj, dst.data.qf64, simd_data_qf64) != 0;
    simd_arg_free(&dst);
    if (write_back_failed) {
        return NULL;
    }
    Py_RETURN_NONE;
}

#line 47
// Hand-written wrapper exposing npyv_storeh_f64 to Python.
// It cannot use the generic helpers because the intrinsic's output lands in
// the sequence argument, which then has to be written back to the object.
static PyObject *
simd__intrin_storeh_f64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qf64};
    simd_arg src = {.dtype = simd_data_vf64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&:storeh_f64",
        simd_arg_converter, &dst,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_storeh_f64(dst.data.qf64, src.data.vf64);
    // write-back; the sequence argument is released on both outcomes
    const int write_back_failed =
        simd_sequence_fill_iterable(dst.obj, dst.data.qf64, simd_data_qf64) != 0;
    simd_arg_free(&dst);
    if (write_back_failed) {
        return NULL;
    }
    Py_RETURN_NONE;
}


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 1
// Partial Load
// load_till takes a qf64 sequence, a u32 and an f64; load_tillz takes only
// the sequence and the u32.
// NOTE(review): presumably the u32 is a lane count and the f64 a fill value,
// as in the hand-written loadn_till wrapper - confirm.
SIMD_IMPL_INTRIN_3(load_till_f64, vf64, qf64, u32, f64)
SIMD_IMPL_INTRIN_2(load_tillz_f64, vf64, qf64, u32)

// Partial Store
// Hand-written wrapper exposing npyv_store_till_f64 to Python. Arguments, in
// order: destination sequence (qf64), lane count (u32), source vector (vf64).
static PyObject *
simd__intrin_store_till_f64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg dst = {.dtype = simd_data_qf64};
    simd_arg count = {.dtype = simd_data_u32};
    simd_arg src = {.dtype = simd_data_vf64};
    const int parsed = PyArg_ParseTuple(
        args, "O&O&O&:store_till_f64",
        simd_arg_converter, &dst,
        simd_arg_converter, &count,
        simd_arg_converter, &src
    );
    if (!parsed) {
        return NULL;
    }
    npyv_store_till_f64(dst.data.qf64, count.data.u32, src.data.vf64);
    // write-back; the sequence argument is released on both outcomes
    const int write_back_failed =
        simd_sequence_fill_iterable(dst.obj, dst.data.qf64, simd_data_qf64) != 0;
    simd_arg_free(&dst);
    if (write_back_failed) {
        return NULL;
    }
    Py_RETURN_NONE;
}

// Non-contiguous Load
#line 113
/**
 * Python wrapper for npyv_loadn_f64 (non-contiguous load).
 *
 * Python arguments, in order: a sequence (converted as simd_data_qf64) and a
 * stride (converted as simd_data_s64). With a negative stride the load starts
 * at the last element of the sequence.
 *
 * Returns the loaded vector as a Python object, or NULL with an exception
 * set. ValueError is raised when the sequence holds fewer than
 * |stride| * npyv_nlanes_f64 elements.
 *
 * NOTE: this file is generated from a template; the format-specifier fix
 * below must also be applied to the .src template.
 */
static PyObject *
simd__intrin_loadn_f64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qf64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    if (!PyArg_ParseTuple(
        args, "O&O&:loadn_f64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
    )) {
        return NULL;
    }
    npyv_lanetype_f64 *seq_ptr = seq_arg.data.qf64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_f64;
    if (stride < 0) {
        // walk backwards from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; "%d" would read these pointer-sized values
        // as int, which is undefined behaviour for variadic arguments.
        PyErr_Format(PyExc_ValueError,
            "loadn_f64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_f64 rvec = npyv_loadn_f64(seq_ptr, stride);
    simd_arg ret = {
        .dtype = simd_data_vf64, .data = {.vf64=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
/**
 * Python wrapper for npyv_loadn_till_f64 (non-contiguous, partial load).
 *
 * Python arguments, in order: a sequence (simd_data_qf64), a stride
 * (simd_data_s64), a lane count (simd_data_u32) and a fill value
 * (simd_data_f64). With a negative stride the load starts at the last
 * element of the sequence.
 *
 * Returns the loaded vector as a Python object, or NULL with an exception
 * set. ValueError is raised when the sequence holds fewer than
 * |stride| * npyv_nlanes_f64 elements; the check uses the full lane count
 * regardless of the lane-count argument.
 *
 * NOTE: this file is generated from a template; the format-specifier fix
 * below must also be applied to the .src template.
 */
static PyObject *
simd__intrin_loadn_till_f64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qf64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    simd_arg fill_arg = {.dtype = simd_data_f64};
    if (!PyArg_ParseTuple(
        args, "O&O&O&O&:loadn_till_f64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg,
        simd_arg_converter, &fill_arg
    )) {
        return NULL;
    }
    npyv_lanetype_f64 *seq_ptr = seq_arg.data.qf64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_f64;
    if (stride < 0) {
        // walk backwards from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; "%d" would read these pointer-sized values
        // as int, which is undefined behaviour for variadic arguments.
        PyErr_Format(PyExc_ValueError,
            "loadn_till_f64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_f64 rvec = npyv_loadn_till_f64(
        seq_ptr, stride, nlane_arg.data.u32, fill_arg.data.f64
    );
    simd_arg ret = {
        .dtype = simd_data_vf64, .data = {.vf64=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 113
/**
 * Python wrapper for npyv_loadn_tillz_f64 (non-contiguous, partial load).
 *
 * Python arguments, in order: a sequence (simd_data_qf64), a stride
 * (simd_data_s64) and a lane count (simd_data_u32). With a negative stride
 * the load starts at the last element of the sequence.
 *
 * Returns the loaded vector as a Python object, or NULL with an exception
 * set. ValueError is raised when the sequence holds fewer than
 * |stride| * npyv_nlanes_f64 elements; the check uses the full lane count
 * regardless of the lane-count argument.
 *
 * NOTE: this file is generated from a template; the format-specifier fix
 * below must also be applied to the .src template.
 */
static PyObject *
simd__intrin_loadn_tillz_f64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qf64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg nlane_arg = {.dtype = simd_data_u32};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:loadn_tillz_f64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &nlane_arg
    )) {
        return NULL;
    }
    npyv_lanetype_f64 *seq_ptr = seq_arg.data.qf64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_f64;
    if (stride < 0) {
        // walk backwards from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; "%d" would read these pointer-sized values
        // as int, which is undefined behaviour for variadic arguments.
        PyErr_Format(PyExc_ValueError,
            "loadn_tillz_f64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_f64 rvec = npyv_loadn_tillz_f64(
        seq_ptr, stride, nlane_arg.data.u32
    );
    simd_arg ret = {
        .dtype = simd_data_vf64, .data = {.vf64=rvec}
    };
    simd_arg_free(&seq_arg);
    return simd_arg_to_obj(&ret);
err:
    simd_arg_free(&seq_arg);
    return NULL;
}


// Non-contiguous Store
#line 179
/**
 * Python wrapper for npyv_storen_f64 (non-contiguous store).
 *
 * Python arguments, in order: a destination sequence (simd_data_qf64), a
 * stride (simd_data_s64) and the source vector (simd_data_vf64). With a
 * negative stride the store starts at the last element of the sequence.
 * After the store the buffer is written back to the Python object.
 *
 * Returns None, or NULL with an exception set. ValueError is raised when
 * the sequence holds fewer than |stride| * npyv_nlanes_f64 elements.
 *
 * NOTE: this file is generated from a template; the message fixes below
 * must also be applied to the .src template.
 */
static PyObject *
simd__intrin_storen_f64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qf64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vf64};
    if (!PyArg_ParseTuple(
        args, "O&O&O&:storen_f64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg,
        simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_f64 *seq_ptr = seq_arg.data.qf64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_f64;
    if (stride < 0) {
        // walk backwards from the last element
        seq_ptr += cur_seq_len -1;
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t ("%d" is undefined behaviour for these
        // arguments); the first literal now ends with a space so the text
        // no longer reads "theminimum".
        PyErr_Format(PyExc_ValueError,
            "storen_f64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    npyv_storen_f64(seq_ptr, stride, vec_arg.data.vf64);
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qf64, simd_data_qf64)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#line 179
/*
 * Python-level wrapper around npyv_storen_till_f64 (non-contiguous,
 * partial store).
 *
 * Positional arguments, each parsed through simd_arg_converter:
 *   1. destination sequence of f64 lanes (simd_data_qf64)
 *   2. stride, parsed as a signed 64-bit integer
 *   3. number of lanes, parsed as an unsigned 32-bit integer
 *   4. f64 vector to store (simd_data_vf64)
 *
 * Returns None on success. Returns NULL with an exception set when the
 * arguments cannot be parsed, when the sequence is shorter than
 * |stride| * npyv_nlanes_f64 (ValueError; the lane count is not taken into
 * account by this check), or when writing the buffer back into the caller's
 * object fails.
 */
static PyObject *
simd__intrin_storen_till_f64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    simd_arg seq_arg = {.dtype = simd_data_qf64};
    simd_arg stride_arg = {.dtype = simd_data_s64};
    simd_arg vec_arg = {.dtype = simd_data_vf64};
#if 1
    simd_arg nlane_arg = {.dtype = simd_data_u32};
#endif
    if (!PyArg_ParseTuple(
        // the name after ':' is what Python shows in argument errors, so it
        // must be this intrinsic's own name
        args, "O&O&O&O&:storen_till_f64",
        simd_arg_converter, &seq_arg,
        simd_arg_converter, &stride_arg
#if 1
        ,simd_arg_converter, &nlane_arg
#endif
        ,simd_arg_converter, &vec_arg
    )) {
        return NULL;
    }
    npyv_lanetype_f64 *seq_ptr = seq_arg.data.qf64;
    npy_intp stride = (npy_intp)stride_arg.data.s64;
    Py_ssize_t cur_seq_len = simd_sequence_len(seq_ptr);
    Py_ssize_t min_seq_len = stride * npyv_nlanes_f64;
    if (stride < 0) {
        min_seq_len = -min_seq_len;
    }
    // overflow guard
    if (cur_seq_len < min_seq_len) {
        // %zd matches Py_ssize_t; plain %d would read the wrong width from
        // the variadic arguments on 64-bit targets.
        PyErr_Format(PyExc_ValueError,
            "storen_till_f64(), according to provided stride %zd, the "
            "minimum acceptable size of the required sequence is %zd, given(%zd)",
            (Py_ssize_t)stride, min_seq_len, cur_seq_len
        );
        goto err;
    }
    if (stride < 0) {
        // A negative stride walks backwards, so start at the last element.
        // Done after the guard so the pointer is never moved before the
        // start of an empty buffer.
        seq_ptr += cur_seq_len - 1;
    }
    npyv_storen_till_f64(
        seq_ptr, stride
    #if 1
        ,nlane_arg.data.u32
    #endif
        ,vec_arg.data.vf64
    );
    // write-back
    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.qf64, simd_data_qf64)) {
        goto err;
    }
    simd_arg_free(&seq_arg);
    Py_RETURN_NONE;
err:
    simd_arg_free(&seq_arg);
    return NULL;
}

#endif // 1

/****************************
 * Lookup tables
 ****************************/
// NOTE(review): the SIMD_IMPL_INTRIN_* macros are defined outside this chunk
// (presumably in _simd_easyintrin.inc). Each invocation appears to list the
// intrinsic name, the return data-type tag, then the argument data-type
// tags -- confirm against the macro definitions.
// The conditions below compare literal constants for the f64 instantiation:
// `64 == 32` is false, so lut32_f64 is compiled out and only lut16_f64 is
// emitted.
#if 64 == 32
SIMD_IMPL_INTRIN_2(lut32_f64, vf64, qf64, vu64)
#endif
#if 64 == 64
SIMD_IMPL_INTRIN_2(lut16_f64, vf64, qf64, vu64)
#endif
/***************************
 * Misc
 ***************************/
// zero_f64 is declared with no argument tags, setall_f64 with a single f64
// tag, and select_f64 with a vb64 tag followed by two vf64 tags.
SIMD_IMPL_INTRIN_0(zero_f64, vf64)
SIMD_IMPL_INTRIN_1(setall_f64, vf64, f64)
SIMD_IMPL_INTRIN_3(select_f64, vf64, vb64, vf64, vf64)

// reinterpret_<T>_f64: one invocation per listed type T (u8 .. f64), each
// declared with the vector tag of T first and a single vf64 tag after it.
// Every variant is enabled unconditionally (`#if 1`) except
// reinterpret_f64_f64, which is only compiled when NPY_SIMD_F64 is non-zero.
#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u8_f64, vu8, vf64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s8_f64, vs8, vf64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u16_f64, vu16, vf64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s16_f64, vs16, vf64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u32_f64, vu32, vf64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s32_f64, vs32, vf64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_u64_f64, vu64, vf64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_s64_f64, vs64, vf64)
#endif // simd_sup2

#line 256
#if 1
SIMD_IMPL_INTRIN_1(reinterpret_f32_f64, vf32, vf64)
#endif // simd_sup2

#line 256
#if NPY_SIMD_F64
SIMD_IMPL_INTRIN_1(reinterpret_f64_f64, vf64, vf64)
#endif // simd_sup2


/**
 * Special definitions due to the nature of the intrinsics
 * npyv_setf_f64 and npyv_set_f64.
*/
#line 268
static PyObject *
simd__intrin_setf_f64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    /*
     * Build an f64 vector object from the positional arguments.
     *
     * The whole `args` tuple is converted into a temporary buffer of f64
     * lanes, the buffer elements are handed to npyv_setf_f64, and the buffer
     * is released before the vector object is created. NULL is returned as
     * soon as the conversion yields NULL.
     *
     * NOTE(review): the buffer is requested with npyv_nlanes_f64 as the size
     * argument while the call below names elements up to index 64;
     * presumably npyv_setf_f64 is a macro that keeps only its leading
     * arguments -- confirm, and confirm whether the extra fill value of
     * setf requires one more element than npyv_nlanes_f64.
     */
    npyv_lanetype_f64 *lanes = simd_sequence_from_iterable(
        args, simd_data_qf64, npyv_nlanes_f64
    );
    if (!lanes) {
        return NULL;
    }
    simd_data packed = {.vf64 = npyv_setf_f64(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27], lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35], lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43], lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51], lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59], lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // for setf
    )};
    // the temporary lane buffer is no longer needed once the vector is built
    simd_sequence_free(lanes);
    PyObject *vec_obj = (PyObject*)PySIMDVector_FromData(packed, simd_data_vf64);
    return vec_obj;
}

#line 268
static PyObject *
simd__intrin_set_f64(PyObject* NPY_UNUSED(self), PyObject *args)
{
    /*
     * Build an f64 vector object from the positional arguments.
     *
     * The whole `args` tuple is converted into a temporary buffer of f64
     * lanes, the buffer elements are handed to npyv_set_f64, and the buffer
     * is released before the vector object is created. NULL is returned as
     * soon as the conversion yields NULL.
     *
     * NOTE(review): the buffer is requested with npyv_nlanes_f64 as the size
     * argument while the call below names elements up to index 64;
     * presumably npyv_set_f64 is a macro that keeps only its leading
     * arguments -- confirm against its definition.
     */
    npyv_lanetype_f64 *lanes = simd_sequence_from_iterable(
        args, simd_data_qf64, npyv_nlanes_f64
    );
    if (!lanes) {
        return NULL;
    }
    simd_data packed = {.vf64 = npyv_set_f64(
        lanes[0],  lanes[1],  lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
        lanes[8],  lanes[9],  lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15],
        lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23],
        lanes[24], lanes[25], lanes[26], lanes[27], lanes[28], lanes[29], lanes[30], lanes[31],
        lanes[32], lanes[33], lanes[34], lanes[35], lanes[36], lanes[37], lanes[38], lanes[39],
        lanes[40], lanes[41], lanes[42], lanes[43], lanes[44], lanes[45], lanes[46], lanes[47],
        lanes[48], lanes[49], lanes[50], lanes[51], lanes[52], lanes[53], lanes[54], lanes[55],
        lanes[56], lanes[57], lanes[58], lanes[59], lanes[60], lanes[61], lanes[62], lanes[63],
        lanes[64] // for setf
    )};
    // the temporary lane buffer is no longer needed once the vector is built
    simd_sequence_free(lanes);
    PyObject *vec_obj = (PyObject*)PySIMDVector_FromData(packed, simd_data_vf64);
    return vec_obj;
}


/***************************
 * Reorder
 ***************************/
// combinel_f64/combineh_f64 are declared with a plain vf64 return tag, while
// combine_f64 and zip_f64 are declared with the vf64x2 return tag; all four
// list two vf64 argument tags. rev64_f64 sits under `#if 0` and is therefore
// not compiled for f64.
#line 297
SIMD_IMPL_INTRIN_2(combinel_f64, vf64, vf64, vf64)

#line 297
SIMD_IMPL_INTRIN_2(combineh_f64, vf64, vf64, vf64)


#line 303
SIMD_IMPL_INTRIN_2(combine_f64, vf64x2, vf64, vf64)

#line 303
SIMD_IMPL_INTRIN_2(zip_f64, vf64x2, vf64, vf64)


#if 0
SIMD_IMPL_INTRIN_1(rev64_f64, vf64, vf64)
#endif

/***************************
 * Operators
 ***************************/
// `0 > 0` is false, so none of the shift wrappers (shl/shr/shli/shri) are
// compiled for f64.
#if 0 > 0
SIMD_IMPL_INTRIN_2(shl_f64, vf64, vf64, u8)
SIMD_IMPL_INTRIN_2(shr_f64, vf64, vf64, u8)
// immediate constant
SIMD_IMPL_INTRIN_2IMM(shli_f64, vf64, vf64, 0)
SIMD_IMPL_INTRIN_2IMM(shri_f64, vf64, vf64, 0)
#endif // shl_imm

// Bitwise wrappers: and/or/xor list two vf64 argument tags, not_f64 one;
// all are declared with a vf64 return tag.
#line 324
SIMD_IMPL_INTRIN_2(and_f64, vf64, vf64, vf64)

#line 324
SIMD_IMPL_INTRIN_2(or_f64, vf64, vf64, vf64)

#line 324
SIMD_IMPL_INTRIN_2(xor_f64, vf64, vf64, vf64)


SIMD_IMPL_INTRIN_1(not_f64, vf64, vf64)

// Comparison wrappers: two vf64 argument tags, vb64 (boolean vector) return
// tag.
#line 332
SIMD_IMPL_INTRIN_2(cmpeq_f64, vb64, vf64, vf64)

#line 332
SIMD_IMPL_INTRIN_2(cmpneq_f64, vb64, vf64, vf64)

#line 332
SIMD_IMPL_INTRIN_2(cmpgt_f64, vb64, vf64, vf64)

#line 332
SIMD_IMPL_INTRIN_2(cmpge_f64, vb64, vf64, vf64)

#line 332
SIMD_IMPL_INTRIN_2(cmplt_f64, vb64, vf64, vf64)

#line 332
SIMD_IMPL_INTRIN_2(cmple_f64, vb64, vf64, vf64)


/***************************
 * Conversion
 ***************************/
// cvt_f64_b64 is declared as vb64 -> vf64 and cvt_b64_f64 as vf64 -> vb64.
// The expand wrapper is under `#if 0`, so it is not compiled for f64.
SIMD_IMPL_INTRIN_1(cvt_f64_b64, vf64,  vb64)
SIMD_IMPL_INTRIN_1(cvt_b64_f64, vb64, vf64)
#if 0
SIMD_IMPL_INTRIN_1(expand_f64_f64, vf64x2, vf64)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
// Arithmetic wrappers for f64. The literal `#if` conditions decide what is
// compiled: add/sub (unguarded), mul, div, the fused multiply group and sum
// are enabled; saturated adds/subs, divisor/divc and sumup are under `#if 0`
// and therefore not compiled for f64.
#line 349
SIMD_IMPL_INTRIN_2(add_f64, vf64, vf64, vf64)

#line 349
SIMD_IMPL_INTRIN_2(sub_f64, vf64, vf64, vf64)


#if 0
#line 356
SIMD_IMPL_INTRIN_2(adds_f64, vf64, vf64, vf64)

#line 356
SIMD_IMPL_INTRIN_2(subs_f64, vf64, vf64, vf64)

#endif // sat_sup

#if 1
SIMD_IMPL_INTRIN_2(mul_f64, vf64, vf64, vf64)
#endif // mul_sup

#if 1
SIMD_IMPL_INTRIN_2(div_f64, vf64, vf64, vf64)
#endif // div_sup

#if 0
SIMD_IMPL_INTRIN_1(divisor_f64, vf64x3, f64)
SIMD_IMPL_INTRIN_2(divc_f64, vf64, vf64, vf64x3)
#endif // intdiv_sup

#if 1
#line 377
SIMD_IMPL_INTRIN_3(muladd_f64, vf64, vf64, vf64, vf64)

#line 377
SIMD_IMPL_INTRIN_3(mulsub_f64, vf64, vf64, vf64, vf64)

#line 377
SIMD_IMPL_INTRIN_3(nmuladd_f64, vf64, vf64, vf64, vf64)

#line 377
SIMD_IMPL_INTRIN_3(nmulsub_f64, vf64, vf64, vf64, vf64)

#endif // fused_sup

// sum_f64 is declared with the scalar f64 return tag and one vf64 argument
// tag.
#if 1
SIMD_IMPL_INTRIN_1(sum_f64, f64, vf64)
#endif // sum_sup

#if 0
SIMD_IMPL_INTRIN_1(sumup_f64, f64, vf64)
#endif // sumup_sup

/***************************
 * Math
 ***************************/
// Math wrappers for f64. The unary group (sqrt .. floor) and the maxp/minp
// pair are each wrapped in `#if 1`, so they are compiled; max/min are
// unguarded. Unary wrappers list one vf64 argument tag, the max/min
// families two; all are declared with a vf64 return tag.
#if 1
#line 396
SIMD_IMPL_INTRIN_1(sqrt_f64, vf64, vf64)

#line 396
SIMD_IMPL_INTRIN_1(recip_f64, vf64, vf64)

#line 396
SIMD_IMPL_INTRIN_1(abs_f64, vf64, vf64)

#line 396
SIMD_IMPL_INTRIN_1(square_f64, vf64, vf64)

#line 396
SIMD_IMPL_INTRIN_1(rint_f64, vf64, vf64)

#line 396
SIMD_IMPL_INTRIN_1(ceil_f64, vf64, vf64)

#line 396
SIMD_IMPL_INTRIN_1(trunc_f64, vf64, vf64)

#line 396
SIMD_IMPL_INTRIN_1(floor_f64, vf64, vf64)

#endif

#line 403
SIMD_IMPL_INTRIN_2(max_f64, vf64, vf64, vf64)

#line 403
SIMD_IMPL_INTRIN_2(min_f64, vf64, vf64, vf64)


#if 1
#line 410
SIMD_IMPL_INTRIN_2(maxp_f64, vf64, vf64, vf64)

#line 410
SIMD_IMPL_INTRIN_2(minp_f64, vf64, vf64, vf64)

#endif

/***************************
 * Mask operations
 ***************************/
// ifadd_f64/ifsub_f64 are declared with four argument tags: a vb64 mask tag
// followed by three vf64 tags; the return tag is vf64.
#line 420
 SIMD_IMPL_INTRIN_4(ifadd_f64, vf64, vb64, vf64, vf64, vf64)

#line 420
 SIMD_IMPL_INTRIN_4(ifsub_f64, vf64, vb64, vf64, vf64, vf64)


#endif // simd_sup

/*************************************************************************
 * Variant
 ************************************************************************/
// `cleanup` carries no type suffix and is the only intrinsic in this part of
// the file declared through the _0N macro variant, with no data-type tags.
// NOTE(review): presumably this means "no arguments, no return value" --
// confirm against the SIMD_IMPL_INTRIN_0N definition.
SIMD_IMPL_INTRIN_0N(cleanup)

/*************************************************************************
 * A special section for f32/f64 intrinsics outside the main repeater
 ************************************************************************/
/***************************
 * Operators
 ***************************/
// check special cases
// notnan_<sfx> is declared with a boolean-vector return tag of matching
// width. The f32 variant is emitted unconditionally; the f64 variant only
// when NPY_SIMD_F64 is non-zero.
SIMD_IMPL_INTRIN_1(notnan_f32, vb32, vf32)
#if NPY_SIMD_F64
    SIMD_IMPL_INTRIN_1(notnan_f64, vb64, vf64)
#endif
/***************************
 * Conversions
 ***************************/
// round to nearest integer (assume even)
// round_s32_f32 lists one vf32 argument tag, whereas round_s32_f64 lists two
// vf64 argument tags for the same vs32 return tag.
// NOTE(review): presumably two f64 vectors are needed to fill one s32
// vector -- confirm against npyv_round_s32_f64.
SIMD_IMPL_INTRIN_1(round_s32_f32, vs32, vf32)
#if NPY_SIMD_F64
    SIMD_IMPL_INTRIN_2(round_s32_f64, vs32, vf64, vf64)
#endif

/*************************************************************************
 * A special section for boolean intrinsics outside the main repeater
 ************************************************************************/
/***************************
 * Operators
 ***************************/
// Logical
// Logical wrappers for the four boolean vector types (b8, b16, b32, b64):
// and/or/xor list two argument tags, not lists one; argument and return tags
// are all the boolean vector type of the same width.
#line 460
SIMD_IMPL_INTRIN_2(and_b8, vb8, vb8, vb8)
SIMD_IMPL_INTRIN_2(or_b8,  vb8, vb8, vb8)
SIMD_IMPL_INTRIN_2(xor_b8, vb8, vb8, vb8)
SIMD_IMPL_INTRIN_1(not_b8, vb8, vb8)

#line 460
SIMD_IMPL_INTRIN_2(and_b16, vb16, vb16, vb16)
SIMD_IMPL_INTRIN_2(or_b16,  vb16, vb16, vb16)
SIMD_IMPL_INTRIN_2(xor_b16, vb16, vb16, vb16)
SIMD_IMPL_INTRIN_1(not_b16, vb16, vb16)

#line 460
SIMD_IMPL_INTRIN_2(and_b32, vb32, vb32, vb32)
SIMD_IMPL_INTRIN_2(or_b32,  vb32, vb32, vb32)
SIMD_IMPL_INTRIN_2(xor_b32, vb32, vb32, vb32)
SIMD_IMPL_INTRIN_1(not_b32, vb32, vb32)

#line 460
SIMD_IMPL_INTRIN_2(and_b64, vb64, vb64, vb64)
SIMD_IMPL_INTRIN_2(or_b64,  vb64, vb64, vb64)
SIMD_IMPL_INTRIN_2(xor_b64, vb64, vb64, vb64)
SIMD_IMPL_INTRIN_1(not_b64, vb64, vb64)

/***************************
 * Conversions
 ***************************/
// Convert mask vector to integer bitfield
// tobits_<bN>: one wrapper per boolean vector type; each lists a single
// boolean-vector argument tag and is declared with the scalar u64 return
// tag regardless of the lane width.
#line 472
SIMD_IMPL_INTRIN_1(tobits_b8, u64, vb8)

#line 472
SIMD_IMPL_INTRIN_1(tobits_b16, u64, vb16)

#line 472
SIMD_IMPL_INTRIN_1(tobits_b32, u64, vb32)

#line 472
SIMD_IMPL_INTRIN_1(tobits_b64, u64, vb64)



//#########################################################################
//## Attach module functions
//#########################################################################
static PyMethodDef simd__intrinsics_methods[] = {
#line 500
#if 1

/***************************
 * Memory
 ***************************/
#line 508
SIMD_INTRIN_DEF(load_u8)

#line 508
SIMD_INTRIN_DEF(loada_u8)

#line 508
SIMD_INTRIN_DEF(loads_u8)

#line 508
SIMD_INTRIN_DEF(loadl_u8)

#line 508
SIMD_INTRIN_DEF(store_u8)

#line 508
SIMD_INTRIN_DEF(storea_u8)

#line 508
SIMD_INTRIN_DEF(stores_u8)

#line 508
SIMD_INTRIN_DEF(storel_u8)

#line 508
SIMD_INTRIN_DEF(storeh_u8)


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 0
#line 519
SIMD_INTRIN_DEF(load_till_u8)

#line 519
SIMD_INTRIN_DEF(load_tillz_u8)

#line 519
SIMD_INTRIN_DEF(loadn_u8)

#line 519
SIMD_INTRIN_DEF(loadn_till_u8)

#line 519
SIMD_INTRIN_DEF(loadn_tillz_u8)

#line 519
SIMD_INTRIN_DEF(store_till_u8)

#line 519
SIMD_INTRIN_DEF(storen_u8)

#line 519
SIMD_INTRIN_DEF(storen_till_u8)

#endif // ncont_sup

/****************************
 * Lookup tables
 ****************************/
#if 8 == 32
SIMD_INTRIN_DEF(lut32_u8)
#endif
#if 8 == 64
SIMD_INTRIN_DEF(lut16_u8)
#endif
/***************************
 * Misc
 ***************************/
#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u8_u8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s8_u8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u16_u8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s16_u8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u32_u8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s32_u8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u64_u8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s64_u8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_f32_u8)
#endif // simd_sup2

#line 539
#if NPY_SIMD_F64
SIMD_INTRIN_DEF(reinterpret_f64_u8)
#endif // simd_sup2


#line 547
SIMD_INTRIN_DEF(set_u8)

#line 547
SIMD_INTRIN_DEF(setf_u8)

#line 547
SIMD_INTRIN_DEF(setall_u8)

#line 547
SIMD_INTRIN_DEF(zero_u8)

#line 547
SIMD_INTRIN_DEF(select_u8)


/***************************
 * Reorder
 ***************************/
#line 556
SIMD_INTRIN_DEF(combinel_u8)

#line 556
SIMD_INTRIN_DEF(combineh_u8)

#line 556
SIMD_INTRIN_DEF(combine_u8)

#line 556
SIMD_INTRIN_DEF(zip_u8)


#if 1
SIMD_INTRIN_DEF(rev64_u8)
#endif

/***************************
 * Operators
 ***************************/
#if 0 > 0
#line 570
SIMD_INTRIN_DEF(shl_u8)

#line 570
SIMD_INTRIN_DEF(shr_u8)

#line 570
SIMD_INTRIN_DEF(shli_u8)

#line 570
SIMD_INTRIN_DEF(shri_u8)

#endif // shl_imm

#line 577
SIMD_INTRIN_DEF(and_u8)

#line 577
SIMD_INTRIN_DEF(or_u8)

#line 577
SIMD_INTRIN_DEF(xor_u8)

#line 577
SIMD_INTRIN_DEF(not_u8)

#line 577
SIMD_INTRIN_DEF(cmpeq_u8)

#line 577
SIMD_INTRIN_DEF(cmpneq_u8)

#line 577
SIMD_INTRIN_DEF(cmpgt_u8)

#line 577
SIMD_INTRIN_DEF(cmpge_u8)

#line 577
SIMD_INTRIN_DEF(cmplt_u8)

#line 577
SIMD_INTRIN_DEF(cmple_u8)


/***************************
 * Conversion
 ***************************/
SIMD_INTRIN_DEF(cvt_u8_b8)
SIMD_INTRIN_DEF(cvt_b8_u8)
#if 1
SIMD_INTRIN_DEF(expand_u16_u8)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
#line 594
SIMD_INTRIN_DEF(add_u8)

#line 594
SIMD_INTRIN_DEF(sub_u8)


#if 1
#line 601
SIMD_INTRIN_DEF(adds_u8)

#line 601
SIMD_INTRIN_DEF(subs_u8)

#endif // sat_sup

#if 1
SIMD_INTRIN_DEF(mul_u8)
#endif // mul_sup

#if 0
SIMD_INTRIN_DEF(div_u8)
#endif // div_sup

#if 1
SIMD_INTRIN_DEF(divisor_u8)
SIMD_INTRIN_DEF(divc_u8)
#endif // intdiv_sup

#if 0
#line 622
SIMD_INTRIN_DEF(muladd_u8)

#line 622
SIMD_INTRIN_DEF(mulsub_u8)

#line 622
SIMD_INTRIN_DEF(nmuladd_u8)

#line 622
SIMD_INTRIN_DEF(nmulsub_u8)

#endif // fused_sup

#if 0
SIMD_INTRIN_DEF(sum_u8)
#endif // sum_sup

#if 1
SIMD_INTRIN_DEF(sumup_u8)
#endif // sumup_sup
/***************************
 * Math
 ***************************/
#if 0
#line 640
SIMD_INTRIN_DEF(sqrt_u8)

#line 640
SIMD_INTRIN_DEF(recip_u8)

#line 640
SIMD_INTRIN_DEF(abs_u8)

#line 640
SIMD_INTRIN_DEF(square_u8)

#line 640
SIMD_INTRIN_DEF(rint_u8)

#line 640
SIMD_INTRIN_DEF(ceil_u8)

#line 640
SIMD_INTRIN_DEF(trunc_u8)

#line 640
SIMD_INTRIN_DEF(floor_u8)

#endif

#line 647
SIMD_INTRIN_DEF(max_u8)

#line 647
SIMD_INTRIN_DEF(min_u8)


#if 0
#line 654
SIMD_INTRIN_DEF(maxp_u8)

#line 654
SIMD_INTRIN_DEF(minp_u8)

#endif

/***************************
 * Mask operations
 ***************************/
#line 664
 SIMD_INTRIN_DEF(ifadd_u8)

#line 664
 SIMD_INTRIN_DEF(ifsub_u8)


#endif // simd_sup

#line 500
#if 1

/***************************
 * Memory
 ***************************/
#line 508
SIMD_INTRIN_DEF(load_s8)

#line 508
SIMD_INTRIN_DEF(loada_s8)

#line 508
SIMD_INTRIN_DEF(loads_s8)

#line 508
SIMD_INTRIN_DEF(loadl_s8)

#line 508
SIMD_INTRIN_DEF(store_s8)

#line 508
SIMD_INTRIN_DEF(storea_s8)

#line 508
SIMD_INTRIN_DEF(stores_s8)

#line 508
SIMD_INTRIN_DEF(storel_s8)

#line 508
SIMD_INTRIN_DEF(storeh_s8)


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 0
#line 519
SIMD_INTRIN_DEF(load_till_s8)

#line 519
SIMD_INTRIN_DEF(load_tillz_s8)

#line 519
SIMD_INTRIN_DEF(loadn_s8)

#line 519
SIMD_INTRIN_DEF(loadn_till_s8)

#line 519
SIMD_INTRIN_DEF(loadn_tillz_s8)

#line 519
SIMD_INTRIN_DEF(store_till_s8)

#line 519
SIMD_INTRIN_DEF(storen_s8)

#line 519
SIMD_INTRIN_DEF(storen_till_s8)

#endif // ncont_sup

/****************************
 * Lookup tables
 ****************************/
#if 8 == 32
SIMD_INTRIN_DEF(lut32_s8)
#endif
#if 8 == 64
SIMD_INTRIN_DEF(lut16_s8)
#endif
/***************************
 * Misc
 ***************************/
#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u8_s8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s8_s8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u16_s8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s16_s8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u32_s8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s32_s8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u64_s8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s64_s8)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_f32_s8)
#endif // simd_sup2

#line 539
#if NPY_SIMD_F64
SIMD_INTRIN_DEF(reinterpret_f64_s8)
#endif // simd_sup2


#line 547
SIMD_INTRIN_DEF(set_s8)

#line 547
SIMD_INTRIN_DEF(setf_s8)

#line 547
SIMD_INTRIN_DEF(setall_s8)

#line 547
SIMD_INTRIN_DEF(zero_s8)

#line 547
SIMD_INTRIN_DEF(select_s8)


/***************************
 * Reorder
 ***************************/
#line 556
SIMD_INTRIN_DEF(combinel_s8)

#line 556
SIMD_INTRIN_DEF(combineh_s8)

#line 556
SIMD_INTRIN_DEF(combine_s8)

#line 556
SIMD_INTRIN_DEF(zip_s8)


#if 1
SIMD_INTRIN_DEF(rev64_s8)
#endif

/***************************
 * Operators
 ***************************/
#if 0 > 0
#line 570
SIMD_INTRIN_DEF(shl_s8)

#line 570
SIMD_INTRIN_DEF(shr_s8)

#line 570
SIMD_INTRIN_DEF(shli_s8)

#line 570
SIMD_INTRIN_DEF(shri_s8)

#endif // shl_imm

#line 577
SIMD_INTRIN_DEF(and_s8)

#line 577
SIMD_INTRIN_DEF(or_s8)

#line 577
SIMD_INTRIN_DEF(xor_s8)

#line 577
SIMD_INTRIN_DEF(not_s8)

#line 577
SIMD_INTRIN_DEF(cmpeq_s8)

#line 577
SIMD_INTRIN_DEF(cmpneq_s8)

#line 577
SIMD_INTRIN_DEF(cmpgt_s8)

#line 577
SIMD_INTRIN_DEF(cmpge_s8)

#line 577
SIMD_INTRIN_DEF(cmplt_s8)

#line 577
SIMD_INTRIN_DEF(cmple_s8)


/***************************
 * Conversion
 ***************************/
SIMD_INTRIN_DEF(cvt_s8_b8)
SIMD_INTRIN_DEF(cvt_b8_s8)
#if 0
SIMD_INTRIN_DEF(expand_s8_s8)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
#line 594
SIMD_INTRIN_DEF(add_s8)

#line 594
SIMD_INTRIN_DEF(sub_s8)


#if 1
#line 601
SIMD_INTRIN_DEF(adds_s8)

#line 601
SIMD_INTRIN_DEF(subs_s8)

#endif // sat_sup

#if 1
SIMD_INTRIN_DEF(mul_s8)
#endif // mul_sup

#if 0
SIMD_INTRIN_DEF(div_s8)
#endif // div_sup

#if 1
SIMD_INTRIN_DEF(divisor_s8)
SIMD_INTRIN_DEF(divc_s8)
#endif // intdiv_sup

#if 0
#line 622
SIMD_INTRIN_DEF(muladd_s8)

#line 622
SIMD_INTRIN_DEF(mulsub_s8)

#line 622
SIMD_INTRIN_DEF(nmuladd_s8)

#line 622
SIMD_INTRIN_DEF(nmulsub_s8)

#endif // fused_sup

#if 0
SIMD_INTRIN_DEF(sum_s8)
#endif // sum_sup

#if 0
SIMD_INTRIN_DEF(sumup_s8)
#endif // sumup_sup
/***************************
 * Math
 ***************************/
#if 0
#line 640
SIMD_INTRIN_DEF(sqrt_s8)

#line 640
SIMD_INTRIN_DEF(recip_s8)

#line 640
SIMD_INTRIN_DEF(abs_s8)

#line 640
SIMD_INTRIN_DEF(square_s8)

#line 640
SIMD_INTRIN_DEF(rint_s8)

#line 640
SIMD_INTRIN_DEF(ceil_s8)

#line 640
SIMD_INTRIN_DEF(trunc_s8)

#line 640
SIMD_INTRIN_DEF(floor_s8)

#endif

#line 647
SIMD_INTRIN_DEF(max_s8)

#line 647
SIMD_INTRIN_DEF(min_s8)


#if 0
#line 654
SIMD_INTRIN_DEF(maxp_s8)

#line 654
SIMD_INTRIN_DEF(minp_s8)

#endif

/***************************
 * Mask operations
 ***************************/
#line 664
 SIMD_INTRIN_DEF(ifadd_s8)

#line 664
 SIMD_INTRIN_DEF(ifsub_s8)


#endif // simd_sup

#line 500
#if 1

/***************************
 * Memory
 ***************************/
#line 508
SIMD_INTRIN_DEF(load_u16)

#line 508
SIMD_INTRIN_DEF(loada_u16)

#line 508
SIMD_INTRIN_DEF(loads_u16)

#line 508
SIMD_INTRIN_DEF(loadl_u16)

#line 508
SIMD_INTRIN_DEF(store_u16)

#line 508
SIMD_INTRIN_DEF(storea_u16)

#line 508
SIMD_INTRIN_DEF(stores_u16)

#line 508
SIMD_INTRIN_DEF(storel_u16)

#line 508
SIMD_INTRIN_DEF(storeh_u16)


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 0
#line 519
SIMD_INTRIN_DEF(load_till_u16)

#line 519
SIMD_INTRIN_DEF(load_tillz_u16)

#line 519
SIMD_INTRIN_DEF(loadn_u16)

#line 519
SIMD_INTRIN_DEF(loadn_till_u16)

#line 519
SIMD_INTRIN_DEF(loadn_tillz_u16)

#line 519
SIMD_INTRIN_DEF(store_till_u16)

#line 519
SIMD_INTRIN_DEF(storen_u16)

#line 519
SIMD_INTRIN_DEF(storen_till_u16)

#endif // ncont_sup

/****************************
 * Lookup tables
 ****************************/
#if 16 == 32
SIMD_INTRIN_DEF(lut32_u16)
#endif
#if 16 == 64
SIMD_INTRIN_DEF(lut16_u16)
#endif
/***************************
 * Misc
 ***************************/
#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u8_u16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s8_u16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u16_u16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s16_u16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u32_u16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s32_u16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u64_u16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s64_u16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_f32_u16)
#endif // simd_sup2

#line 539
#if NPY_SIMD_F64
SIMD_INTRIN_DEF(reinterpret_f64_u16)
#endif // simd_sup2


#line 547
SIMD_INTRIN_DEF(set_u16)

#line 547
SIMD_INTRIN_DEF(setf_u16)

#line 547
SIMD_INTRIN_DEF(setall_u16)

#line 547
SIMD_INTRIN_DEF(zero_u16)

#line 547
SIMD_INTRIN_DEF(select_u16)


/***************************
 * Reorder
 ***************************/
#line 556
SIMD_INTRIN_DEF(combinel_u16)

#line 556
SIMD_INTRIN_DEF(combineh_u16)

#line 556
SIMD_INTRIN_DEF(combine_u16)

#line 556
SIMD_INTRIN_DEF(zip_u16)


#if 1
SIMD_INTRIN_DEF(rev64_u16)
#endif

/***************************
 * Operators
 ***************************/
#if 15 > 0
#line 570
SIMD_INTRIN_DEF(shl_u16)

#line 570
SIMD_INTRIN_DEF(shr_u16)

#line 570
SIMD_INTRIN_DEF(shli_u16)

#line 570
SIMD_INTRIN_DEF(shri_u16)

#endif // shl_imm

#line 577
SIMD_INTRIN_DEF(and_u16)

#line 577
SIMD_INTRIN_DEF(or_u16)

#line 577
SIMD_INTRIN_DEF(xor_u16)

#line 577
SIMD_INTRIN_DEF(not_u16)

#line 577
SIMD_INTRIN_DEF(cmpeq_u16)

#line 577
SIMD_INTRIN_DEF(cmpneq_u16)

#line 577
SIMD_INTRIN_DEF(cmpgt_u16)

#line 577
SIMD_INTRIN_DEF(cmpge_u16)

#line 577
SIMD_INTRIN_DEF(cmplt_u16)

#line 577
SIMD_INTRIN_DEF(cmple_u16)


/***************************
 * Conversion
 ***************************/
SIMD_INTRIN_DEF(cvt_u16_b16)
SIMD_INTRIN_DEF(cvt_b16_u16)
#if 1
SIMD_INTRIN_DEF(expand_u32_u16)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
#line 594
SIMD_INTRIN_DEF(add_u16)

#line 594
SIMD_INTRIN_DEF(sub_u16)


#if 1
#line 601
SIMD_INTRIN_DEF(adds_u16)

#line 601
SIMD_INTRIN_DEF(subs_u16)

#endif // sat_sup

#if 1
SIMD_INTRIN_DEF(mul_u16)
#endif // mul_sup

#if 0
SIMD_INTRIN_DEF(div_u16)
#endif // div_sup

#if 1
SIMD_INTRIN_DEF(divisor_u16)
SIMD_INTRIN_DEF(divc_u16)
#endif // intdiv_sup

#if 0
#line 622
SIMD_INTRIN_DEF(muladd_u16)

#line 622
SIMD_INTRIN_DEF(mulsub_u16)

#line 622
SIMD_INTRIN_DEF(nmuladd_u16)

#line 622
SIMD_INTRIN_DEF(nmulsub_u16)

#endif // fused_sup

#if 0
SIMD_INTRIN_DEF(sum_u16)
#endif // sum_sup

#if 1
SIMD_INTRIN_DEF(sumup_u16)
#endif // sumup_sup
/***************************
 * Math
 ***************************/
#if 0
#line 640
SIMD_INTRIN_DEF(sqrt_u16)

#line 640
SIMD_INTRIN_DEF(recip_u16)

#line 640
SIMD_INTRIN_DEF(abs_u16)

#line 640
SIMD_INTRIN_DEF(square_u16)

#line 640
SIMD_INTRIN_DEF(rint_u16)

#line 640
SIMD_INTRIN_DEF(ceil_u16)

#line 640
SIMD_INTRIN_DEF(trunc_u16)

#line 640
SIMD_INTRIN_DEF(floor_u16)

#endif

#line 647
SIMD_INTRIN_DEF(max_u16)

#line 647
SIMD_INTRIN_DEF(min_u16)


#if 0
#line 654
SIMD_INTRIN_DEF(maxp_u16)

#line 654
SIMD_INTRIN_DEF(minp_u16)

#endif

/***************************
 * Mask operations
 ***************************/
#line 664
 SIMD_INTRIN_DEF(ifadd_u16)

#line 664
 SIMD_INTRIN_DEF(ifsub_u16)


#endif // simd_sup

#line 500
#if 1

/***************************
 * Memory
 ***************************/
#line 508
SIMD_INTRIN_DEF(load_s16)

#line 508
SIMD_INTRIN_DEF(loada_s16)

#line 508
SIMD_INTRIN_DEF(loads_s16)

#line 508
SIMD_INTRIN_DEF(loadl_s16)

#line 508
SIMD_INTRIN_DEF(store_s16)

#line 508
SIMD_INTRIN_DEF(storea_s16)

#line 508
SIMD_INTRIN_DEF(stores_s16)

#line 508
SIMD_INTRIN_DEF(storel_s16)

#line 508
SIMD_INTRIN_DEF(storeh_s16)


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 0
#line 519
SIMD_INTRIN_DEF(load_till_s16)

#line 519
SIMD_INTRIN_DEF(load_tillz_s16)

#line 519
SIMD_INTRIN_DEF(loadn_s16)

#line 519
SIMD_INTRIN_DEF(loadn_till_s16)

#line 519
SIMD_INTRIN_DEF(loadn_tillz_s16)

#line 519
SIMD_INTRIN_DEF(store_till_s16)

#line 519
SIMD_INTRIN_DEF(storen_s16)

#line 519
SIMD_INTRIN_DEF(storen_till_s16)

#endif // ncont_sup

/****************************
 * Lookup tables
 ****************************/
#if 16 == 32
SIMD_INTRIN_DEF(lut32_s16)
#endif
#if 16 == 64
SIMD_INTRIN_DEF(lut16_s16)
#endif
/***************************
 * Misc
 ***************************/
#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u8_s16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s8_s16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u16_s16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s16_s16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u32_s16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s32_s16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u64_s16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s64_s16)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_f32_s16)
#endif // simd_sup2

#line 539
#if NPY_SIMD_F64
SIMD_INTRIN_DEF(reinterpret_f64_s16)
#endif // simd_sup2


#line 547
SIMD_INTRIN_DEF(set_s16)

#line 547
SIMD_INTRIN_DEF(setf_s16)

#line 547
SIMD_INTRIN_DEF(setall_s16)

#line 547
SIMD_INTRIN_DEF(zero_s16)

#line 547
SIMD_INTRIN_DEF(select_s16)


/***************************
 * Reorder
 ***************************/
#line 556
SIMD_INTRIN_DEF(combinel_s16)

#line 556
SIMD_INTRIN_DEF(combineh_s16)

#line 556
SIMD_INTRIN_DEF(combine_s16)

#line 556
SIMD_INTRIN_DEF(zip_s16)


#if 1
SIMD_INTRIN_DEF(rev64_s16)
#endif

/***************************
 * Operators
 ***************************/
#if 15 > 0
#line 570
SIMD_INTRIN_DEF(shl_s16)

#line 570
SIMD_INTRIN_DEF(shr_s16)

#line 570
SIMD_INTRIN_DEF(shli_s16)

#line 570
SIMD_INTRIN_DEF(shri_s16)

#endif // shl_imm

#line 577
SIMD_INTRIN_DEF(and_s16)

#line 577
SIMD_INTRIN_DEF(or_s16)

#line 577
SIMD_INTRIN_DEF(xor_s16)

#line 577
SIMD_INTRIN_DEF(not_s16)

#line 577
SIMD_INTRIN_DEF(cmpeq_s16)

#line 577
SIMD_INTRIN_DEF(cmpneq_s16)

#line 577
SIMD_INTRIN_DEF(cmpgt_s16)

#line 577
SIMD_INTRIN_DEF(cmpge_s16)

#line 577
SIMD_INTRIN_DEF(cmplt_s16)

#line 577
SIMD_INTRIN_DEF(cmple_s16)


/***************************
 * Conversion
 ***************************/
SIMD_INTRIN_DEF(cvt_s16_b16)
SIMD_INTRIN_DEF(cvt_b16_s16)
#if 0
SIMD_INTRIN_DEF(expand_s16_s16)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
#line 594
SIMD_INTRIN_DEF(add_s16)

#line 594
SIMD_INTRIN_DEF(sub_s16)


#if 1
#line 601
SIMD_INTRIN_DEF(adds_s16)

#line 601
SIMD_INTRIN_DEF(subs_s16)

#endif // sat_sup

#if 1
SIMD_INTRIN_DEF(mul_s16)
#endif // mul_sup

#if 0
SIMD_INTRIN_DEF(div_s16)
#endif // div_sup

#if 1
SIMD_INTRIN_DEF(divisor_s16)
SIMD_INTRIN_DEF(divc_s16)
#endif // intdiv_sup

#if 0
#line 622
SIMD_INTRIN_DEF(muladd_s16)

#line 622
SIMD_INTRIN_DEF(mulsub_s16)

#line 622
SIMD_INTRIN_DEF(nmuladd_s16)

#line 622
SIMD_INTRIN_DEF(nmulsub_s16)

#endif // fused_sup

#if 0
SIMD_INTRIN_DEF(sum_s16)
#endif // sum_sup

#if 0
SIMD_INTRIN_DEF(sumup_s16)
#endif // sumup_sup
/***************************
 * Math
 ***************************/
#if 0
#line 640
SIMD_INTRIN_DEF(sqrt_s16)

#line 640
SIMD_INTRIN_DEF(recip_s16)

#line 640
SIMD_INTRIN_DEF(abs_s16)

#line 640
SIMD_INTRIN_DEF(square_s16)

#line 640
SIMD_INTRIN_DEF(rint_s16)

#line 640
SIMD_INTRIN_DEF(ceil_s16)

#line 640
SIMD_INTRIN_DEF(trunc_s16)

#line 640
SIMD_INTRIN_DEF(floor_s16)

#endif

#line 647
SIMD_INTRIN_DEF(max_s16)

#line 647
SIMD_INTRIN_DEF(min_s16)


#if 0
#line 654
SIMD_INTRIN_DEF(maxp_s16)

#line 654
SIMD_INTRIN_DEF(minp_s16)

#endif

/***************************
 * Mask operations
 ***************************/
#line 664
 SIMD_INTRIN_DEF(ifadd_s16)

#line 664
 SIMD_INTRIN_DEF(ifsub_s16)


#endif // simd_sup

#line 500
#if 1

/***************************
 * Memory
 ***************************/
#line 508
SIMD_INTRIN_DEF(load_u32)

#line 508
SIMD_INTRIN_DEF(loada_u32)

#line 508
SIMD_INTRIN_DEF(loads_u32)

#line 508
SIMD_INTRIN_DEF(loadl_u32)

#line 508
SIMD_INTRIN_DEF(store_u32)

#line 508
SIMD_INTRIN_DEF(storea_u32)

#line 508
SIMD_INTRIN_DEF(stores_u32)

#line 508
SIMD_INTRIN_DEF(storel_u32)

#line 508
SIMD_INTRIN_DEF(storeh_u32)


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 1
#line 519
SIMD_INTRIN_DEF(load_till_u32)

#line 519
SIMD_INTRIN_DEF(load_tillz_u32)

#line 519
SIMD_INTRIN_DEF(loadn_u32)

#line 519
SIMD_INTRIN_DEF(loadn_till_u32)

#line 519
SIMD_INTRIN_DEF(loadn_tillz_u32)

#line 519
SIMD_INTRIN_DEF(store_till_u32)

#line 519
SIMD_INTRIN_DEF(storen_u32)

#line 519
SIMD_INTRIN_DEF(storen_till_u32)

#endif // ncont_sup

/****************************
 * Lookup tables
 ****************************/
#if 32 == 32
SIMD_INTRIN_DEF(lut32_u32)
#endif
#if 32 == 64
SIMD_INTRIN_DEF(lut16_u32)
#endif
/***************************
 * Misc
 ***************************/
#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u8_u32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s8_u32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u16_u32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s16_u32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u32_u32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s32_u32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u64_u32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s64_u32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_f32_u32)
#endif // simd_sup2

#line 539
#if NPY_SIMD_F64
SIMD_INTRIN_DEF(reinterpret_f64_u32)
#endif // simd_sup2


#line 547
SIMD_INTRIN_DEF(set_u32)

#line 547
SIMD_INTRIN_DEF(setf_u32)

#line 547
SIMD_INTRIN_DEF(setall_u32)

#line 547
SIMD_INTRIN_DEF(zero_u32)

#line 547
SIMD_INTRIN_DEF(select_u32)


/***************************
 * Reorder
 ***************************/
#line 556
SIMD_INTRIN_DEF(combinel_u32)

#line 556
SIMD_INTRIN_DEF(combineh_u32)

#line 556
SIMD_INTRIN_DEF(combine_u32)

#line 556
SIMD_INTRIN_DEF(zip_u32)


#if 1
SIMD_INTRIN_DEF(rev64_u32)
#endif

/***************************
 * Operators
 ***************************/
#if 31 > 0
#line 570
SIMD_INTRIN_DEF(shl_u32)

#line 570
SIMD_INTRIN_DEF(shr_u32)

#line 570
SIMD_INTRIN_DEF(shli_u32)

#line 570
SIMD_INTRIN_DEF(shri_u32)

#endif // shl_imm

#line 577
SIMD_INTRIN_DEF(and_u32)

#line 577
SIMD_INTRIN_DEF(or_u32)

#line 577
SIMD_INTRIN_DEF(xor_u32)

#line 577
SIMD_INTRIN_DEF(not_u32)

#line 577
SIMD_INTRIN_DEF(cmpeq_u32)

#line 577
SIMD_INTRIN_DEF(cmpneq_u32)

#line 577
SIMD_INTRIN_DEF(cmpgt_u32)

#line 577
SIMD_INTRIN_DEF(cmpge_u32)

#line 577
SIMD_INTRIN_DEF(cmplt_u32)

#line 577
SIMD_INTRIN_DEF(cmple_u32)


/***************************
 * Conversion
 ***************************/
SIMD_INTRIN_DEF(cvt_u32_b32)
SIMD_INTRIN_DEF(cvt_b32_u32)
#if 0
SIMD_INTRIN_DEF(expand_u32_u32)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
#line 594
SIMD_INTRIN_DEF(add_u32)

#line 594
SIMD_INTRIN_DEF(sub_u32)


#if 0
#line 601
SIMD_INTRIN_DEF(adds_u32)

#line 601
SIMD_INTRIN_DEF(subs_u32)

#endif // sat_sup

#if 1
SIMD_INTRIN_DEF(mul_u32)
#endif // mul_sup

#if 0
SIMD_INTRIN_DEF(div_u32)
#endif // div_sup

#if 1
SIMD_INTRIN_DEF(divisor_u32)
SIMD_INTRIN_DEF(divc_u32)
#endif // intdiv_sup

#if 0
#line 622
SIMD_INTRIN_DEF(muladd_u32)

#line 622
SIMD_INTRIN_DEF(mulsub_u32)

#line 622
SIMD_INTRIN_DEF(nmuladd_u32)

#line 622
SIMD_INTRIN_DEF(nmulsub_u32)

#endif // fused_sup

#if 1
SIMD_INTRIN_DEF(sum_u32)
#endif // sum_sup

#if 0
SIMD_INTRIN_DEF(sumup_u32)
#endif // sumup_sup
/***************************
 * Math
 ***************************/
#if 0
#line 640
SIMD_INTRIN_DEF(sqrt_u32)

#line 640
SIMD_INTRIN_DEF(recip_u32)

#line 640
SIMD_INTRIN_DEF(abs_u32)

#line 640
SIMD_INTRIN_DEF(square_u32)

#line 640
SIMD_INTRIN_DEF(rint_u32)

#line 640
SIMD_INTRIN_DEF(ceil_u32)

#line 640
SIMD_INTRIN_DEF(trunc_u32)

#line 640
SIMD_INTRIN_DEF(floor_u32)

#endif

#line 647
SIMD_INTRIN_DEF(max_u32)

#line 647
SIMD_INTRIN_DEF(min_u32)


#if 0
#line 654
SIMD_INTRIN_DEF(maxp_u32)

#line 654
SIMD_INTRIN_DEF(minp_u32)

#endif

/***************************
 * Mask operations
 ***************************/
#line 664
 SIMD_INTRIN_DEF(ifadd_u32)

#line 664
 SIMD_INTRIN_DEF(ifsub_u32)


#endif // simd_sup

#line 500
#if 1

/***************************
 * Memory
 ***************************/
#line 508
SIMD_INTRIN_DEF(load_s32)

#line 508
SIMD_INTRIN_DEF(loada_s32)

#line 508
SIMD_INTRIN_DEF(loads_s32)

#line 508
SIMD_INTRIN_DEF(loadl_s32)

#line 508
SIMD_INTRIN_DEF(store_s32)

#line 508
SIMD_INTRIN_DEF(storea_s32)

#line 508
SIMD_INTRIN_DEF(stores_s32)

#line 508
SIMD_INTRIN_DEF(storel_s32)

#line 508
SIMD_INTRIN_DEF(storeh_s32)


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 1
#line 519
SIMD_INTRIN_DEF(load_till_s32)

#line 519
SIMD_INTRIN_DEF(load_tillz_s32)

#line 519
SIMD_INTRIN_DEF(loadn_s32)

#line 519
SIMD_INTRIN_DEF(loadn_till_s32)

#line 519
SIMD_INTRIN_DEF(loadn_tillz_s32)

#line 519
SIMD_INTRIN_DEF(store_till_s32)

#line 519
SIMD_INTRIN_DEF(storen_s32)

#line 519
SIMD_INTRIN_DEF(storen_till_s32)

#endif // ncont_sup

/****************************
 * Lookup tables
 ****************************/
#if 32 == 32
SIMD_INTRIN_DEF(lut32_s32)
#endif
#if 32 == 64
SIMD_INTRIN_DEF(lut16_s32)
#endif
/***************************
 * Misc
 ***************************/
#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u8_s32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s8_s32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u16_s32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s16_s32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u32_s32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s32_s32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u64_s32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s64_s32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_f32_s32)
#endif // simd_sup2

#line 539
#if NPY_SIMD_F64
SIMD_INTRIN_DEF(reinterpret_f64_s32)
#endif // simd_sup2


#line 547
SIMD_INTRIN_DEF(set_s32)

#line 547
SIMD_INTRIN_DEF(setf_s32)

#line 547
SIMD_INTRIN_DEF(setall_s32)

#line 547
SIMD_INTRIN_DEF(zero_s32)

#line 547
SIMD_INTRIN_DEF(select_s32)


/***************************
 * Reorder
 ***************************/
#line 556
SIMD_INTRIN_DEF(combinel_s32)

#line 556
SIMD_INTRIN_DEF(combineh_s32)

#line 556
SIMD_INTRIN_DEF(combine_s32)

#line 556
SIMD_INTRIN_DEF(zip_s32)


#if 1
SIMD_INTRIN_DEF(rev64_s32)
#endif

/***************************
 * Operators
 ***************************/
#if 31 > 0
#line 570
SIMD_INTRIN_DEF(shl_s32)

#line 570
SIMD_INTRIN_DEF(shr_s32)

#line 570
SIMD_INTRIN_DEF(shli_s32)

#line 570
SIMD_INTRIN_DEF(shri_s32)

#endif // shl_imm

#line 577
SIMD_INTRIN_DEF(and_s32)

#line 577
SIMD_INTRIN_DEF(or_s32)

#line 577
SIMD_INTRIN_DEF(xor_s32)

#line 577
SIMD_INTRIN_DEF(not_s32)

#line 577
SIMD_INTRIN_DEF(cmpeq_s32)

#line 577
SIMD_INTRIN_DEF(cmpneq_s32)

#line 577
SIMD_INTRIN_DEF(cmpgt_s32)

#line 577
SIMD_INTRIN_DEF(cmpge_s32)

#line 577
SIMD_INTRIN_DEF(cmplt_s32)

#line 577
SIMD_INTRIN_DEF(cmple_s32)


/***************************
 * Conversion
 ***************************/
SIMD_INTRIN_DEF(cvt_s32_b32)
SIMD_INTRIN_DEF(cvt_b32_s32)
#if 0
SIMD_INTRIN_DEF(expand_s32_s32)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
#line 594
SIMD_INTRIN_DEF(add_s32)

#line 594
SIMD_INTRIN_DEF(sub_s32)


#if 0
#line 601
SIMD_INTRIN_DEF(adds_s32)

#line 601
SIMD_INTRIN_DEF(subs_s32)

#endif // sat_sup

#if 1
SIMD_INTRIN_DEF(mul_s32)
#endif // mul_sup

#if 0
SIMD_INTRIN_DEF(div_s32)
#endif // div_sup

#if 1
SIMD_INTRIN_DEF(divisor_s32)
SIMD_INTRIN_DEF(divc_s32)
#endif // intdiv_sup

#if 0
#line 622
SIMD_INTRIN_DEF(muladd_s32)

#line 622
SIMD_INTRIN_DEF(mulsub_s32)

#line 622
SIMD_INTRIN_DEF(nmuladd_s32)

#line 622
SIMD_INTRIN_DEF(nmulsub_s32)

#endif // fused_sup

#if 0
SIMD_INTRIN_DEF(sum_s32)
#endif // sum_sup

#if 0
SIMD_INTRIN_DEF(sumup_s32)
#endif // sumup_sup
/***************************
 * Math
 ***************************/
#if 0
#line 640
SIMD_INTRIN_DEF(sqrt_s32)

#line 640
SIMD_INTRIN_DEF(recip_s32)

#line 640
SIMD_INTRIN_DEF(abs_s32)

#line 640
SIMD_INTRIN_DEF(square_s32)

#line 640
SIMD_INTRIN_DEF(rint_s32)

#line 640
SIMD_INTRIN_DEF(ceil_s32)

#line 640
SIMD_INTRIN_DEF(trunc_s32)

#line 640
SIMD_INTRIN_DEF(floor_s32)

#endif

#line 647
SIMD_INTRIN_DEF(max_s32)

#line 647
SIMD_INTRIN_DEF(min_s32)


#if 0
#line 654
SIMD_INTRIN_DEF(maxp_s32)

#line 654
SIMD_INTRIN_DEF(minp_s32)

#endif

/***************************
 * Mask operations
 ***************************/
#line 664
 SIMD_INTRIN_DEF(ifadd_s32)

#line 664
 SIMD_INTRIN_DEF(ifsub_s32)


#endif // simd_sup

#line 500
#if 1

/***************************
 * Memory
 ***************************/
#line 508
SIMD_INTRIN_DEF(load_u64)

#line 508
SIMD_INTRIN_DEF(loada_u64)

#line 508
SIMD_INTRIN_DEF(loads_u64)

#line 508
SIMD_INTRIN_DEF(loadl_u64)

#line 508
SIMD_INTRIN_DEF(store_u64)

#line 508
SIMD_INTRIN_DEF(storea_u64)

#line 508
SIMD_INTRIN_DEF(stores_u64)

#line 508
SIMD_INTRIN_DEF(storel_u64)

#line 508
SIMD_INTRIN_DEF(storeh_u64)


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 1
#line 519
SIMD_INTRIN_DEF(load_till_u64)

#line 519
SIMD_INTRIN_DEF(load_tillz_u64)

#line 519
SIMD_INTRIN_DEF(loadn_u64)

#line 519
SIMD_INTRIN_DEF(loadn_till_u64)

#line 519
SIMD_INTRIN_DEF(loadn_tillz_u64)

#line 519
SIMD_INTRIN_DEF(store_till_u64)

#line 519
SIMD_INTRIN_DEF(storen_u64)

#line 519
SIMD_INTRIN_DEF(storen_till_u64)

#endif // ncont_sup

/****************************
 * Lookup tables
 ****************************/
#if 64 == 32
SIMD_INTRIN_DEF(lut32_u64)
#endif
#if 64 == 64
SIMD_INTRIN_DEF(lut16_u64)
#endif
/***************************
 * Misc
 ***************************/
#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u8_u64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s8_u64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u16_u64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s16_u64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u32_u64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s32_u64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u64_u64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s64_u64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_f32_u64)
#endif // simd_sup2

#line 539
#if NPY_SIMD_F64
SIMD_INTRIN_DEF(reinterpret_f64_u64)
#endif // simd_sup2


#line 547
SIMD_INTRIN_DEF(set_u64)

#line 547
SIMD_INTRIN_DEF(setf_u64)

#line 547
SIMD_INTRIN_DEF(setall_u64)

#line 547
SIMD_INTRIN_DEF(zero_u64)

#line 547
SIMD_INTRIN_DEF(select_u64)


/***************************
 * Reorder
 ***************************/
#line 556
SIMD_INTRIN_DEF(combinel_u64)

#line 556
SIMD_INTRIN_DEF(combineh_u64)

#line 556
SIMD_INTRIN_DEF(combine_u64)

#line 556
SIMD_INTRIN_DEF(zip_u64)


#if 0
SIMD_INTRIN_DEF(rev64_u64)
#endif

/***************************
 * Operators
 ***************************/
#if 63 > 0
#line 570
SIMD_INTRIN_DEF(shl_u64)

#line 570
SIMD_INTRIN_DEF(shr_u64)

#line 570
SIMD_INTRIN_DEF(shli_u64)

#line 570
SIMD_INTRIN_DEF(shri_u64)

#endif // shl_imm

#line 577
SIMD_INTRIN_DEF(and_u64)

#line 577
SIMD_INTRIN_DEF(or_u64)

#line 577
SIMD_INTRIN_DEF(xor_u64)

#line 577
SIMD_INTRIN_DEF(not_u64)

#line 577
SIMD_INTRIN_DEF(cmpeq_u64)

#line 577
SIMD_INTRIN_DEF(cmpneq_u64)

#line 577
SIMD_INTRIN_DEF(cmpgt_u64)

#line 577
SIMD_INTRIN_DEF(cmpge_u64)

#line 577
SIMD_INTRIN_DEF(cmplt_u64)

#line 577
SIMD_INTRIN_DEF(cmple_u64)


/***************************
 * Conversion
 ***************************/
SIMD_INTRIN_DEF(cvt_u64_b64)
SIMD_INTRIN_DEF(cvt_b64_u64)
#if 0
SIMD_INTRIN_DEF(expand_u64_u64)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
#line 594
SIMD_INTRIN_DEF(add_u64)

#line 594
SIMD_INTRIN_DEF(sub_u64)


#if 0
#line 601
SIMD_INTRIN_DEF(adds_u64)

#line 601
SIMD_INTRIN_DEF(subs_u64)

#endif // sat_sup

#if 0
SIMD_INTRIN_DEF(mul_u64)
#endif // mul_sup

#if 0
SIMD_INTRIN_DEF(div_u64)
#endif // div_sup

#if 1
SIMD_INTRIN_DEF(divisor_u64)
SIMD_INTRIN_DEF(divc_u64)
#endif // intdiv_sup

#if 0
#line 622
SIMD_INTRIN_DEF(muladd_u64)

#line 622
SIMD_INTRIN_DEF(mulsub_u64)

#line 622
SIMD_INTRIN_DEF(nmuladd_u64)

#line 622
SIMD_INTRIN_DEF(nmulsub_u64)

#endif // fused_sup

#if 1
SIMD_INTRIN_DEF(sum_u64)
#endif // sum_sup

#if 0
SIMD_INTRIN_DEF(sumup_u64)
#endif // sumup_sup
/***************************
 * Math
 ***************************/
#if 0
#line 640
SIMD_INTRIN_DEF(sqrt_u64)

#line 640
SIMD_INTRIN_DEF(recip_u64)

#line 640
SIMD_INTRIN_DEF(abs_u64)

#line 640
SIMD_INTRIN_DEF(square_u64)

#line 640
SIMD_INTRIN_DEF(rint_u64)

#line 640
SIMD_INTRIN_DEF(ceil_u64)

#line 640
SIMD_INTRIN_DEF(trunc_u64)

#line 640
SIMD_INTRIN_DEF(floor_u64)

#endif

#line 647
SIMD_INTRIN_DEF(max_u64)

#line 647
SIMD_INTRIN_DEF(min_u64)


#if 0
#line 654
SIMD_INTRIN_DEF(maxp_u64)

#line 654
SIMD_INTRIN_DEF(minp_u64)

#endif

/***************************
 * Mask operations
 ***************************/
#line 664
 SIMD_INTRIN_DEF(ifadd_u64)

#line 664
 SIMD_INTRIN_DEF(ifsub_u64)


#endif // simd_sup

#line 500
#if 1

/***************************
 * Memory
 ***************************/
#line 508
SIMD_INTRIN_DEF(load_s64)

#line 508
SIMD_INTRIN_DEF(loada_s64)

#line 508
SIMD_INTRIN_DEF(loads_s64)

#line 508
SIMD_INTRIN_DEF(loadl_s64)

#line 508
SIMD_INTRIN_DEF(store_s64)

#line 508
SIMD_INTRIN_DEF(storea_s64)

#line 508
SIMD_INTRIN_DEF(stores_s64)

#line 508
SIMD_INTRIN_DEF(storel_s64)

#line 508
SIMD_INTRIN_DEF(storeh_s64)


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 1
#line 519
SIMD_INTRIN_DEF(load_till_s64)

#line 519
SIMD_INTRIN_DEF(load_tillz_s64)

#line 519
SIMD_INTRIN_DEF(loadn_s64)

#line 519
SIMD_INTRIN_DEF(loadn_till_s64)

#line 519
SIMD_INTRIN_DEF(loadn_tillz_s64)

#line 519
SIMD_INTRIN_DEF(store_till_s64)

#line 519
SIMD_INTRIN_DEF(storen_s64)

#line 519
SIMD_INTRIN_DEF(storen_till_s64)

#endif // ncont_sup

/****************************
 * Lookup tables
 ****************************/
#if 64 == 32
SIMD_INTRIN_DEF(lut32_s64)
#endif
#if 64 == 64
SIMD_INTRIN_DEF(lut16_s64)
#endif
/***************************
 * Misc
 ***************************/
#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u8_s64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s8_s64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u16_s64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s16_s64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u32_s64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s32_s64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u64_s64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s64_s64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_f32_s64)
#endif // simd_sup2

#line 539
#if NPY_SIMD_F64
SIMD_INTRIN_DEF(reinterpret_f64_s64)
#endif // simd_sup2


#line 547
SIMD_INTRIN_DEF(set_s64)

#line 547
SIMD_INTRIN_DEF(setf_s64)

#line 547
SIMD_INTRIN_DEF(setall_s64)

#line 547
SIMD_INTRIN_DEF(zero_s64)

#line 547
SIMD_INTRIN_DEF(select_s64)


/***************************
 * Reorder
 ***************************/
#line 556
SIMD_INTRIN_DEF(combinel_s64)

#line 556
SIMD_INTRIN_DEF(combineh_s64)

#line 556
SIMD_INTRIN_DEF(combine_s64)

#line 556
SIMD_INTRIN_DEF(zip_s64)


#if 0
SIMD_INTRIN_DEF(rev64_s64)
#endif

/***************************
 * Operators
 ***************************/
#if 63 > 0
#line 570
SIMD_INTRIN_DEF(shl_s64)

#line 570
SIMD_INTRIN_DEF(shr_s64)

#line 570
SIMD_INTRIN_DEF(shli_s64)

#line 570
SIMD_INTRIN_DEF(shri_s64)

#endif // shl_imm

#line 577
SIMD_INTRIN_DEF(and_s64)

#line 577
SIMD_INTRIN_DEF(or_s64)

#line 577
SIMD_INTRIN_DEF(xor_s64)

#line 577
SIMD_INTRIN_DEF(not_s64)

#line 577
SIMD_INTRIN_DEF(cmpeq_s64)

#line 577
SIMD_INTRIN_DEF(cmpneq_s64)

#line 577
SIMD_INTRIN_DEF(cmpgt_s64)

#line 577
SIMD_INTRIN_DEF(cmpge_s64)

#line 577
SIMD_INTRIN_DEF(cmplt_s64)

#line 577
SIMD_INTRIN_DEF(cmple_s64)


/***************************
 * Conversion
 ***************************/
SIMD_INTRIN_DEF(cvt_s64_b64)
SIMD_INTRIN_DEF(cvt_b64_s64)
#if 0
SIMD_INTRIN_DEF(expand_s64_s64)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
#line 594
SIMD_INTRIN_DEF(add_s64)

#line 594
SIMD_INTRIN_DEF(sub_s64)


#if 0
#line 601
SIMD_INTRIN_DEF(adds_s64)

#line 601
SIMD_INTRIN_DEF(subs_s64)

#endif // sat_sup

#if 0
SIMD_INTRIN_DEF(mul_s64)
#endif // mul_sup

#if 0
SIMD_INTRIN_DEF(div_s64)
#endif // div_sup

#if 1
SIMD_INTRIN_DEF(divisor_s64)
SIMD_INTRIN_DEF(divc_s64)
#endif // intdiv_sup

#if 0
#line 622
SIMD_INTRIN_DEF(muladd_s64)

#line 622
SIMD_INTRIN_DEF(mulsub_s64)

#line 622
SIMD_INTRIN_DEF(nmuladd_s64)

#line 622
SIMD_INTRIN_DEF(nmulsub_s64)

#endif // fused_sup

#if 0
SIMD_INTRIN_DEF(sum_s64)
#endif // sum_sup

#if 0
SIMD_INTRIN_DEF(sumup_s64)
#endif // sumup_sup
/***************************
 * Math
 ***************************/
#if 0
#line 640
SIMD_INTRIN_DEF(sqrt_s64)

#line 640
SIMD_INTRIN_DEF(recip_s64)

#line 640
SIMD_INTRIN_DEF(abs_s64)

#line 640
SIMD_INTRIN_DEF(square_s64)

#line 640
SIMD_INTRIN_DEF(rint_s64)

#line 640
SIMD_INTRIN_DEF(ceil_s64)

#line 640
SIMD_INTRIN_DEF(trunc_s64)

#line 640
SIMD_INTRIN_DEF(floor_s64)

#endif

#line 647
SIMD_INTRIN_DEF(max_s64)

#line 647
SIMD_INTRIN_DEF(min_s64)


#if 0
#line 654
SIMD_INTRIN_DEF(maxp_s64)

#line 654
SIMD_INTRIN_DEF(minp_s64)

#endif

/***************************
 * Mask operations
 ***************************/
#line 664
 SIMD_INTRIN_DEF(ifadd_s64)

#line 664
 SIMD_INTRIN_DEF(ifsub_s64)


#endif // simd_sup

#line 500
#if 1

/***************************
 * Memory
 ***************************/
#line 508
SIMD_INTRIN_DEF(load_f32)

#line 508
SIMD_INTRIN_DEF(loada_f32)

#line 508
SIMD_INTRIN_DEF(loads_f32)

#line 508
SIMD_INTRIN_DEF(loadl_f32)

#line 508
SIMD_INTRIN_DEF(store_f32)

#line 508
SIMD_INTRIN_DEF(storea_f32)

#line 508
SIMD_INTRIN_DEF(stores_f32)

#line 508
SIMD_INTRIN_DEF(storel_f32)

#line 508
SIMD_INTRIN_DEF(storeh_f32)


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 1
#line 519
SIMD_INTRIN_DEF(load_till_f32)

#line 519
SIMD_INTRIN_DEF(load_tillz_f32)

#line 519
SIMD_INTRIN_DEF(loadn_f32)

#line 519
SIMD_INTRIN_DEF(loadn_till_f32)

#line 519
SIMD_INTRIN_DEF(loadn_tillz_f32)

#line 519
SIMD_INTRIN_DEF(store_till_f32)

#line 519
SIMD_INTRIN_DEF(storen_f32)

#line 519
SIMD_INTRIN_DEF(storen_till_f32)

#endif // ncont_sup

/****************************
 * Lookup tables
 ****************************/
#if 32 == 32
SIMD_INTRIN_DEF(lut32_f32)
#endif
#if 32 == 64
SIMD_INTRIN_DEF(lut16_f32)
#endif
/***************************
 * Misc
 ***************************/
#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u8_f32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s8_f32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u16_f32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s16_f32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u32_f32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s32_f32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u64_f32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s64_f32)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_f32_f32)
#endif // simd_sup2

#line 539
#if NPY_SIMD_F64
SIMD_INTRIN_DEF(reinterpret_f64_f32)
#endif // simd_sup2


#line 547
SIMD_INTRIN_DEF(set_f32)

#line 547
SIMD_INTRIN_DEF(setf_f32)

#line 547
SIMD_INTRIN_DEF(setall_f32)

#line 547
SIMD_INTRIN_DEF(zero_f32)

#line 547
SIMD_INTRIN_DEF(select_f32)


/***************************
 * Reorder
 ***************************/
#line 556
SIMD_INTRIN_DEF(combinel_f32)

#line 556
SIMD_INTRIN_DEF(combineh_f32)

#line 556
SIMD_INTRIN_DEF(combine_f32)

#line 556
SIMD_INTRIN_DEF(zip_f32)


#if 1
SIMD_INTRIN_DEF(rev64_f32)
#endif

/***************************
 * Operators
 ***************************/
#if 0 > 0
#line 570
SIMD_INTRIN_DEF(shl_f32)

#line 570
SIMD_INTRIN_DEF(shr_f32)

#line 570
SIMD_INTRIN_DEF(shli_f32)

#line 570
SIMD_INTRIN_DEF(shri_f32)

#endif // shl_imm

#line 577
SIMD_INTRIN_DEF(and_f32)

#line 577
SIMD_INTRIN_DEF(or_f32)

#line 577
SIMD_INTRIN_DEF(xor_f32)

#line 577
SIMD_INTRIN_DEF(not_f32)

#line 577
SIMD_INTRIN_DEF(cmpeq_f32)

#line 577
SIMD_INTRIN_DEF(cmpneq_f32)

#line 577
SIMD_INTRIN_DEF(cmpgt_f32)

#line 577
SIMD_INTRIN_DEF(cmpge_f32)

#line 577
SIMD_INTRIN_DEF(cmplt_f32)

#line 577
SIMD_INTRIN_DEF(cmple_f32)


/***************************
 * Conversion
 ***************************/
SIMD_INTRIN_DEF(cvt_f32_b32)
SIMD_INTRIN_DEF(cvt_b32_f32)
#if 0
SIMD_INTRIN_DEF(expand_f32_f32)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
#line 594
SIMD_INTRIN_DEF(add_f32)

#line 594
SIMD_INTRIN_DEF(sub_f32)


#if 0
#line 601
SIMD_INTRIN_DEF(adds_f32)

#line 601
SIMD_INTRIN_DEF(subs_f32)

#endif // sat_sup

#if 1
SIMD_INTRIN_DEF(mul_f32)
#endif // mul_sup

#if 1
SIMD_INTRIN_DEF(div_f32)
#endif // div_sup

#if 0
SIMD_INTRIN_DEF(divisor_f32)
SIMD_INTRIN_DEF(divc_f32)
#endif // intdiv_sup

#if 1
#line 622
SIMD_INTRIN_DEF(muladd_f32)

#line 622
SIMD_INTRIN_DEF(mulsub_f32)

#line 622
SIMD_INTRIN_DEF(nmuladd_f32)

#line 622
SIMD_INTRIN_DEF(nmulsub_f32)

#endif // fused_sup

#if 1
SIMD_INTRIN_DEF(sum_f32)
#endif // sum_sup

#if 0
SIMD_INTRIN_DEF(sumup_f32)
#endif // sumup_sup
/***************************
 * Math
 ***************************/
#if 1
#line 640
SIMD_INTRIN_DEF(sqrt_f32)

#line 640
SIMD_INTRIN_DEF(recip_f32)

#line 640
SIMD_INTRIN_DEF(abs_f32)

#line 640
SIMD_INTRIN_DEF(square_f32)

#line 640
SIMD_INTRIN_DEF(rint_f32)

#line 640
SIMD_INTRIN_DEF(ceil_f32)

#line 640
SIMD_INTRIN_DEF(trunc_f32)

#line 640
SIMD_INTRIN_DEF(floor_f32)

#endif

#line 647
SIMD_INTRIN_DEF(max_f32)

#line 647
SIMD_INTRIN_DEF(min_f32)


#if 1
#line 654
SIMD_INTRIN_DEF(maxp_f32)

#line 654
SIMD_INTRIN_DEF(minp_f32)

#endif

/***************************
 * Mask operations
 ***************************/
#line 664
 SIMD_INTRIN_DEF(ifadd_f32)

#line 664
 SIMD_INTRIN_DEF(ifsub_f32)


#endif // simd_sup

#line 500
#if NPY_SIMD_F64

/***************************
 * Memory
 ***************************/
#line 508
SIMD_INTRIN_DEF(load_f64)

#line 508
SIMD_INTRIN_DEF(loada_f64)

#line 508
SIMD_INTRIN_DEF(loads_f64)

#line 508
SIMD_INTRIN_DEF(loadl_f64)

#line 508
SIMD_INTRIN_DEF(store_f64)

#line 508
SIMD_INTRIN_DEF(storea_f64)

#line 508
SIMD_INTRIN_DEF(stores_f64)

#line 508
SIMD_INTRIN_DEF(storel_f64)

#line 508
SIMD_INTRIN_DEF(storeh_f64)


/****************************************
 * Non-contiguous/Partial Memory access
 ****************************************/
#if 1
#line 519
SIMD_INTRIN_DEF(load_till_f64)

#line 519
SIMD_INTRIN_DEF(load_tillz_f64)

#line 519
SIMD_INTRIN_DEF(loadn_f64)

#line 519
SIMD_INTRIN_DEF(loadn_till_f64)

#line 519
SIMD_INTRIN_DEF(loadn_tillz_f64)

#line 519
SIMD_INTRIN_DEF(store_till_f64)

#line 519
SIMD_INTRIN_DEF(storen_f64)

#line 519
SIMD_INTRIN_DEF(storen_till_f64)

#endif // ncont_sup

/****************************
 * Lookup tables
 ****************************/
#if 64 == 32
SIMD_INTRIN_DEF(lut32_f64)
#endif
#if 64 == 64
SIMD_INTRIN_DEF(lut16_f64)
#endif
/***************************
 * Misc
 ***************************/
#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u8_f64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s8_f64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u16_f64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s16_f64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u32_f64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s32_f64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_u64_f64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_s64_f64)
#endif // simd_sup2

#line 539
#if 1
SIMD_INTRIN_DEF(reinterpret_f32_f64)
#endif // simd_sup2

#line 539
#if NPY_SIMD_F64
SIMD_INTRIN_DEF(reinterpret_f64_f64)
#endif // simd_sup2


#line 547
SIMD_INTRIN_DEF(set_f64)

#line 547
SIMD_INTRIN_DEF(setf_f64)

#line 547
SIMD_INTRIN_DEF(setall_f64)

#line 547
SIMD_INTRIN_DEF(zero_f64)

#line 547
SIMD_INTRIN_DEF(select_f64)


/***************************
 * Reorder
 ***************************/
#line 556
SIMD_INTRIN_DEF(combinel_f64)

#line 556
SIMD_INTRIN_DEF(combineh_f64)

#line 556
SIMD_INTRIN_DEF(combine_f64)

#line 556
SIMD_INTRIN_DEF(zip_f64)


#if 0
SIMD_INTRIN_DEF(rev64_f64)
#endif

/***************************
 * Operators
 ***************************/
#if 0 > 0
#line 570
SIMD_INTRIN_DEF(shl_f64)

#line 570
SIMD_INTRIN_DEF(shr_f64)

#line 570
SIMD_INTRIN_DEF(shli_f64)

#line 570
SIMD_INTRIN_DEF(shri_f64)

#endif // shl_imm

#line 577
SIMD_INTRIN_DEF(and_f64)

#line 577
SIMD_INTRIN_DEF(or_f64)

#line 577
SIMD_INTRIN_DEF(xor_f64)

#line 577
SIMD_INTRIN_DEF(not_f64)

#line 577
SIMD_INTRIN_DEF(cmpeq_f64)

#line 577
SIMD_INTRIN_DEF(cmpneq_f64)

#line 577
SIMD_INTRIN_DEF(cmpgt_f64)

#line 577
SIMD_INTRIN_DEF(cmpge_f64)

#line 577
SIMD_INTRIN_DEF(cmplt_f64)

#line 577
SIMD_INTRIN_DEF(cmple_f64)


/***************************
 * Conversion
 ***************************/
SIMD_INTRIN_DEF(cvt_f64_b64)
SIMD_INTRIN_DEF(cvt_b64_f64)
#if 0
SIMD_INTRIN_DEF(expand_f64_f64)
#endif // expand_sup
/***************************
 * Arithmetic
 ***************************/
#line 594
SIMD_INTRIN_DEF(add_f64)

#line 594
SIMD_INTRIN_DEF(sub_f64)


#if 0
#line 601
SIMD_INTRIN_DEF(adds_f64)

#line 601
SIMD_INTRIN_DEF(subs_f64)

#endif // sat_sup

#if 1
SIMD_INTRIN_DEF(mul_f64)
#endif // mul_sup

#if 1
SIMD_INTRIN_DEF(div_f64)
#endif // div_sup

#if 0
SIMD_INTRIN_DEF(divisor_f64)
SIMD_INTRIN_DEF(divc_f64)
#endif // intdiv_sup

#if 1
#line 622
SIMD_INTRIN_DEF(muladd_f64)

#line 622
SIMD_INTRIN_DEF(mulsub_f64)

#line 622
SIMD_INTRIN_DEF(nmuladd_f64)

#line 622
SIMD_INTRIN_DEF(nmulsub_f64)

#endif // fused_sup

#if 1
SIMD_INTRIN_DEF(sum_f64)
#endif // sum_sup

#if 0
SIMD_INTRIN_DEF(sumup_f64)
#endif // sumup_sup
/***************************
 * Math
 ***************************/
#if 1
#line 640
SIMD_INTRIN_DEF(sqrt_f64)

#line 640
SIMD_INTRIN_DEF(recip_f64)

#line 640
SIMD_INTRIN_DEF(abs_f64)

#line 640
SIMD_INTRIN_DEF(square_f64)

#line 640
SIMD_INTRIN_DEF(rint_f64)

#line 640
SIMD_INTRIN_DEF(ceil_f64)

#line 640
SIMD_INTRIN_DEF(trunc_f64)

#line 640
SIMD_INTRIN_DEF(floor_f64)

#endif

#line 647
SIMD_INTRIN_DEF(max_f64)

#line 647
SIMD_INTRIN_DEF(min_f64)


#if 1
#line 654
SIMD_INTRIN_DEF(maxp_f64)

#line 654
SIMD_INTRIN_DEF(minp_f64)

#endif

/***************************
 * Mask operations
 ***************************/
#line 664
 SIMD_INTRIN_DEF(ifadd_f64)

#line 664
 SIMD_INTRIN_DEF(ifsub_f64)


#endif // simd_sup

/*************************************************************************
 * Variant
 ************************************************************************/
SIMD_INTRIN_DEF(cleanup)

/*************************************************************************
 * A special section for f32/f64 intrinsics outside the main repeater
 ************************************************************************/
/***************************
 * Operators
 ***************************/
// check special cases
SIMD_INTRIN_DEF(notnan_f32)
#if NPY_SIMD_F64
    SIMD_INTRIN_DEF(notnan_f64)
#endif
/***************************
 * Conversions
 ***************************/
// round to nearest integer (assume even)
SIMD_INTRIN_DEF(round_s32_f32)
#if NPY_SIMD_F64
    SIMD_INTRIN_DEF(round_s32_f64)
#endif

/*************************************************************************
 * A special section for boolean intrinsics outside the main repeater
 ************************************************************************/
/***************************
 * Operators
 ***************************/
// Logical
#line 704
SIMD_INTRIN_DEF(and_b8)
SIMD_INTRIN_DEF(or_b8)
SIMD_INTRIN_DEF(xor_b8)
SIMD_INTRIN_DEF(not_b8)

#line 704
SIMD_INTRIN_DEF(and_b16)
SIMD_INTRIN_DEF(or_b16)
SIMD_INTRIN_DEF(xor_b16)
SIMD_INTRIN_DEF(not_b16)

#line 704
SIMD_INTRIN_DEF(and_b32)
SIMD_INTRIN_DEF(or_b32)
SIMD_INTRIN_DEF(xor_b32)
SIMD_INTRIN_DEF(not_b32)

#line 704
SIMD_INTRIN_DEF(and_b64)
SIMD_INTRIN_DEF(or_b64)
SIMD_INTRIN_DEF(xor_b64)
SIMD_INTRIN_DEF(not_b64)

/***************************
 * Conversions
 ***************************/
// Convert mask vector to integer bitfield
#line 716
SIMD_INTRIN_DEF(tobits_b8)

#line 716
SIMD_INTRIN_DEF(tobits_b16)

#line 716
SIMD_INTRIN_DEF(tobits_b32)

#line 716
SIMD_INTRIN_DEF(tobits_b64)


/************************************************************************/
{NULL, NULL, 0, NULL}
}; // PyMethodDef

#endif // NPY_SIMD

//#########################################################################
//## Defining a separate module for each target
//#########################################################################
/*
 * Build the Python module for the CPU target this file is being compiled for.
 *
 * The module is named "numpy.core._simd.<target>" when
 * NPY__CPU_TARGET_CURRENT is defined and "numpy.core._simd.baseline"
 * otherwise. It always exposes the integer constants `simd`, `simd_f64`,
 * `simd_fma3` and `simd_width`. When NPY_SIMD is non-zero the method table
 * is attached, PySIMDVectorType_Init() is run on the module and one
 * `nlanes_<sfx>` integer constant is added per lane type.
 *
 * Returns the created module, or NULL if module creation or any of the
 * registration steps fails; in the latter case the partially populated
 * module is released before returning.
 */
NPY_VISIBILITY_HIDDEN PyObject *
NPY_CPU_DISPATCH_CURFX(simd_create_module)(void)
{
    static struct PyModuleDef defs = {
        .m_base = PyModuleDef_HEAD_INIT,
        .m_size = -1,
    #ifdef NPY__CPU_TARGET_CURRENT
        .m_name = "numpy.core._simd." NPY_TOSTRING(NPY__CPU_TARGET_CURRENT),
    #else
        .m_name = "numpy.core._simd.baseline",
    #endif
    #if NPY_SIMD
        .m_methods = simd__intrinsics_methods
    #else
        .m_methods = NULL
    #endif
    };

    PyObject *mod = PyModule_Create(&defs);
    if (mod == NULL) {
        return NULL;
    }

    // Capability constants: registered regardless of the value of NPY_SIMD.
    {
        const struct {
            const char *name;
            long value;
        } caps[] = {
            {"simd", NPY_SIMD},
            {"simd_f64", NPY_SIMD_F64},
            {"simd_fma3", NPY_SIMD_FMA3},
            {"simd_width", NPY_SIMD_WIDTH},
        };
        const size_t ncaps = sizeof(caps) / sizeof(caps[0]);
        for (size_t i = 0; i < ncaps; ++i) {
            if (PyModule_AddIntConstant(mod, caps[i].name, caps[i].value) != 0) {
                goto fail;
            }
        }
    }

#if NPY_SIMD
    if (PySIMDVectorType_Init(mod) != 0) {
        goto fail;
    }
    // Per-type lane counts, added in the same order as the template repeater.
    {
#line 768
        const struct {
            const char *name;
            long value;
        } lanes[] = {
            {"nlanes_u8", npyv_nlanes_u8},
            {"nlanes_s8", npyv_nlanes_s8},
            {"nlanes_u16", npyv_nlanes_u16},
            {"nlanes_s16", npyv_nlanes_s16},
            {"nlanes_u32", npyv_nlanes_u32},
            {"nlanes_s32", npyv_nlanes_s32},
            {"nlanes_u64", npyv_nlanes_u64},
            {"nlanes_s64", npyv_nlanes_s64},
            {"nlanes_f32", npyv_nlanes_f32},
            {"nlanes_f64", npyv_nlanes_f64},
        };
        const size_t nlanes = sizeof(lanes) / sizeof(lanes[0]);
        for (size_t i = 0; i < nlanes; ++i) {
            if (PyModule_AddIntConstant(mod, lanes[i].name, lanes[i].value) != 0) {
                goto fail;
            }
        }
    }
#endif // NPY_SIMD

    return mod;

fail:
    // Drop the half-initialized module so the caller only ever sees NULL.
    Py_DECREF(mod);
    return NULL;
}

