Extract explicit meshes with topology information from implicit surfaces combined by boolean operations, and perform surface/volume integration on them.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

3246 lines
109 KiB

#ifndef CBLAS_H
#define CBLAS_H
#include <stddef.h> // skipped
// We need to #include "bli_type_defs.h" in order to pull in the definition of
// f77_int. But in order to #include that header, we also need to pull in the
// headers that precede it in blis.h.
// NOTE: bli_config.h must be included before any other BLIS header. It is
// bootstrapped by ./configure and does not depend on later headers. Moreover
// these configuration variables are necessary to change some default behaviors
// (e.g. disable OS detection in bli_system.h in case of --disable-system).
// begin bli_config.h
#line 1 ".//bli_config.h"
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Build configuration for this BLIS installation. As noted where this file is
// pulled in, it is bootstrapped by ./configure and must precede every other
// BLIS header. The constant "#if 0" / "#if 1" tests below look like substituted
// configure answers (TODO confirm against the generator template); the arm
// that is not taken is kept so the alternative macro name stays visible.
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
// Enabled configuration "family" (config_name)
#define BLIS_FAMILY_X86_64
// Enabled sub-configurations (config_list)
#define BLIS_CONFIG_SKX
#define BLIS_CONFIG_KNL
#define BLIS_CONFIG_HASWELL
#define BLIS_CONFIG_SANDYBRIDGE
#define BLIS_CONFIG_PENRYN
#define BLIS_CONFIG_ZEN3
#define BLIS_CONFIG_ZEN2
#define BLIS_CONFIG_ZEN
#define BLIS_CONFIG_EXCAVATOR
#define BLIS_CONFIG_STEAMROLLER
#define BLIS_CONFIG_PILEDRIVER
#define BLIS_CONFIG_BULLDOZER
#define BLIS_CONFIG_GENERIC
// Enabled kernel sets (kernel_list)
#define BLIS_KERNELS_SKX
#define BLIS_KERNELS_KNL
#define BLIS_KERNELS_SANDYBRIDGE
#define BLIS_KERNELS_PENRYN
#define BLIS_KERNELS_ZEN3
#define BLIS_KERNELS_ZEN2
#define BLIS_KERNELS_HASWELL
#define BLIS_KERNELS_ZEN
#define BLIS_KERNELS_PILEDRIVER
#define BLIS_KERNELS_BULLDOZER
#define BLIS_KERNELS_GENERIC
// Disabled symbols (symbol_omit_list)
// (none are listed in this build)
// Version string of this build.
#define BLIS_VERSION_STRING "1.0"
// Operating-system support: enabled. bli_system.h only runs its OS detection
// when BLIS_ENABLE_SYSTEM is defined.
#if 1
#define BLIS_ENABLE_SYSTEM
#else
#define BLIS_DISABLE_SYSTEM
#endif
// BLIS_ENABLE_TLS is selected.
#if 1
#define BLIS_ENABLE_TLS
#else
#define BLIS_DISABLE_TLS
#endif
// OpenMP multithreading is selected and is also marked as the default.
#if 1
#define BLIS_ENABLE_OPENMP
#if 1
#define BLIS_ENABLE_OPENMP_AS_DEFAULT
#endif
#endif
// POSIX-threads multithreading is not selected.
#if 0
#define BLIS_ENABLE_PTHREADS
#if 0
#define BLIS_ENABLE_PTHREADS_AS_DEFAULT
#endif
#endif
// HPX multithreading is not selected.
#if 0
#define BLIS_ENABLE_HPX
#if 0
#define BLIS_ENABLE_HPX_AS_DEFAULT
#endif
#endif
// Of the three BLIS_ENABLE_JRIR_* alternatives, only SLAB is selected.
#if 1
#define BLIS_ENABLE_JRIR_SLAB
#endif
#if 0
#define BLIS_ENABLE_JRIR_RR
#endif
#if 0
#define BLIS_ENABLE_JRIR_TLB
#endif
// BLIS_ENABLE_PBA_POOLS is selected.
#if 1
#define BLIS_ENABLE_PBA_POOLS
#else
#define BLIS_DISABLE_PBA_POOLS
#endif
// BLIS_ENABLE_SBA_POOLS is selected.
#if 1
#define BLIS_ENABLE_SBA_POOLS
#else
#define BLIS_DISABLE_SBA_POOLS
#endif
// Memory tracing: BLIS_DISABLE_MEM_TRACING is selected.
#if 0
#define BLIS_ENABLE_MEM_TRACING
#else
#define BLIS_DISABLE_MEM_TRACING
#endif
// ScaLAPACK compatibility: BLIS_DISABLE_SCALAPACK_COMPAT is selected.
#if 0
#define BLIS_ENABLE_SCALAPACK_COMPAT
#else
#define BLIS_DISABLE_SCALAPACK_COMPAT
#endif
// BLIS integer size: the substituted value (0) matches neither 64 nor 32, so
// BLIS_INT_TYPE_SIZE is left undefined here and bli_config_macro_defs.h picks
// a default from BLIS_ARCH_64.
#if 0 == 64
#define BLIS_INT_TYPE_SIZE 64
#elif 0 == 32
#define BLIS_INT_TYPE_SIZE 32
#else
// determine automatically
#endif
// BLAS integer size: the substituted value (32) selects 32-bit integers.
#if 32 == 64
#define BLIS_BLAS_INT_TYPE_SIZE 64
#elif 32 == 32
#define BLIS_BLAS_INT_TYPE_SIZE 32
#else
// determine automatically
#endif
// BLAS compatibility layer: enabled, unless BLIS_ENABLE_BLAS or
// BLIS_DISABLE_BLAS was already defined before this header was read, in which
// case that earlier definition is left untouched.
#ifndef BLIS_ENABLE_BLAS
#ifndef BLIS_DISABLE_BLAS
#if 1
#define BLIS_ENABLE_BLAS
#else
#define BLIS_DISABLE_BLAS
#endif
#endif
#endif
// CBLAS compatibility layer: same precedence rule as for BLAS; enabled here.
#ifndef BLIS_ENABLE_CBLAS
#ifndef BLIS_DISABLE_CBLAS
#if 1
#define BLIS_ENABLE_CBLAS
#else
#define BLIS_DISABLE_CBLAS
#endif
#endif
#endif
// BLIS_ENABLE_SUP_HANDLING is selected.
#if 1
#define BLIS_ENABLE_SUP_HANDLING
#else
#define BLIS_DISABLE_SUP_HANDLING
#endif
// memkind: BLIS_DISABLE_MEMKIND is selected.
#if 0
#define BLIS_ENABLE_MEMKIND
#else
#define BLIS_DISABLE_MEMKIND
#endif
// BLIS_ENABLE_TRSM_PREINVERSION is selected.
#if 1
#define BLIS_ENABLE_TRSM_PREINVERSION
#else
#define BLIS_DISABLE_TRSM_PREINVERSION
#endif
// BLIS_ENABLE_PRAGMA_OMP_SIMD is selected.
#if 1
#define BLIS_ENABLE_PRAGMA_OMP_SIMD
#else
#define BLIS_DISABLE_PRAGMA_OMP_SIMD
#endif
// Sandbox: BLIS_DISABLE_SANDBOX is selected.
#if 0
#define BLIS_ENABLE_SANDBOX
#else
#define BLIS_DISABLE_SANDBOX
#endif
// Shared library: BLIS_DISABLE_SHARED is selected. With BLIS_ENABLE_SHARED
// undefined, bli_config_macro_defs.h defines BLIS_EXPORT as empty.
#if 0
#define BLIS_ENABLE_SHARED
#else
#define BLIS_DISABLE_SHARED
#endif
// Intel-style complex return: BLIS_DISABLE_COMPLEX_RETURN_INTEL is selected.
#if 0
#define BLIS_ENABLE_COMPLEX_RETURN_INTEL
#else
#define BLIS_DISABLE_COMPLEX_RETURN_INTEL
#endif
#endif
// end bli_config.h
#line 15 "./frame/compat/cblas/src/cblas.h"
// begin bli_system.h
#line 1 "./frame/include//bli_system.h"
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_SYSTEM_H
#define BLIS_SYSTEM_H
// Ask for the POSIX feature set (level 200809L) so that the POSIX parts of the
// system headers are declared. A level already chosen by the including code is
// respected and left as-is.
#if !defined(_POSIX_C_SOURCE)
#define _POSIX_C_SOURCE 200809L
#endif
#include <stdio.h> // skipped
#include <stdlib.h> // skipped
#include <math.h> // skipped
#include <string.h> // skipped
#include <stdarg.h> // skipped
#include <float.h> // skipped
#include <errno.h> // skipped
#include <ctype.h> // skipped
// Best-effort compiler identification. At most one of BLIS_ICC, BLIS_CLANG or
// BLIS_GCC ends up defined. The tests are tried in this fixed order and the
// first match wins (presumably because the Intel and clang front ends may also
// define __GNUC__ -- confirm before reordering).
#if defined(__INTEL_COMPILER) || defined(__ICC)
#define BLIS_ICC
#elif defined(__clang__)
#define BLIS_CLANG
#elif defined(__GNUC__)
#define BLIS_GCC
#endif
// Classify the target as 64-bit or 32-bit. Exactly one of BLIS_ARCH_64 and
// BLIS_ARCH_32 is defined: the target counts as 32-bit only when none of the
// known 64-bit predefined macros is present.
#if !defined(_M_X64) && !defined(__x86_64) && !defined(__aarch64__) && \
!defined(_ARCH_PPC64) && !defined(__s390x__) && !defined(_LP64)
#define BLIS_ARCH_32
#else
#define BLIS_ARCH_64
#endif
// Determine the target operating system.
//
// When BLIS_ENABLE_SYSTEM is defined, the first matching test below defines
// one BLIS_OS_* macro; if nothing matches, compilation stops with an error.
// When BLIS_ENABLE_SYSTEM is not defined, BLIS_OS_NONE is defined instead.
//
// Every BLIS_OS_* macro is defined to 1 so that it can be tested both by
// existence (#ifdef BLIS_OS_x) and by value (#if BLIS_OS_x), the latter being
// the form this header itself uses for BLIS_OS_WINDOWS and BLIS_OS_OSX.
#if defined(BLIS_ENABLE_SYSTEM)
#if defined(_WIN32) || defined(__CYGWIN__)
#define BLIS_OS_WINDOWS 1
#elif defined(__gnu_hurd__)
#define BLIS_OS_GNU 1
#elif defined(__APPLE__) || defined(__MACH__)
#define BLIS_OS_OSX 1
#elif defined(__ANDROID__)
// Tested before __linux__ so that Android is not reported as plain Linux.
#define BLIS_OS_ANDROID 1
#elif defined(__linux__)
#define BLIS_OS_LINUX 1
#elif defined(__bgq__)
#define BLIS_OS_BGQ 1
#elif defined(__bg__)
#define BLIS_OS_BGP 1
#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__bsdi__) || defined(__DragonFly__) || \
defined(__FreeBSD_kernel__) || defined(__HAIKU__)
#define BLIS_OS_BSD 1
#elif defined(__EMSCRIPTEN__) || defined(EMSCRIPTEN)
// __EMSCRIPTEN__ is the spelling current Emscripten toolchains define; the
// unprefixed legacy name is still accepted.
#define BLIS_OS_EMSCRIPTEN 1
#else
#error "Cannot determine operating system"
#endif
#else // BLIS_ENABLE_SYSTEM is not defined
#define BLIS_OS_NONE 1
#endif
// A few changes that may be necessary in Windows environments.
// BLIS_OS_WINDOWS is tested by value: an undefined macro evaluates to 0 in
// "#if", so this section is skipped on every other target.
#if BLIS_OS_WINDOWS
// Include Windows header file. The two "lean" macros are defined first so
// that they are already visible when windows.h is read.
#define WIN32_LEAN_AND_MEAN
#define VC_EXTRALEAN
#include <windows.h> // skipped
// The remaining overrides apply only when the compiler is neither clang nor
// GNU-compatible.
#if !defined(__clang__) && !defined(__GNUC__)
// Undefine attribute specifiers in Windows: __attribute__(x) expands to
// nothing.
#define __attribute__(x)
// Undefine restrict: the qualifier expands to nothing.
#define restrict
#endif
#endif
// time.h provides clock_gettime().
#if BLIS_OS_WINDOWS
#include <time.h> // skipped
#elif BLIS_OS_OSX
#include <mach/mach_time.h> // skipped
#else
//#include <sys/time.h>
#include <time.h> // skipped
#endif
#endif
// end bli_system.h
#line 16 "./frame/compat/cblas/src/cblas.h"
// begin bli_config_macro_defs.h
#line 1 "./frame/include//bli_config_macro_defs.h"
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_MACRO_DEFS_H
#define BLIS_CONFIG_MACRO_DEFS_H
// NOTE: This file should ONLY contain processing of macros that are set by
// configure and output into bli_config.h. Any other macro processing --
// especially such as for those macros that are expected to be optionally
// set within a configuration's bli_family_<conf>.h header -- MUST be placed
// in bli_kernel_macro_defs.h instead. The reason: bli_arch_config.h (which
// #includes the configuration's bli_family_<conf>.h header) is #included
// much later in blis.h than this file (bli_config_macro_defs.h), and so any
// macros set in bli_family_<conf>.h would have no effect on the processing
// that happens below.
// -- INTEGER PROPERTIES -------------------------------------------------------
// Bit width of the integer type BLIS uses for dimensions, strides and diagonal
// offsets, both internally and in the native BLAS-like BLIS interface (it does
// not affect anything else). 32 selects 32-bit signed integers, 64 selects
// 64-bit integers, and any other value selects the C99 type "long int".
// A value supplied earlier is kept; otherwise the width follows the detected
// architecture: 64 when BLIS_ARCH_64 is defined, 32 when it is not.
#if !defined(BLIS_INT_TYPE_SIZE) && defined(BLIS_ARCH_64)
#define BLIS_INT_TYPE_SIZE 64
#elif !defined(BLIS_INT_TYPE_SIZE)
#define BLIS_INT_TYPE_SIZE 32
#endif
// -- FLOATING-POINT PROPERTIES ------------------------------------------------
// Enable use of built-in C99 "float complex" and "double complex" types and
// associated overloaded operations and functions? Disabling results in
// scomplex and dcomplex being defined in terms of simple structs.
// NOTE: AVOID USING THIS FEATURE. IT IS PROBABLY BROKEN.
// The conditional below defines nothing in either branch; it only records
// that BLIS_ENABLE_C99_COMPLEX is an opt-in switch that is off unless defined.
#ifdef BLIS_ENABLE_C99_COMPLEX
// No additional definitions needed.
#else
// Default behavior is disabled.
#endif
// -- MEMORY SUBSYSTEM PROPERTIES ----------------------------------------------
// Cache line size in bytes. Defaults to 64 unless a value was supplied before
// this point.
#if !defined(BLIS_CACHE_LINE_SIZE)
#define BLIS_CACHE_LINE_SIZE 64
#endif
// -- GKS CACHING --------------------------------------------------------------
// Enable caching of queried cntx_t pointers in the gks?
// Caching is on by default; defining BLIS_DISABLE_GKS_CACHING turns it off.
// After this block BLIS_ENABLE_GKS_CACHING is defined (with an empty
// replacement list) if and only if caching is enabled.
#ifdef BLIS_DISABLE_GKS_CACHING
#undef BLIS_ENABLE_GKS_CACHING
#else
// Default behavior is enabled.
// The #undef keeps an explicit user definition (for example one given a value
// on the compiler command line) from triggering a macro-redefinition
// diagnostic, matching how the other enabled-by-default switches in this
// header are handled.
#undef BLIS_ENABLE_GKS_CACHING // In case user explicitly enabled.
#define BLIS_ENABLE_GKS_CACHING
#endif
// -- MULTITHREADING -----------------------------------------------------------
// The first three conditionals are placeholders: they define nothing and only
// record that each threading backend is opt-in.
// POSIX threads backend.
#if defined( BLIS_ENABLE_PTHREADS )
// Nothing further to define.
#else
// Disabled unless requested.
#endif
// OpenMP backend.
#if defined( BLIS_ENABLE_OPENMP )
// Nothing further to define.
#else
// Disabled unless requested.
#endif
// HPX backend.
#if defined( BLIS_ENABLE_HPX )
// Nothing further to define.
#else
// Disabled unless requested.
#endif
// BLIS_ENABLE_MULTITHREADING is defined when at least one of the OpenMP,
// pthreads or HPX backends is enabled. Test it wherever the question is
// "is any threading backend in use?" rather than "which one?".
#if defined( BLIS_ENABLE_PTHREADS ) || defined( BLIS_ENABLE_OPENMP ) || defined( BLIS_ENABLE_HPX )
#define BLIS_ENABLE_MULTITHREADING
#endif
// Allow prime thread counts when automatic thread factorization is requested?
// When this is disabled, a requested prime number of threads is reduced by
// one, provided the prime exceeds a minimum threshold (see below).
// Disabled by default: after this block BLIS_DISABLE_AUTO_PRIME_NUM_THREADS is
// defined exactly when BLIS_ENABLE_AUTO_PRIME_NUM_THREADS is not. Any earlier
// definition of the DISABLE macro is discarded first, so an explicit user
// setting cannot cause a redefinition.
#undef BLIS_DISABLE_AUTO_PRIME_NUM_THREADS
#ifndef BLIS_ENABLE_AUTO_PRIME_NUM_THREADS
#define BLIS_DISABLE_AUTO_PRIME_NUM_THREADS
#endif
// Largest prime thread count that BLIS accepts from the user unchanged. A
// larger prime request is reduced by one so that the thread count factors more
// efficiently. Defaults to 11 unless a value was supplied before this point.
// NOTE(review): the original note here said this value is only used when
// BLIS_ENABLE_AUTO_PRIME_NUM_THREADS is defined; confirm that against the
// description of the disabled behavior just above.
#if !defined(BLIS_NT_MAX_PRIME)
#define BLIS_NT_MAX_PRIME 11
#endif
// -- MISCELLANEOUS OPTIONS ----------------------------------------------------
// Relax the cross-blocksize constraints: bli_kernel_macro_defs.h is told not
// to enforce MC % NR = 0 and NC % MR = 0. Those constraints are needed ONLY
// when trsm_r is implemented by allowing the right-hand matrix B to be
// triangular. The macro is always defined after this block.
#if !defined(BLIS_RELAX_MCNR_NCMR_CONSTRAINTS)
#define BLIS_RELAX_MCNR_NCMR_CONSTRAINTS
#endif
// -- BLAS COMPATIBILITY LAYER -------------------------------------------------
// Enable the BLAS compatibility layer? Enabled by default; defining
// BLIS_DISABLE_BLAS switches it off. Any earlier definition of
// BLIS_ENABLE_BLAS is dropped first, so after this block the macro is defined
// (with an empty replacement list) exactly when BLIS_DISABLE_BLAS is not.
#undef BLIS_ENABLE_BLAS
#ifndef BLIS_DISABLE_BLAS
#define BLIS_ENABLE_BLAS
#endif
// Bit width of the integer type the BLAS compatibility layer uses for
// dimensions and leading dimensions (i.e. column strides). 32 selects 32-bit
// signed integers, 64 selects 64-bit integers, and any other value selects the
// C99 type "long int". Only integers inside the BLAS compatibility layer are
// affected. Defaults to 32 unless a value was supplied before this point.
#if !defined(BLIS_BLAS_INT_TYPE_SIZE)
#define BLIS_BLAS_INT_TYPE_SIZE 32
#endif
// Level-3 BLAS routines call the BLIS object API directly by default. As an
// alternative they can go through the typed BLIS API first, which in turn
// calls the object API; define BLIS_BLAS3_CALLS_TAPI to choose that route
// (it is left undefined here).
// Any earlier definition of BLIS_BLAS3_CALLS_OAPI is dropped first, so after
// this block the macro is defined exactly when BLIS_BLAS3_CALLS_TAPI is not.
#undef BLIS_BLAS3_CALLS_OAPI
#ifndef BLIS_BLAS3_CALLS_TAPI
#define BLIS_BLAS3_CALLS_OAPI
#endif
// -- CBLAS COMPATIBILITY LAYER ------------------------------------------------
// Enable the CBLAS compatibility layer?
// NOTE: Enabling CBLAS will automatically enable the BLAS compatibility layer
// regardless of whether or not it was explicitly enabled above. Furthermore,
// the CBLAS compatibility layer will use the integer type size definition
// specified above when defining the size of its own integers (regardless of
// whether the BLAS layer was enabled directly or indirectly).
// The conditional below defines nothing in either branch; it only records
// that BLIS_ENABLE_CBLAS is an opt-in switch.
#ifdef BLIS_ENABLE_CBLAS
// No additional definitions needed.
#else
// Default behavior is disabled.
#endif
// -- SHARED LIBRARY SYMBOL EXPORT ---------------------------------------------
// For shared-library builds, the set of symbols that external applications can
// link against is controlled here. Every function prototype meant to be
// "public" is annotated with BLIS_EXPORT_BLIS (BLIS_EXPORT_BLAS plays the same
// role for the BLAS compatibility routines). What actually gets exported
// depends on the default symbol visibility, as specified by the gcc option
// -fvisibility=[default|hidden]. Left at 'default' ("public"), every BLIS
// symbol is linkable from outside. Under -fvisibility=hidden every symbol
// starts out hidden (internal to BLIS), and only prototypes or variable
// declarations annotated with BLIS_EXPORT_BLIS override that.
//
// A BLIS_EXPORT supplied before this point is kept. Otherwise it is chosen by
// the first matching case below.
#ifndef BLIS_EXPORT
#if !defined(BLIS_ENABLE_SHARED)
// Shared library not enabled: no annotation.
#define BLIS_EXPORT
#elif defined(_WIN32) || defined(__CYGWIN__)
// Windows/Cygwin: export while building the library, import otherwise.
#ifdef BLIS_IS_BUILDING_LIBRARY
#define BLIS_EXPORT __declspec(dllexport)
#else
#define BLIS_EXPORT __declspec(dllimport)
#endif
#elif defined(__GNUC__) && __GNUC__ >= 4
// GNU-compatible compiler, version 4 or later: force default visibility.
#define BLIS_EXPORT __attribute__ ((visibility ("default")))
#else
// Anything else: no annotation.
#define BLIS_EXPORT
#endif
#endif
// The per-layer annotations all resolve to the same underlying macro.
#define BLIS_EXPORT_BLIS BLIS_EXPORT
#define BLIS_EXPORT_BLAS BLIS_EXPORT
#define BLIS_EXPORT_ADDON BLIS_EXPORT
// -- OVERRIDABLE (WEAK) SYMBOLS -----------------------------------------------
// On Linux, the main program can override a function called from a shared
// library simply by providing its own definition. macOS uses a "two-level
// namespace" that ties such calls to the library, so they cannot be overridden
// that way. As a workaround, selected symbols are declared "weak", which gives
// them lower preference during linking.
// A BLIS_OVERRIDABLE supplied before this point is kept. Otherwise it expands
// to the weak attribute on macOS and to nothing everywhere else.
#ifndef BLIS_OVERRIDABLE
#if !BLIS_OS_OSX
#define BLIS_OVERRIDABLE
#else
#define BLIS_OVERRIDABLE __attribute__((weak))
#endif
#endif
// -- STATIC INLINE FUNCTIONS --------------------------------------------------
// "Inline" helper functions are spelled differently in C and C++. In C the
// phrase is "static inline", where the "inline" part is optional; in C++ the
// "inline" keyword is required and makes "static" unnecessary. This matters
// because BLIS itself is compiled as C99 while blis.h may be #included by a
// source file compiled as C++.
// For C this header uses plain "static" (the optional "inline" is left out);
// for C++ it uses "inline".
#ifndef __cplusplus
#define BLIS_INLINE static
#else
#define BLIS_INLINE inline
#endif
#endif
// end bli_config_macro_defs.h
#line 17 "./frame/compat/cblas/src/cblas.h"
// begin bli_type_defs.h
#line 1 "./frame/include//bli_type_defs.h"
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2016, Hewlett Packard Enterprise Development LP
Copyright (C) 2020, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_TYPE_DEFS_H
#define BLIS_TYPE_DEFS_H
//
// -- BLIS basic types ---------------------------------------------------------
//
// Bring the fixed-width integer types into scope. Three cases are handled:
// C++, C99 or later, and a pre-C99 fallback that is supported only on Windows.
#ifdef __cplusplus
// For C++, include the C++ counterpart of stdint.h.
#include <cstdint> // skipped
#elif __STDC_VERSION__ >= 199901L
// For C99 (or later), include stdint.h (plus stddef.h and stdbool.h).
#include <stddef.h> // skipped
#include <stdint.h> // skipped
#include <stdbool.h> // skipped
#else
// When stdint.h is not available, manually typedef the types we will use.
// Only the four 32/64-bit types are provided, built on the __int32/__int64
// types; any other pre-C99 target is rejected with an error.
#ifdef _WIN32
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#else
#error "Attempting to compile on pre-C99 system without stdint.h."
#endif
#endif
// -- General-purpose integers --
// 64-bit BLAS integers force 64-bit BLIS integers as well.
// NOTE: This guard only makes a real difference on systems where the BLIS
// integer size would otherwise have been selected automatically as 32 bits.
// Explicitly requesting 32 bits is prohibited at configure-time, and an
// explicit or automatic choice of 64 bits already gives the same result.
#if BLIS_BLAS_INT_TYPE_SIZE == 64
#undef BLIS_INT_TYPE_SIZE
#define BLIS_INT_TYPE_SIZE 64
#endif
// gint_t / guint_t are the signed and unsigned general-purpose integers. Their
// width follows BLIS_INT_TYPE_SIZE: 64 or 32 bits when one of those values was
// requested, and "long" for any other value.
#if BLIS_INT_TYPE_SIZE == 64
typedef int64_t gint_t;
typedef uint64_t guint_t;
#elif BLIS_INT_TYPE_SIZE == 32
typedef int32_t gint_t;
typedef uint32_t guint_t;
#else
typedef long gint_t;
typedef unsigned long guint_t;
#endif
// -- Boolean type --
// NOTE: the old bool_t typedef is no longer used; C99's bool type replaces it.
// BLIS still spells its boolean constants TRUE and FALSE. They are mapped onto
// true and false, which C99 provides in stdbool.h. A TRUE or FALSE that is
// already defined at this point is left alone.
#if !defined(TRUE)
#define TRUE true
#endif
#if !defined(FALSE)
#define FALSE false
#endif
// -- Special-purpose integers --
// Dimension type. The _DEFINED_DIM_T guard is a temporary hack for libflame
// interoperability: when that macro is already defined, the typedef is
// skipped.
#if !defined(_DEFINED_DIM_T)
#define _DEFINED_DIM_T
typedef gint_t dim_t;
#endif
// Increment/stride type.
typedef gint_t inc_t;
// Diagonal offset type.
typedef gint_t doff_t;
// Byte size type.
typedef guint_t siz_t;
// Object information bit field.
typedef uint32_t objbits_t;
// -- Real types --
// Byte sizes of the four floating-point types, written as literals because
// some places need them inside a C preprocessor #if conditional, where
// sizeof() cannot be used.
#define BLIS_SIZEOF_S 4 // sizeof(float)
#define BLIS_SIZEOF_D 8 // sizeof(double)
#define BLIS_SIZEOF_C 8 // sizeof(scomplex)
#define BLIS_SIZEOF_Z 16 // sizeof(dcomplex)
// Number of floating-point types supported.
#define BLIS_NUM_FP_TYPES 4
// Size of the largest of those types.
#define BLIS_MAX_TYPE_SIZE sizeof(dcomplex)
// -- Complex types --
// scomplex and dcomplex are defined in one of three mutually exclusive ways:
// C++ std::complex, C99 complex, or (when neither is requested) plain structs
// with "real" and "imag" members. Each typedef sits inside a _DEFINED_* guard
// and is skipped when that macro is already defined.
#if defined(__cplusplus) && defined(BLIS_ENABLE_STD_COMPLEX)
// NOTE(review): the brace below presumably closes an extern "C" block opened
// outside this section, so that <complex> is included with C++ linkage; the
// block is reopened after the typedefs. Confirm against the enclosing header.
} //extern "C"
#include <complex> // skipped
// Typedef official C++ complex types to BLIS complex type names.
// This cpp guard provides a temporary hack to allow libflame
// interoperability with BLIS.
#ifndef _DEFINED_SCOMPLEX
#define _DEFINED_SCOMPLEX
typedef std::complex<float> scomplex;
#endif
// This cpp guard provides a temporary hack to allow libflame
// interoperability with BLIS.
#ifndef _DEFINED_DCOMPLEX
#define _DEFINED_DCOMPLEX
typedef std::complex<double> dcomplex;
#endif
extern "C"
{
#elif defined(BLIS_ENABLE_C99_COMPLEX)
// C99 complex was requested: it is honored only for C99 or later, and is a
// hard error otherwise.
#if __STDC_VERSION__ >= 199901L
#include <complex.h> // skipped
// Typedef official C99 complex types to BLIS complex type names.
// This cpp guard provides a temporary hack to allow libflame
// interoperability with BLIS.
#ifndef _DEFINED_SCOMPLEX
#define _DEFINED_SCOMPLEX
typedef float complex scomplex;
#endif
// This cpp guard provides a temporary hack to allow libflame
// interoperability with BLIS.
#ifndef _DEFINED_DCOMPLEX
#define _DEFINED_DCOMPLEX
typedef double complex dcomplex;
#endif
#else
#error "Configuration requested C99 complex types, but C99 does not appear to be supported."
#endif
#else // ifndef BLIS_ENABLE_C99_COMPLEX
// Default: simple structs holding the real part followed by the imaginary
// part.
// This cpp guard provides a temporary hack to allow libflame
// interoperability with BLIS.
#ifndef _DEFINED_SCOMPLEX
#define _DEFINED_SCOMPLEX
typedef struct scomplex
{
float real;
float imag;
} scomplex;
#endif
// This cpp guard provides a temporary hack to allow libflame
// interoperability with BLIS.
#ifndef _DEFINED_DCOMPLEX
#define _DEFINED_DCOMPLEX
typedef struct dcomplex
{
double real;
double imag;
} dcomplex;
#endif
#endif // BLIS_ENABLE_C99_COMPLEX
// -- Atom type --
// Note: atom types are used to hold "bufferless" scalar object values. Note
// that it needs to be as large as the largest possible scalar value we might
// want to hold. Thus, for now, it is a dcomplex.
// (BLIS_MAX_TYPE_SIZE is likewise defined as sizeof(dcomplex).)
typedef dcomplex atom_t;
// -- Fortran-77 types --
// Note: the BLAS compatibility layer is the typical user of these types, but
// they have to exist even when that layer is not built, because bli_slamch()
// and bli_dlamch() use them as well.
// f77_int follows BLIS_BLAS_INT_TYPE_SIZE: 64 or 32 bits when one of those
// values was requested, and "long int" for any other value.
#if BLIS_BLAS_INT_TYPE_SIZE == 64
typedef int64_t f77_int;
#elif BLIS_BLAS_INT_TYPE_SIZE == 32
typedef int32_t f77_int;
#else
typedef long int f77_int;
#endif
// Remaining Fortran-77 type names: each is a plain alias for the C or BLIS
// type named on the same line.
typedef char f77_char;
typedef float f77_float;
typedef double f77_double;
typedef scomplex f77_scomplex;
typedef dcomplex f77_dcomplex;
// -- Misc. function pointer types --
// void_fp is the type to use whenever the address of a *function* is conveyed
// or stored before being typecast back to its correct function type. It is
// not needed for the address of *data* (such as an array of float or double).
// It is currently an object pointer (void*); a true function-pointer form,
// void (*)( void ), is not used.
typedef void *void_fp;
// Function pointer types for malloc() and free() substitutes.
typedef void *(*malloc_ft)(size_t size);
typedef void (*free_ft)(void *p);
//
// -- BLIS info bit field sizes ------------------------------------------------
//
// Width, in bits, of every field. Elementary fields are listed first; each
// composite field is the sum of the fields it is made of.
#define BLIS_DOMAIN_NUM_BITS 1
#define BLIS_PRECISION_NUM_BITS 2
#define BLIS_DATATYPE_NUM_BITS ( BLIS_DOMAIN_NUM_BITS + BLIS_PRECISION_NUM_BITS )
#define BLIS_TRANS_NUM_BITS 1
#define BLIS_CONJ_NUM_BITS 1
#define BLIS_CONJTRANS_NUM_BITS ( BLIS_TRANS_NUM_BITS + BLIS_CONJ_NUM_BITS )
#define BLIS_UPPER_NUM_BITS 1
#define BLIS_DIAG_NUM_BITS 1
#define BLIS_LOWER_NUM_BITS 1
#define BLIS_UPLO_NUM_BITS ( BLIS_UPPER_NUM_BITS + BLIS_DIAG_NUM_BITS + BLIS_LOWER_NUM_BITS )
#define BLIS_UNIT_DIAG_NUM_BITS 1
#define BLIS_INVERT_DIAG_NUM_BITS 1
#define BLIS_PACK_PANEL_NUM_BITS 1
#define BLIS_PACK_FORMAT_NUM_BITS 4
#define BLIS_PACK_NUM_BITS 1
#define BLIS_PACK_SCHEMA_NUM_BITS ( BLIS_PACK_PANEL_NUM_BITS + BLIS_PACK_FORMAT_NUM_BITS + BLIS_PACK_NUM_BITS )
#define BLIS_PACK_REV_IF_UPPER_NUM_BITS 1
#define BLIS_PACK_REV_IF_LOWER_NUM_BITS 1
#define BLIS_PACK_BUFFER_NUM_BITS 2
#define BLIS_STRUC_NUM_BITS 2
//
// -- BLIS info bit field offsets ----------------------------------------------
//
// Position of the lowest bit of every field. A composite field starts where
// its first component starts; every other field starts right after the field
// that precedes it.
// Datatype: domain, then precision.
#define BLIS_DATATYPE_SHIFT 0
#define BLIS_DOMAIN_SHIFT ( BLIS_DATATYPE_SHIFT )
#define BLIS_PRECISION_SHIFT ( BLIS_DOMAIN_SHIFT + BLIS_DOMAIN_NUM_BITS )
// Conjugation/transposition: trans, then conj.
#define BLIS_CONJTRANS_SHIFT ( BLIS_DATATYPE_SHIFT + BLIS_DATATYPE_NUM_BITS )
#define BLIS_TRANS_SHIFT ( BLIS_CONJTRANS_SHIFT )
#define BLIS_CONJ_SHIFT ( BLIS_TRANS_SHIFT + BLIS_TRANS_NUM_BITS )
// Uplo: upper, then diag, then lower.
#define BLIS_UPLO_SHIFT ( BLIS_CONJTRANS_SHIFT + BLIS_CONJTRANS_NUM_BITS )
#define BLIS_UPPER_SHIFT ( BLIS_UPLO_SHIFT )
#define BLIS_DIAG_SHIFT ( BLIS_UPPER_SHIFT + BLIS_UPPER_NUM_BITS )
#define BLIS_LOWER_SHIFT ( BLIS_DIAG_SHIFT + BLIS_DIAG_NUM_BITS )
// Unit-diagonal and invert-diagonal flags.
#define BLIS_UNIT_DIAG_SHIFT ( BLIS_UPLO_SHIFT + BLIS_UPLO_NUM_BITS )
#define BLIS_INVERT_DIAG_SHIFT ( BLIS_UNIT_DIAG_SHIFT + BLIS_UNIT_DIAG_NUM_BITS )
// Pack schema: panel, then format, then pack.
#define BLIS_PACK_SCHEMA_SHIFT ( BLIS_INVERT_DIAG_SHIFT + BLIS_INVERT_DIAG_NUM_BITS )
#define BLIS_PACK_PANEL_SHIFT ( BLIS_PACK_SCHEMA_SHIFT )
#define BLIS_PACK_FORMAT_SHIFT ( BLIS_PACK_PANEL_SHIFT + BLIS_PACK_PANEL_NUM_BITS )
#define BLIS_PACK_SHIFT ( BLIS_PACK_FORMAT_SHIFT + BLIS_PACK_FORMAT_NUM_BITS )
// Pack-reversal flags, pack buffer, structure.
#define BLIS_PACK_REV_IF_UPPER_SHIFT ( BLIS_PACK_SCHEMA_SHIFT + BLIS_PACK_SCHEMA_NUM_BITS )
#define BLIS_PACK_REV_IF_LOWER_SHIFT ( BLIS_PACK_REV_IF_UPPER_SHIFT + BLIS_PACK_REV_IF_UPPER_NUM_BITS )
#define BLIS_PACK_BUFFER_SHIFT ( BLIS_PACK_REV_IF_LOWER_SHIFT + BLIS_PACK_REV_IF_LOWER_NUM_BITS )
#define BLIS_STRUC_SHIFT ( BLIS_PACK_BUFFER_SHIFT + BLIS_PACK_BUFFER_NUM_BITS )
// Computation precision, then the scalar datatype (domain, then precision).
#define BLIS_COMP_PREC_SHIFT ( BLIS_STRUC_SHIFT + BLIS_STRUC_NUM_BITS )
#define BLIS_SCALAR_DT_SHIFT ( BLIS_COMP_PREC_SHIFT + BLIS_PRECISION_NUM_BITS )
#define BLIS_SCALAR_DOMAIN_SHIFT ( BLIS_SCALAR_DT_SHIFT )
#define BLIS_SCALAR_PREC_SHIFT ( BLIS_SCALAR_DOMAIN_SHIFT + BLIS_DOMAIN_NUM_BITS )
// This is the total number of bits, which should always be <= 32
#define BLIS_INFO_NUM_BITS ( BLIS_SCALAR_DT_SHIFT + BLIS_DATATYPE_NUM_BITS )
//
// -- BLIS info bit field masks ------------------------------------------------
//
// Private helper for the definitions below: a run of num_bits consecutive
// one-bits whose lowest bit sits at position shift.
#define BLIS_INFO_FIELD_MASK_( num_bits, shift ) ( ( ( 1 << ( num_bits ) ) - 1 ) << ( shift ) )
#define BLIS_DATATYPE_BITS BLIS_INFO_FIELD_MASK_( BLIS_DATATYPE_NUM_BITS, BLIS_DATATYPE_SHIFT )
#define BLIS_DOMAIN_BIT BLIS_INFO_FIELD_MASK_( BLIS_DOMAIN_NUM_BITS, BLIS_DOMAIN_SHIFT )
#define BLIS_PRECISION_BIT BLIS_INFO_FIELD_MASK_( BLIS_PRECISION_NUM_BITS, BLIS_PRECISION_SHIFT )
#define BLIS_CONJTRANS_BITS BLIS_INFO_FIELD_MASK_( BLIS_CONJTRANS_NUM_BITS, BLIS_CONJTRANS_SHIFT )
#define BLIS_TRANS_BIT BLIS_INFO_FIELD_MASK_( BLIS_TRANS_NUM_BITS, BLIS_TRANS_SHIFT )
#define BLIS_CONJ_BIT BLIS_INFO_FIELD_MASK_( BLIS_CONJ_NUM_BITS, BLIS_CONJ_SHIFT )
#define BLIS_UPLO_BITS BLIS_INFO_FIELD_MASK_( BLIS_UPLO_NUM_BITS, BLIS_UPLO_SHIFT )
#define BLIS_UPPER_BIT BLIS_INFO_FIELD_MASK_( BLIS_UPPER_NUM_BITS, BLIS_UPPER_SHIFT )
#define BLIS_DIAG_BIT BLIS_INFO_FIELD_MASK_( BLIS_DIAG_NUM_BITS, BLIS_DIAG_SHIFT )
#define BLIS_LOWER_BIT BLIS_INFO_FIELD_MASK_( BLIS_LOWER_NUM_BITS, BLIS_LOWER_SHIFT )
#define BLIS_UNIT_DIAG_BIT BLIS_INFO_FIELD_MASK_( BLIS_UNIT_DIAG_NUM_BITS, BLIS_UNIT_DIAG_SHIFT )
#define BLIS_INVERT_DIAG_BIT BLIS_INFO_FIELD_MASK_( BLIS_INVERT_DIAG_NUM_BITS, BLIS_INVERT_DIAG_SHIFT )
#define BLIS_PACK_SCHEMA_BITS BLIS_INFO_FIELD_MASK_( BLIS_PACK_SCHEMA_NUM_BITS, BLIS_PACK_SCHEMA_SHIFT )
#define BLIS_PACK_PANEL_BIT BLIS_INFO_FIELD_MASK_( BLIS_PACK_PANEL_NUM_BITS, BLIS_PACK_PANEL_SHIFT )
#define BLIS_PACK_FORMAT_BITS BLIS_INFO_FIELD_MASK_( BLIS_PACK_FORMAT_NUM_BITS, BLIS_PACK_FORMAT_SHIFT )
#define BLIS_PACK_BIT BLIS_INFO_FIELD_MASK_( BLIS_PACK_NUM_BITS, BLIS_PACK_SHIFT )
#define BLIS_PACK_REV_IF_UPPER_BIT BLIS_INFO_FIELD_MASK_( BLIS_PACK_REV_IF_UPPER_NUM_BITS, BLIS_PACK_REV_IF_UPPER_SHIFT )
#define BLIS_PACK_REV_IF_LOWER_BIT BLIS_INFO_FIELD_MASK_( BLIS_PACK_REV_IF_LOWER_NUM_BITS, BLIS_PACK_REV_IF_LOWER_SHIFT )
#define BLIS_PACK_BUFFER_BITS BLIS_INFO_FIELD_MASK_( BLIS_PACK_BUFFER_NUM_BITS, BLIS_PACK_BUFFER_SHIFT )
#define BLIS_STRUC_BITS BLIS_INFO_FIELD_MASK_( BLIS_STRUC_NUM_BITS, BLIS_STRUC_SHIFT )
// The three fields below reuse the precision, datatype and domain widths.
#define BLIS_COMP_PREC_BIT BLIS_INFO_FIELD_MASK_( BLIS_PRECISION_NUM_BITS, BLIS_COMP_PREC_SHIFT )
#define BLIS_SCALAR_DT_BITS BLIS_INFO_FIELD_MASK_( BLIS_DATATYPE_NUM_BITS, BLIS_SCALAR_DT_SHIFT )
#define BLIS_SCALAR_DOMAIN_BIT BLIS_INFO_FIELD_MASK_( BLIS_DOMAIN_NUM_BITS, BLIS_SCALAR_DOMAIN_SHIFT )
#define BLIS_SCALAR_PREC_BIT BLIS_INFO_FIELD_MASK_( BLIS_PRECISION_NUM_BITS, BLIS_SCALAR_PREC_SHIFT )
//
// -- BLIS enumerated type value definitions -----------------------------------
//
// Bit patterns used as the values of the enumerated types defined further
// down. Each group below is built from the masks/shifts of one info field.

// Domain and precision; the four floating-point datatypes are the
// combinations of the two.
#define BLIS_BITVAL_REAL 0x0
#define BLIS_BITVAL_COMPLEX BLIS_DOMAIN_BIT
#define BLIS_BITVAL_SINGLE_PREC 0x0
#define BLIS_BITVAL_DOUBLE_PREC ( 0x1 << BLIS_PRECISION_SHIFT )
#define BLIS_BITVAL_FLOAT_TYPE 0x0
#define BLIS_BITVAL_SCOMPLEX_TYPE BLIS_DOMAIN_BIT
#define BLIS_BITVAL_DOUBLE_TYPE BLIS_BITVAL_DOUBLE_PREC
#define BLIS_BITVAL_DCOMPLEX_TYPE ( BLIS_DOMAIN_BIT | BLIS_BITVAL_DOUBLE_PREC )
// The integer and constant datatypes are literal values, not built from the
// domain/precision bits.
#define BLIS_BITVAL_INT_TYPE 0x04
#define BLIS_BITVAL_CONST_TYPE 0x05
// Transposition and conjugation.
#define BLIS_BITVAL_NO_TRANS 0x0
#define BLIS_BITVAL_TRANS BLIS_TRANS_BIT
#define BLIS_BITVAL_NO_CONJ 0x0
#define BLIS_BITVAL_CONJ BLIS_CONJ_BIT
#define BLIS_BITVAL_CONJ_TRANS ( BLIS_CONJ_BIT | BLIS_TRANS_BIT )
// Stored region: upper and lower each include the diagonal bit; dense sets
// every bit of the uplo field.
#define BLIS_BITVAL_ZEROS 0x0
#define BLIS_BITVAL_UPPER ( BLIS_UPPER_BIT | BLIS_DIAG_BIT )
#define BLIS_BITVAL_LOWER ( BLIS_LOWER_BIT | BLIS_DIAG_BIT )
#define BLIS_BITVAL_DENSE BLIS_UPLO_BITS
// Diagonal handling.
#define BLIS_BITVAL_NONUNIT_DIAG 0x0
#define BLIS_BITVAL_UNIT_DIAG BLIS_UNIT_DIAG_BIT
#define BLIS_BITVAL_INVERT_DIAG BLIS_INVERT_DIAG_BIT
// Pack schema: a "packed" bit, an optional "panel" bit, and a format code
// (1E, 1R or RO) stored in the pack format sub-field.
#define BLIS_BITVAL_NOT_PACKED 0x0
#define BLIS_BITVAL_1E ( 0x1 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_1R ( 0x2 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_RO ( 0x3 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_PACKED_UNSPEC ( BLIS_PACK_BIT )
#define BLIS_BITVAL_PACKED_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_PANELS_1E ( BLIS_PACK_BIT | BLIS_BITVAL_1E | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_PANELS_1R ( BLIS_PACK_BIT | BLIS_BITVAL_1R | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT )
// Pack order: both "forward" values are zero; the "reverse" values are
// separate bits for the upper and lower cases.
#define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0
#define BLIS_BITVAL_PACK_REV_IF_UPPER BLIS_PACK_REV_IF_UPPER_BIT
#define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0
#define BLIS_BITVAL_PACK_REV_IF_LOWER BLIS_PACK_REV_IF_LOWER_BIT
// Pack buffer kind: codes 0..3 stored in the pack buffer field.
#define BLIS_BITVAL_BUFFER_FOR_A_BLOCK 0x0
#define BLIS_BITVAL_BUFFER_FOR_B_PANEL ( 0x1 << BLIS_PACK_BUFFER_SHIFT )
#define BLIS_BITVAL_BUFFER_FOR_C_PANEL ( 0x2 << BLIS_PACK_BUFFER_SHIFT )
#define BLIS_BITVAL_BUFFER_FOR_GEN_USE ( 0x3 << BLIS_PACK_BUFFER_SHIFT )
// Matrix structure: codes 0..3 stored in the structure field.
#define BLIS_BITVAL_GENERAL 0x0
#define BLIS_BITVAL_HERMITIAN ( 0x1 << BLIS_STRUC_SHIFT )
#define BLIS_BITVAL_SYMMETRIC ( 0x2 << BLIS_STRUC_SHIFT )
#define BLIS_BITVAL_TRIANGULAR ( 0x3 << BLIS_STRUC_SHIFT )
//
// -- BLIS enumerated type definitions -----------------------------------------
//
// -- Operational parameter types --
// Transposition/conjugation parameter. The values are the BLIS_BITVAL_*
// patterns of the trans and conj bits; BLIS_CONJ_TRANSPOSE has both set.
typedef enum
{
BLIS_NO_TRANSPOSE = 0x0,
BLIS_TRANSPOSE = BLIS_BITVAL_TRANS,
BLIS_CONJ_NO_TRANSPOSE = BLIS_BITVAL_CONJ,
BLIS_CONJ_TRANSPOSE = BLIS_BITVAL_CONJ_TRANS
} trans_t;
// Conjugation-only parameter. BLIS_CONJUGATE shares its value with
// BLIS_CONJ_NO_TRANSPOSE of trans_t (both are BLIS_BITVAL_CONJ).
typedef enum
{
BLIS_NO_CONJUGATE = 0x0,
BLIS_CONJUGATE = BLIS_BITVAL_CONJ
} conj_t;
// Stored-region parameter. LOWER and UPPER each include the diagonal bit
// (see BLIS_BITVAL_LOWER/UPPER); DENSE sets all bits of the uplo field and
// ZEROS sets none.
typedef enum
{
BLIS_ZEROS = BLIS_BITVAL_ZEROS,
BLIS_LOWER = BLIS_BITVAL_LOWER,
BLIS_UPPER = BLIS_BITVAL_UPPER,
BLIS_DENSE = BLIS_BITVAL_DENSE
} uplo_t;
// Side parameter. Unlike the bitfield-valued enums above, these are plain
// ordinals: BLIS_LEFT is 0 and BLIS_RIGHT takes the next value, 1.
typedef enum
{
BLIS_LEFT = 0x0,
BLIS_RIGHT
} side_t;
// Unit-diagonal parameter; BLIS_UNIT_DIAG is the unit-diag bit pattern.
typedef enum
{
BLIS_NONUNIT_DIAG = 0x0,
BLIS_UNIT_DIAG = BLIS_BITVAL_UNIT_DIAG
} diag_t;
// Inverted-diagonal parameter; BLIS_INVERT_DIAG is the invert-diag bit
// pattern.
typedef enum
{
BLIS_NO_INVERT_DIAG = 0x0,
BLIS_INVERT_DIAG = BLIS_BITVAL_INVERT_DIAG
} invdiag_t;
// Matrix structure parameter; values are the codes 0..3 shifted into the
// structure field (see BLIS_BITVAL_GENERAL .. BLIS_BITVAL_TRIANGULAR).
typedef enum
{
BLIS_GENERAL = BLIS_BITVAL_GENERAL,
BLIS_HERMITIAN = BLIS_BITVAL_HERMITIAN,
BLIS_SYMMETRIC = BLIS_BITVAL_SYMMETRIC,
BLIS_TRIANGULAR = BLIS_BITVAL_TRIANGULAR
} struc_t;
// -- Data type --
// Numerical datatype identifier. The four floating-point types are built from
// the domain and precision bits; BLIS_INT and BLIS_CONSTANT are the literal
// values 0x04 and 0x05.
typedef enum
{
BLIS_FLOAT = BLIS_BITVAL_FLOAT_TYPE,
BLIS_DOUBLE = BLIS_BITVAL_DOUBLE_TYPE,
BLIS_SCOMPLEX = BLIS_BITVAL_SCOMPLEX_TYPE,
BLIS_DCOMPLEX = BLIS_BITVAL_DCOMPLEX_TYPE,
BLIS_INT = BLIS_BITVAL_INT_TYPE,
BLIS_CONSTANT = BLIS_BITVAL_CONST_TYPE,
// Aliases for the first and last floating-point datatype
// (presumably used as loop bounds over the datatypes -- confirm at use sites).
BLIS_DT_LO = BLIS_FLOAT,
BLIS_DT_HI = BLIS_DCOMPLEX
} num_t;
// Numerical domain: real (no bits set) or complex (the domain bit).
typedef enum
{
BLIS_REAL = BLIS_BITVAL_REAL,
BLIS_COMPLEX = BLIS_BITVAL_COMPLEX
} dom_t;
// Numerical precision: single (no bits set) or double (the precision bit).
typedef enum
{
BLIS_SINGLE_PREC = BLIS_BITVAL_SINGLE_PREC,
BLIS_DOUBLE_PREC = BLIS_BITVAL_DOUBLE_PREC
} prec_t;
// -- Pack schema type --
// Pack schema identifier. BLIS_PACKED_VECTOR and BLIS_PACKED_MATRIX are
// aliases of BLIS_PACKED_UNSPEC (all three equal BLIS_BITVAL_PACKED_UNSPEC).
typedef enum
{
BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED,
BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_MATRIX = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_PANELS = BLIS_BITVAL_PACKED_PANELS,
BLIS_PACKED_PANELS_1E = BLIS_BITVAL_PACKED_PANELS_1E,
BLIS_PACKED_PANELS_1R = BLIS_BITVAL_PACKED_PANELS_1R,
BLIS_PACKED_PANELS_RO = BLIS_BITVAL_PACKED_PANELS_RO,
// BLIS_NUM_PACK_SCHEMA_TYPES must be last!
// We start with BLIS_PACKED_PANELS.
// The helper value below is BLIS_PACKED_PANELS_RO + 1 (implicit increment).
// Subtracting BLIS_PACKED_PANELS and 1 leaves the largest format code shifted
// into place; shifting it back down and adding 1 gives the number of panel
// schemas (4, assuming the format field does not overlap the pack/panel bits).
BLIS_NUM_PACK_SCHEMA_TYPES_,
BLIS_NUM_PACK_SCHEMA_TYPES = ((( BLIS_NUM_PACK_SCHEMA_TYPES_ - BLIS_PACKED_PANELS - 1 ) >> BLIS_PACK_FORMAT_SHIFT ) + 1)
} pack_t;
// -- Pack order type --
// Pack order. Both FWD values are 0x0 and therefore compare equal to each
// other; the two REV values are distinct bits.
typedef enum
{
BLIS_PACK_FWD_IF_UPPER = BLIS_BITVAL_PACK_FWD_IF_UPPER,
BLIS_PACK_REV_IF_UPPER = BLIS_BITVAL_PACK_REV_IF_UPPER,
BLIS_PACK_FWD_IF_LOWER = BLIS_BITVAL_PACK_FWD_IF_LOWER,
BLIS_PACK_REV_IF_LOWER = BLIS_BITVAL_PACK_REV_IF_LOWER
} packord_t;
// -- Pack buffer type --
// Pack buffer kind; values are the codes 0..3 shifted to
// BLIS_PACK_BUFFER_SHIFT, so they are not small ordinals.
typedef enum
{
BLIS_BUFFER_FOR_A_BLOCK = BLIS_BITVAL_BUFFER_FOR_A_BLOCK,
BLIS_BUFFER_FOR_B_PANEL = BLIS_BITVAL_BUFFER_FOR_B_PANEL,
BLIS_BUFFER_FOR_C_PANEL = BLIS_BITVAL_BUFFER_FOR_C_PANEL,
BLIS_BUFFER_FOR_GEN_USE = BLIS_BITVAL_BUFFER_FOR_GEN_USE
} packbuf_t;
// -- Partitioning direction --
// Direction in which a dimension is partitioned: forward (0) or backward (1).
typedef enum
{
BLIS_FWD,
BLIS_BWD
} dir_t;
// -- Subpartition type --
// Identifies a subpartition. Values are implicit ordinals starting at 0.
// NOTE(review): the single-digit names look like 1-D (3-way) partitions and
// the two-digit names like cells of a 3x3 partitioning -- confirm against the
// partitioning code.
typedef enum
{
BLIS_SUBPART0,
BLIS_SUBPART1,
BLIS_SUBPART2,
BLIS_SUBPART1AND0,
BLIS_SUBPART1AND2,
BLIS_SUBPART1A,
BLIS_SUBPART1B,
BLIS_SUBPART00,
BLIS_SUBPART10,
BLIS_SUBPART20,
BLIS_SUBPART01,
BLIS_SUBPART11,
BLIS_SUBPART21,
BLIS_SUBPART02,
BLIS_SUBPART12,
BLIS_SUBPART22
} subpart_t;
// -- Matrix dimension type --
// Selects a matrix dimension: m is 0 and n is 1. The explicit values make
// the enum suitable as an index into two-element arrays.
typedef enum
{
BLIS_M = 0,
BLIS_N = 1
} mdim_t;
// -- Machine parameter types --
// Identifies a machine parameter. Values are consecutive ordinals starting
// at 0, so BLIS_NUM_MACH_PARAMS equals the number of parameters.
typedef enum
{
BLIS_MACH_EPS = 0,
BLIS_MACH_SFMIN,
BLIS_MACH_BASE,
BLIS_MACH_PREC,
BLIS_MACH_NDIGMANT,
BLIS_MACH_RND,
BLIS_MACH_EMIN,
BLIS_MACH_RMIN,
BLIS_MACH_EMAX,
BLIS_MACH_RMAX,
BLIS_MACH_EPS2,
// BLIS_NUM_MACH_PARAMS must be last!
BLIS_NUM_MACH_PARAMS
} machval_t;
// Inclusive bounds of the valid machval_t range. These must be kept in sync
// with the first and last real entries of the enum above.
#define BLIS_MACH_PARAM_FIRST BLIS_MACH_EPS
#define BLIS_MACH_PARAM_LAST BLIS_MACH_EPS2
// -- Induced method types --
// Identifies an induced method. BLIS_1M is 0 and BLIS_NAT is 1;
// BLIS_IND_FIRST/BLIS_IND_LAST alias them as inclusive range bounds.
typedef enum
{
BLIS_1M = 0,
BLIS_NAT,
BLIS_IND_FIRST = 0,
BLIS_IND_LAST = BLIS_NAT,
// BLIS_NUM_IND_METHODS must be last!
// It follows BLIS_IND_LAST, so its implicit value is BLIS_NAT + 1 == 2.
BLIS_NUM_IND_METHODS
} ind_t;
// These are used in bli_l3_*_oapi.c to construct the ind_t values from
// the induced method substrings that go into function names.
#define bli_1m BLIS_1M
#define bli_nat BLIS_NAT
// -- Threading implementation type --
// Identifies a threading implementation. Values are consecutive ordinals
// starting at 0, so BLIS_NUM_THREAD_IMPLS is the number of implementations.
typedef enum
{
BLIS_SINGLE = 0,
BLIS_OPENMP,
BLIS_POSIX,
BLIS_HPX,
// BLIS_NUM_THREAD_IMPLS must be last!
BLIS_NUM_THREAD_IMPLS
} timpl_t;
// -- Kernel ID types --
// Encode the number of independent type parameters in the high
// bits of the kernel ID. This lets us identify kernel IDs as the
// appropriate type while also using them as linear indices after
// masking out these bits.
// The tag occupies the top four bits (28..31) of a 32-bit value and stores
// (number of type parameters - 1).
#define BLIS_NTYPE_KER_SHIFT 28
#define BLIS_NTYPE_KER_BITS (0xFu << BLIS_NTYPE_KER_SHIFT)
#define BLIS_1TYPE_KER ( 0u << BLIS_NTYPE_KER_SHIFT)
#define BLIS_2TYPE_KER ( 1u << BLIS_NTYPE_KER_SHIFT)
#define BLIS_3TYPE_KER ( 2u << BLIS_NTYPE_KER_SHIFT)
// bli_ker_idx() clears the tag bits, leaving the linear index of the kernel.
// bli_ker_ntype() extracts the tag and adds 1, giving 1, 2 or 3 for the
// BLIS_1TYPE_KER, BLIS_2TYPE_KER and BLIS_3TYPE_KER tags respectively.
#define bli_ker_idx( ker ) ((ker) & ~BLIS_NTYPE_KER_BITS)
#define bli_ker_ntype( ker ) ((((ker) & BLIS_NTYPE_KER_BITS) >> BLIS_NTYPE_KER_SHIFT) + 1)
// Sentinel constant used to indicate the end of a variable argument function
// (See bli_cntx.c)
// Its value is the all-ones bit pattern of siz_t.
#define BLIS_VA_END ((siz_t)-1)
// Kernel identifier. The enum is split into two runs: single-type kernels,
// tagged with BLIS_1TYPE_KER, and two-type kernels, tagged with
// BLIS_2TYPE_KER. Within each run the values increase by one, so
// bli_ker_idx() of a value is its zero-based position within its run.
typedef enum
{
// -- Single-type kernels --
// l1v kernels
BLIS_ADDV_KER = BLIS_1TYPE_KER,
BLIS_AMAXV_KER,
BLIS_AXPBYV_KER,
BLIS_AXPYV_KER,
BLIS_COPYV_KER,
BLIS_DOTV_KER,
BLIS_DOTXV_KER,
BLIS_INVERTV_KER,
BLIS_INVSCALV_KER,
BLIS_SCALV_KER,
BLIS_SCAL2V_KER,
BLIS_SETV_KER,
BLIS_SUBV_KER,
BLIS_SWAPV_KER,
BLIS_XPBYV_KER,
BLIS_AXPY2V_KER,
BLIS_DOTAXPYV_KER,
// l1f kernels
BLIS_AXPYF_KER,
BLIS_DOTXF_KER,
BLIS_DOTXAXPYF_KER,
// l3 native kernels
BLIS_GEMMTRSM_L_UKR,
BLIS_GEMMTRSM_U_UKR,
BLIS_TRSM_L_UKR,
BLIS_TRSM_U_UKR,
// l3 1m kernels
BLIS_GEMMTRSM1M_L_UKR,
BLIS_GEMMTRSM1M_U_UKR,
// gemmsup kernels
BLIS_GEMMSUP_RRR_UKR,
BLIS_GEMMSUP_RRC_UKR,
BLIS_GEMMSUP_RCR_UKR,
BLIS_GEMMSUP_RCC_UKR,
BLIS_GEMMSUP_CRR_UKR,
BLIS_GEMMSUP_CRC_UKR,
BLIS_GEMMSUP_CCR_UKR,
BLIS_GEMMSUP_CCC_UKR,
BLIS_GEMMSUP_XXX_UKR,
// BLIS_NUM_UKRS must come after all 1-type kernels and before 2-type kernels!
// The helper value takes the next ordinal after the last single-type kernel;
// stripping its tag bits yields the number of single-type kernels.
BLIS_NUM_UKRS_, BLIS_NUM_UKRS = bli_ker_idx( BLIS_NUM_UKRS_ ),
// -- Two-type kernels --
// pack kernels
BLIS_PACKM_KER = BLIS_2TYPE_KER,
BLIS_PACKM_1ER_KER,
BLIS_PACKM_RO_KER,
BLIS_PACKM_DIAG_KER,
BLIS_PACKM_DIAG_1ER_KER,
BLIS_PACKM_DIAG_RO_KER,
// unpack kernels
BLIS_UNPACKM_KER,
// l3 native kernels
BLIS_GEMM_UKR,
// l3 1m kernels
BLIS_GEMM1M_UKR,
// mixed-domain kernels
BLIS_GEMM_CCR_UKR,
BLIS_GEMM_RCC_UKR,
BLIS_GEMM_CRR_UKR,
// BLIS_NUM_UKR2S must come after all kernels!
// As above: the helper value is the next ordinal in the two-type run, and
// stripping the tag bits yields the number of two-type kernels.
BLIS_NUM_UKR2S_, BLIS_NUM_UKR2S = bli_ker_idx( BLIS_NUM_UKR2S_ ),
// Force the size of ukr_t values to be as large as siz_t
BLIS_UKRS_END_ = BLIS_VA_END
} ukr_t;
// Identifies a microkernel row-storage preference. Values are consecutive
// ordinals starting at 0, so BLIS_NUM_UKR_PREFS is the number of preferences.
typedef enum
{
// l3 kernel row preferences
BLIS_GEMM_UKR_ROW_PREF,
BLIS_GEMMTRSM_L_UKR_ROW_PREF,
BLIS_GEMMTRSM_U_UKR_ROW_PREF,
BLIS_TRSM_L_UKR_ROW_PREF,
BLIS_TRSM_U_UKR_ROW_PREF,
// gemmsup kernel row preferences
BLIS_GEMMSUP_RRR_UKR_ROW_PREF,
BLIS_GEMMSUP_RRC_UKR_ROW_PREF,
BLIS_GEMMSUP_RCR_UKR_ROW_PREF,
BLIS_GEMMSUP_RCC_UKR_ROW_PREF,
BLIS_GEMMSUP_CRR_UKR_ROW_PREF,
BLIS_GEMMSUP_CRC_UKR_ROW_PREF,
BLIS_GEMMSUP_CCR_UKR_ROW_PREF,
BLIS_GEMMSUP_CCC_UKR_ROW_PREF,
BLIS_GEMMSUP_XXX_UKR_ROW_PREF,
// BLIS_NUM_UKR_PREFS must be last!
BLIS_NUM_UKR_PREFS,
// Force the size of ukr_pref_t values to be as large as siz_t
BLIS_UKR_PREFS_END_ = BLIS_VA_END
} ukr_pref_t;
// Classifies a microkernel implementation. Values are consecutive ordinals
// starting at 0, so BLIS_NUM_UKR_IMPL_TYPES is the number of classes.
typedef enum
{
BLIS_REFERENCE_UKERNEL = 0,
BLIS_VIRTUAL_UKERNEL,
BLIS_OPTIMIZED_UKERNEL,
BLIS_NOTAPPLIC_UKERNEL,
// BLIS_NUM_UKR_IMPL_TYPES must be last!
BLIS_NUM_UKR_IMPL_TYPES
} kimpl_t;
#if 0
typedef enum
{
// RV = row-stored, contiguous vector-loading
// RG = row-stored, non-contiguous gather-loading
// CV = column-stored, contiguous vector-loading
// CG = column-stored, non-contiguous gather-loading
// RD = row-stored, dot-based
// CD = col-stored, dot-based
// RC = row-stored, column-times-column
// CR = column-stored, row-times-row
// GX = general-stored generic implementation
BLIS_GEMMSUP_RV_UKR = 0,
BLIS_GEMMSUP_RG_UKR,
BLIS_GEMMSUP_CV_UKR,
BLIS_GEMMSUP_CG_UKR,
BLIS_GEMMSUP_RD_UKR,
BLIS_GEMMSUP_CD_UKR,
BLIS_GEMMSUP_RC_UKR,
BLIS_GEMMSUP_CR_UKR,
BLIS_GEMMSUP_GX_UKR,
// BLIS_NUM_LEVEL3_SUP_UKRS must be last!
BLIS_NUM_LEVEL3_SUP_UKRS
} l3sup_t;
#endif
// Identifies a storage combination of three operands, one letter per operand
// (R = row, C = column, per the names). The eight R/C combinations take the
// values 0..7 in binary-counting order with R as 0 and C as 1; BLIS_XXX (8)
// follows them. The general-stride (G) combinations are compiled out.
typedef enum
{
// 3-operand storage combinations
BLIS_RRR = 0,
BLIS_RRC, // 1
BLIS_RCR, // 2
BLIS_RCC, // 3
BLIS_CRR, // 4
BLIS_CRC, // 5
BLIS_CCR, // 6
BLIS_CCC, // 7
BLIS_XXX, // 8
#if 0
BLIS_RRG,
BLIS_RCG,
BLIS_RGR,
BLIS_RGC,
BLIS_RGG,
BLIS_CRG,
BLIS_CCG,
BLIS_CGR,
BLIS_CGC,
BLIS_CGG,
BLIS_GRR,
BLIS_GRC,
BLIS_GRG,
BLIS_GCR,
BLIS_GCC,
BLIS_GCG,
BLIS_GGR,
BLIS_GGC,
BLIS_GGG,
#endif
// BLIS_NUM_3OP_RC_COMBOS must be last!
BLIS_NUM_3OP_RC_COMBOS
} stor3_t;
#if 0
typedef enum
{
BLIS_JC_IDX = 0,
BLIS_PC_IDX,
BLIS_IC_IDX,
BLIS_JR_IDX,
BLIS_IR_IDX,
BLIS_PR_IDX
} thridx_t;
#endif
// Number of loops that can be parallelized; used as the length of the
// thrloop[] array in rntm_t. The value matches the six loop indices
// (JC, PC, IC, JR, IR, PR) listed in the disabled thridx_t enum.
#define BLIS_NUM_LOOPS 6
// -- Operation ID type --
// Identifies a level-3 operation. Values are consecutive ordinals starting
// at 0; BLIS_NOID doubles as the "no operation" value and as the operation
// count (BLIS_NUM_LEVEL3_OPS).
typedef enum
{
//
// NOTE: If/when additional type values are added to this enum,
// you must either:
// - keep the level-3 values (starting with _GEMM) beginning at
// index 0; or
// - if the value range is moved such that it does not begin at
// index 0, implement something like a BLIS_OPID_LEVEL3_RANGE_START
// value that can be subtracted from the opid_t value to map it
// to a zero-based range.
// This is needed because these level-3 opid_t values are used in
// bli_l3_ind.c to index into arrays.
//
BLIS_GEMM = 0,
BLIS_GEMMT,
BLIS_HEMM,
BLIS_HERK,
BLIS_HER2K,
BLIS_SYMM,
BLIS_SYRK,
BLIS_SYR2K,
BLIS_TRMM3,
BLIS_TRMM,
BLIS_TRSM,
// BLIS_NOID (= BLIS_NUM_LEVEL3_OPS) must be last!
BLIS_NOID,
BLIS_NUM_LEVEL3_OPS = BLIS_NOID,
// Force the size of opid_t values to be as large as siz_t
BLIS_LEVEL3_OPS_END_ = BLIS_VA_END
} opid_t;
// -- Blocksize ID type --
// Identifies a blocksize. Values are consecutive ordinals starting at 0
// (BLIS_KR); BLIS_NO_PART doubles as the "not applicable" value and as the
// blocksize count (BLIS_NUM_BLKSZS).
typedef enum
{
// NOTE: the level-3 blocksizes MUST be indexed starting at zero.
// At one point, we made this assumption in bli_cntx_set_blkszs()
// and friends.
BLIS_KR,
BLIS_MR,
BLIS_NR,
BLIS_MC,
BLIS_KC,
BLIS_NC,
// broadcast factors for packing
BLIS_BBM,
BLIS_BBN,
// level-2 blocksizes
BLIS_M2, // level-2 blocksize in m dimension
BLIS_N2, // level-2 blocksize in n dimension
// level-1f blocksizes
BLIS_AF, // level-1f axpyf fusing factor
BLIS_DF, // level-1f dotxf fusing factor
BLIS_XF, // level-1f dotxaxpyf fusing factor
// gemmsup thresholds
BLIS_MT, // level-3 small/unpacked matrix threshold in m dimension
BLIS_NT, // level-3 small/unpacked matrix threshold in n dimension
BLIS_KT, // level-3 small/unpacked matrix threshold in k dimension
// gemmsup block sizes
BLIS_KR_SUP,
BLIS_MR_SUP,
BLIS_NR_SUP,
BLIS_MC_SUP,
BLIS_KC_SUP,
BLIS_NC_SUP,
// BLIS_NO_PART (= BLIS_NUM_BLKSZS) must be last!
BLIS_NO_PART, // used as a placeholder when blocksizes are not applicable,
// such as when characterizing a packm operation.
BLIS_NUM_BLKSZS = BLIS_NO_PART,
// Force the size of bszid_t values to be as large as siz_t
BLIS_BLKSZS_END_ = BLIS_VA_END
} bszid_t;
// A convenient version of the BLIS_XX block size IDs which can be used in bitfields.
// Each flag is a single bit whose position is the bszid_t value of the
// corresponding blocksize, so flags can be combined with bitwise OR.
// Only the first six bszid_t values (KR..NC) have flags.
enum
{
BLIS_THREAD_NONE = 0,
BLIS_THREAD_KR = 1 << BLIS_KR,
BLIS_THREAD_MR = 1 << BLIS_MR,
BLIS_THREAD_NR = 1 << BLIS_NR,
BLIS_THREAD_MC = 1 << BLIS_MC,
BLIS_THREAD_KC = 1 << BLIS_KC,
BLIS_THREAD_NC = 1 << BLIS_NC,
};
// -- Architecture ID type --
// NOTE: This typedef enum must be kept up-to-date with the arch_t
// string array in bli_arch.c. Whenever values are added/inserted
// OR if values are rearranged, be sure to update the string array
// in bli_arch.c.
// Identifies a hardware architecture/configuration. Values are consecutive
// ordinals starting at 0, so inserting or reordering entries renumbers every
// entry that follows.
typedef enum
{
// NOTE: The C language standard guarantees that the first enum value
// starts at 0.
// Intel
BLIS_ARCH_SKX,
BLIS_ARCH_KNL,
BLIS_ARCH_KNC,
BLIS_ARCH_HASWELL,
BLIS_ARCH_SANDYBRIDGE,
BLIS_ARCH_PENRYN,
// AMD
BLIS_ARCH_ZEN3,
BLIS_ARCH_ZEN2,
BLIS_ARCH_ZEN,
BLIS_ARCH_EXCAVATOR,
BLIS_ARCH_STEAMROLLER,
BLIS_ARCH_PILEDRIVER,
BLIS_ARCH_BULLDOZER,
// ARM-SVE
BLIS_ARCH_ARMSVE,
BLIS_ARCH_A64FX,
// ARM-NEON (4 pipes x 128-bit vectors)
BLIS_ARCH_ALTRAMAX,
BLIS_ARCH_ALTRA,
BLIS_ARCH_FIRESTORM,
// ARM (2 pipes x 128-bit vectors)
BLIS_ARCH_THUNDERX2,
BLIS_ARCH_CORTEXA57,
BLIS_ARCH_CORTEXA53,
// ARM 32-bit (vintage)
BLIS_ARCH_CORTEXA15,
BLIS_ARCH_CORTEXA9,
// IBM/Power
BLIS_ARCH_POWER10,
BLIS_ARCH_POWER9,
BLIS_ARCH_POWER7,
BLIS_ARCH_BGQ,
// RISC-V
BLIS_ARCH_RV32I,
BLIS_ARCH_RV64I,
BLIS_ARCH_RV32IV,
BLIS_ARCH_RV64IV,
// SiFive
BLIS_ARCH_SIFIVE_X280,
// Generic architecture/configuration
BLIS_ARCH_GENERIC,
// The total number of defined architectures. This must be last in the
// list of enums since its definition assumes that the previous enum
// value (BLIS_ARCH_GENERIC) is given index num_archs-1.
BLIS_NUM_ARCHS
} arch_t;
//
// -- BLIS misc. structure types -----------------------------------------------
//
// This header must be included here (or earlier) because definitions it
// provides are needed in the pool_t and related structs.
// begin bli_pthread.h
#line 1 "./frame/thread//bli_pthread.h"
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2018, Southern Methodist University
Copyright (C) 2018, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_PTHREAD_H
#define BLIS_PTHREAD_H
// -- Type and macro definitions -----------------------------------------------
// Select the definitions of the bli_pthread_* types and static initializer
// macros. Exactly one of three branches is compiled: dummy int types when
// BLIS_DISABLE_SYSTEM is defined, Windows types under MSVC, and POSIX
// threads types otherwise.
#if defined(BLIS_DISABLE_SYSTEM)
// This branch defines a pthread-like API, bli_pthread_*(), and implements it
// in terms of "dummy" code that doesn't depend on POSIX threads or any other
// threading mechanism. See issue #454 to see the use case that prompted this
// feature.
// NOTE: THIS CODE DOES NOT IMPLEMENT THREADING AND IS NOT THREAD-SAFE!
// -- pthread types --
typedef int bli_pthread_t;
typedef int bli_pthread_attr_t;
typedef int bli_pthread_mutex_t;
typedef int bli_pthread_mutexattr_t;
typedef int bli_pthread_cond_t;
typedef int bli_pthread_condattr_t;
typedef int bli_pthread_once_t;
typedef int bli_pthread_barrier_t;
typedef int bli_pthread_barrierattr_t;
// -- pthreads macros --
#define BLIS_PTHREAD_MUTEX_INITIALIZER 0
#define BLIS_PTHREAD_COND_INITIALIZER 0
#define BLIS_PTHREAD_ONCE_INIT 0
#elif defined(_MSC_VER) // !defined(BLIS_DISABLE_SYSTEM)
// This branch defines a pthread-like API, bli_pthread_*(), and implements it
// in terms of Windows API calls.
// NOTE(review): HANDLE, SRWLOCK, CONDITION_VARIABLE and INIT_ONCE are not
// declared in this header; presumably <windows.h> is included earlier --
// confirm.
// -- pthread types --
typedef struct
{
HANDLE handle;
void* retval;
} bli_pthread_t;
// The attribute types are void here, so they can only be used via pointers.
typedef void bli_pthread_attr_t;
typedef SRWLOCK bli_pthread_mutex_t;
typedef void bli_pthread_mutexattr_t;
typedef CONDITION_VARIABLE bli_pthread_cond_t;
typedef void bli_pthread_condattr_t;
typedef INIT_ONCE bli_pthread_once_t;
// Barrier built from a mutex, a condition variable and two counters.
typedef struct
{
bli_pthread_mutex_t mutex;
bli_pthread_cond_t cond;
int count;
int tripCount;
} bli_pthread_barrier_t;
typedef void bli_pthread_barrierattr_t;
// -- pthreads macros --
#define BLIS_PTHREAD_MUTEX_INITIALIZER SRWLOCK_INIT
#define BLIS_PTHREAD_ONCE_INIT INIT_ONCE_STATIC_INIT
#define BLIS_PTHREAD_COND_INITIALIZER CONDITION_VARIABLE_INIT
#else // !defined(BLIS_DISABLE_SYSTEM) && !defined(_MSC_VER)
#include <pthread.h> // skipped
// This branch defines a pthread-like API, bli_pthread_*(), and implements it
// in terms of the corresponding pthread_*() types, macros, and function calls.
// -- pthread types --
typedef pthread_t bli_pthread_t;
typedef pthread_attr_t bli_pthread_attr_t;
typedef pthread_mutex_t bli_pthread_mutex_t;
typedef pthread_mutexattr_t bli_pthread_mutexattr_t;
typedef pthread_cond_t bli_pthread_cond_t;
typedef pthread_condattr_t bli_pthread_condattr_t;
typedef pthread_once_t bli_pthread_once_t;
#if defined(__APPLE__)
// For OS X, we must define the barrier types ourselves since Apple does
// not implement barriers in their variant of pthreads.
typedef void bli_pthread_barrierattr_t;
// Same layout as the barrier struct used in the Windows branch.
typedef struct
{
bli_pthread_mutex_t mutex;
bli_pthread_cond_t cond;
int count;
int tripCount;
} bli_pthread_barrier_t;
#else
// For other non-Windows OSes (primarily Linux), we can define the barrier
// types in terms of existing pthreads barrier types since we expect they
// will be provided by the pthreads implementation.
typedef pthread_barrier_t bli_pthread_barrier_t;
typedef pthread_barrierattr_t bli_pthread_barrierattr_t;
#endif
// -- pthreads macros --
#define BLIS_PTHREAD_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
#define BLIS_PTHREAD_COND_INITIALIZER PTHREAD_COND_INITIALIZER
#define BLIS_PTHREAD_ONCE_INIT PTHREAD_ONCE_INIT
#endif
// -- Function definitions -----------------------------------------------------
// -- pthread_create(), pthread_join() --
// Declarations only; the definitions are not part of this header.
// The signatures mirror pthread_create()/pthread_join() with the bli_pthread_*
// types substituted. Return-value convention is presumably that of the POSIX
// counterparts (0 on success) -- confirm in the implementation.
BLIS_EXPORT_BLIS int bli_pthread_create
(
bli_pthread_t* thread,
const bli_pthread_attr_t* attr,
void* (*start_routine)(void*),
void* arg
);
BLIS_EXPORT_BLIS int bli_pthread_join
(
bli_pthread_t thread,
void** retval
);
// -- pthread_mutex_*() --
// Declarations only; the definitions are not part of this header.
// The signatures mirror the pthread_mutex_*() functions of the same suffix
// with bli_pthread_mutex_t substituted. Return-value convention is presumably
// that of the POSIX counterparts -- confirm in the implementation.
BLIS_EXPORT_BLIS int bli_pthread_mutex_init
(
bli_pthread_mutex_t* mutex,
const bli_pthread_mutexattr_t* attr
);
BLIS_EXPORT_BLIS int bli_pthread_mutex_destroy
(
bli_pthread_mutex_t* mutex
);
BLIS_EXPORT_BLIS int bli_pthread_mutex_lock
(
bli_pthread_mutex_t* mutex
);
BLIS_EXPORT_BLIS int bli_pthread_mutex_trylock
(
bli_pthread_mutex_t* mutex
);
BLIS_EXPORT_BLIS int bli_pthread_mutex_unlock
(
bli_pthread_mutex_t* mutex
);
// -- pthread_cond_*() --
// Declarations only; the definitions are not part of this header.
// The signatures mirror the pthread_cond_*() functions of the same suffix.
// Note that only broadcast is declared; there is no signal counterpart here.
BLIS_EXPORT_BLIS int bli_pthread_cond_init
(
bli_pthread_cond_t* cond,
const bli_pthread_condattr_t* attr
);
BLIS_EXPORT_BLIS int bli_pthread_cond_destroy
(
bli_pthread_cond_t* cond
);
BLIS_EXPORT_BLIS int bli_pthread_cond_wait
(
bli_pthread_cond_t* cond,
bli_pthread_mutex_t* mutex
);
BLIS_EXPORT_BLIS int bli_pthread_cond_broadcast
(
bli_pthread_cond_t* cond
);
// -- pthread_once() --
// Declaration only. Unlike POSIX pthread_once(), which returns int, this
// wrapper returns void, so callers cannot observe a failure.
BLIS_EXPORT_BLIS void bli_pthread_once
(
bli_pthread_once_t* once,
void (*init)(void)
);
#if 0
// NOTE: This part of the API is disabled because (1) we don't actually need
// _self() or _equal() yet, and (2) when we do try to include these functions,
// AppVeyor for some reason fails on all the Windows/clang builds with the
// error:
// libblis.a(bli_pthread.o) : error LNK2019: unresolved external symbol
// __imp_CompareObjectHandles referenced in function bli_pthread_equal
// -- pthread_self() --
BLIS_EXPORT_BLIS bli_pthread_t bli_pthread_self
(
void
);
// -- pthread_equal() --
BLIS_EXPORT_BLIS int bli_pthread_equal
(
bli_pthread_t t1,
bli_pthread_t t2
);
#endif
// -- pthread_barrier_*() --
// Declarations only; the definitions are not part of this header.
// The signatures mirror the pthread_barrier_*() functions of the same suffix
// with the bli_pthread_barrier* types substituted.
BLIS_EXPORT_BLIS int bli_pthread_barrier_init
(
bli_pthread_barrier_t* barrier,
const bli_pthread_barrierattr_t* attr,
unsigned int count
);
BLIS_EXPORT_BLIS int bli_pthread_barrier_destroy
(
bli_pthread_barrier_t* barrier
);
BLIS_EXPORT_BLIS int bli_pthread_barrier_wait
(
bli_pthread_barrier_t* barrier
);
// -- Non-standard extensions --------------------------------------------------
// -- pthread_switch --
// A "switch": an integer status paired with a mutex.
// NOTE(review): presumably the mutex guards status and the init/deinit
// callbacks run on the off->on and on->off transitions -- confirm in the
// implementation, which is not visible here.
typedef struct
{
int status;
bli_pthread_mutex_t mutex;
} bli_pthread_switch_t;
// Static initializer: status 0 and a statically initialized mutex.
#define BLIS_PTHREAD_SWITCH_INIT { .status = 0, \
.mutex = BLIS_PTHREAD_MUTEX_INITIALIZER }
// Declarations only. Unlike the pthread wrappers above, these two are not
// marked BLIS_EXPORT_BLIS.
int bli_pthread_switch_on
(
bli_pthread_switch_t* sw,
int (*init)(void)
);
int bli_pthread_switch_off
(
bli_pthread_switch_t* sw,
int (*deinit)(void)
);
#endif // BLIS_PTHREAD_H
// end bli_pthread.h
#line 1026 "./frame/include//bli_type_defs.h"
// -- Pool block type --
// A pool block: a buffer pointer paired with a size.
// NOTE(review): block_size is presumably in bytes -- confirm.
typedef struct
{
void* buf;
siz_t block_size;
} pblk_t;
// -- Pool type --
// A pool of blocks together with the allocator functions it uses.
// NOTE(review): field meanings below are inferred from their names; confirm
// against the pool implementation.
typedef struct
{
void* block_ptrs; // presumably the array of block descriptors
dim_t block_ptrs_len; // presumably the capacity of block_ptrs
dim_t top_index; // presumably the index of the next available block
dim_t num_blocks; // presumably the number of blocks currently held
siz_t block_size;
siz_t align_size;
siz_t offset_size;
malloc_ft malloc_fp; // allocation function pointer
free_ft free_fp; // deallocation function pointer
} pool_t;
// -- Array type --
// A generic array descriptor: untyped buffer, element count and element size.
typedef struct
{
void* buf;
siz_t num_elem;
siz_t elem_size;
} array_t;
// -- Locked pool-of-arrays-of-pools type --
// A pool_t bundled with a mutex and a default array length.
// NOTE(review): the mutex presumably serializes access to the pool -- confirm.
typedef struct
{
bli_pthread_mutex_t mutex;
pool_t pool;
siz_t def_array_len;
} apool_t;
// -- packing block allocator: Locked set of pools type --
// Packing block allocator: three pools sharing one mutex, plus the alignment
// and allocator functions used for general-purpose allocation.
// NOTE(review): the three pools presumably correspond to the A-block, B-panel
// and C-panel packbuf_t kinds -- confirm how they are indexed.
typedef struct pba_s
{
pool_t pools[3];
bli_pthread_mutex_t mutex;
// These fields are used for general-purpose allocation.
siz_t align_size;
malloc_ft malloc_fp;
free_ft free_fp;
} pba_t;
// -- Memory object type --
// Memory object: a pool block plus the kind of buffer it is, a pointer to a
// pool, and a size.
// NOTE(review): pool is presumably the pool the block was taken from (and
// may be unused for general-use buffers) -- confirm.
typedef struct mem_s
{
pblk_t pblk;
packbuf_t buf_type;
pool_t* pool;
siz_t size;
} mem_t;
// -- Control tree node type --
// Maximum number of children of a control tree node.
#define BLIS_MAX_SUB_NODES 2
// A control tree node: a function pointer and up to BLIS_MAX_SUB_NODES
// children, each paired with a "ways" count.
// NOTE(review): ways is presumably the degree of parallelism applied when
// descending into that child -- confirm.
struct cntl_s
{
// Actually this is a l3_var_oft, but that type hasn't been defined yet
void_fp var_func;
struct
{
dim_t ways;
struct cntl_s* sub_node;
} sub_nodes[ BLIS_MAX_SUB_NODES ];
};
typedef struct cntl_s cntl_t;
// -- Blocksize object type --
// Blocksize object: one primary value and one extension value per
// floating-point datatype (arrays of length BLIS_NUM_FP_TYPES).
// NOTE(review): the arrays are presumably indexed by num_t -- confirm.
typedef struct blksz_s
{
// Primary blocksize values.
dim_t v[BLIS_NUM_FP_TYPES];
// Blocksize extensions.
dim_t e[BLIS_NUM_FP_TYPES];
} blksz_t;
// -- Function pointer object type --
// Function pointer object: one kernel address per floating-point datatype.
// NOTE(review): the array is presumably indexed by num_t -- confirm.
typedef struct func_s
{
// Kernel function address.
void_fp ptr[BLIS_NUM_FP_TYPES];
} func_t;
// Two-type function pointer object: one kernel address per pair of
// floating-point datatypes.
typedef struct func2_s
{
// Kernel function address.
// A func2_t* can be cast to a func_t* in order to access
// only the "diagonal" elements (dt,dt) (but note that to accomplish
// this those elements are not stored in ptr[dt][dt]...see bli_func.c
// for more details).
void_fp ptr[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES];
} func2_t;
// -- Multi-boolean object type --
// Multi-boolean object: one bool per floating-point datatype.
typedef struct mbool_s
{
bool v[BLIS_NUM_FP_TYPES];
} mbool_t;
// -- Auxiliary kernel info type --
// Note: This struct is used by macro-kernels to package together extra
// parameter values that may be of use to the micro-kernel without
// cluttering up the micro-kernel interface itself.
// Bundle of extra per-call parameters handed to a micro-kernel. The members
// fall into groups: pack schemas, look-ahead pointers, strides, the offset of
// the current micro-tile, and a kernel address with an opaque parameter
// pointer.
typedef struct
{
// The pack schemas of A and B.
pack_t schema_a;
pack_t schema_b;
// Pointers to the micro-panels of A and B which will be used by the
// next call to the micro-kernel.
const void* a_next;
const void* b_next;
// The imaginary strides of A and B.
inc_t is_a;
inc_t is_b;
// The panel strides of A and B.
// NOTE: These are only used in situations where iteration over the
// micropanels takes place in part within the kernel code (e.g. sup
// millikernels).
inc_t ps_a;
inc_t ps_b;
// The row and column offset of the current micro-tile in C.
dim_t off_m;
dim_t off_n;
// The type to convert to on output.
//num_t dt_on_output;
// (Virtual) microkernel address and additional parameters.
void_fp ukr;
const void* params;
} auxinfo_t;
// -- Global scalar constant data struct --
// Note: This struct is used only when statically initializing the
// global scalar constants in bli_const.c.
// Holds one constant in every supported representation: float, double,
// single complex, double complex and integer. The member order must match
// the designated initializers in bli_obj_init_constdata() and
// bli_obj_init_constdata_ri() only by name, not by position.
typedef struct constdata_s
{
float s;
double d;
scomplex c;
dcomplex z;
gint_t i;
} constdata_t;
//
// -- BLIS object type definitions ---------------------------------------------
//
// The BLIS object: describes a matrix (or vector/scalar) view.
// NOTE: The member order is significant. BLIS_OBJECT_INITIALIZER and
// BLIS_OBJECT_INITIALIZER_1X1 initialize the members positionally, and the
// bli_obj_init_*() helpers enumerate them; all must be updated together with
// this struct.
typedef struct obj_s
{
// Basic fields
struct obj_s* root;
dim_t off[2]; // two offsets (presumably indexed by mdim_t -- confirm)
dim_t dim[2]; // two dimensions (presumably indexed by mdim_t -- confirm)
doff_t diag_off;
objbits_t info; // bitfield built from the BLIS_BITVAL_* values
objbits_t info2; // second bitfield (holds the scalar datatype)
siz_t elem_size;
void* buffer;
inc_t rs; // presumably the row stride -- confirm
inc_t cs; // presumably the column stride -- confirm
inc_t is; // presumably the imaginary stride -- confirm
// Bufferless scalar storage
atom_t scalar;
// Pack-related fields
dim_t m_padded; // m dimension of matrix, including any padding
dim_t n_padded; // n dimension of matrix, including any padding
inc_t ps; // panel stride (distance to next panel)
inc_t pd; // panel dimension (the "width" of a panel:
// usually MR or NR)
dim_t m_panel; // m dimension of a "full" panel
dim_t n_panel; // n dimension of a "full" panel
} obj_t;
// Pre-initializers. Things that must be set afterwards:
// - root object pointer
// - info bitfields: dt, target_dt, exec_dt, comp_dt
// - info2 bitfields: scalar_dt
// - elem_size
// - dims, strides
// - buffer
// - internal scalar buffer (must always set imaginary component)
//
// BLIS_OBJECT_INITIALIZER is a positional (non-designated) initializer for
// obj_t, so the order of its entries must match the member order of obj_t
// exactly; the bracketed names are comments only. It yields a 0x0 object with
// dense, general info bits, zero row/column strides and an imaginary stride
// of 1.
#define BLIS_OBJECT_INITIALIZER \
{ \
/* .root = */ NULL, \
\
/* .off = */ { 0, 0 }, \
/* .dim = */ { 0, 0 }, \
/* .diag_off = */ 0, \
\
/* .info = */ 0x0 | BLIS_BITVAL_DENSE | \
/* */ BLIS_BITVAL_GENERAL, \
/* .info2 = */ 0x0, \
/* .elem_size = */ sizeof( float ), /* this is changed later. */ \
\
/* .buffer = */ NULL, \
/* .rs = */ 0, \
/* .cs = */ 0, \
/* .is = */ 1, \
\
/* .scalar = */ { 0.0, 0.0 }, \
\
/* .m_padded = */ 0, \
/* .n_padded = */ 0, \
/* .ps = */ 0, \
/* .pd = */ 0, \
/* .m_panel = */ 0, \
/* .n_panel = */ 0, \
}
// Positional initializer for a 1x1 obj_t. Identical to
// BLIS_OBJECT_INITIALIZER except that dim is { 1, 1 } instead of { 0, 0 }.
// The order of the entries must match the member order of obj_t exactly;
// the bracketed names are comments only.
#define BLIS_OBJECT_INITIALIZER_1X1 \
{ \
/* .root = */ NULL, \
\
/* .off = */ { 0, 0 }, \
/* .dim = */ { 1, 1 }, \
/* .diag_off = */ 0, \
\
/* .info = */ 0x0 | BLIS_BITVAL_DENSE | \
/* */ BLIS_BITVAL_GENERAL, \
/* .info2 = */ 0x0, \
/* .elem_size = */ sizeof( float ), /* this is changed later. */ \
\
/* .buffer = */ NULL, \
/* .rs = */ 0, \
/* .cs = */ 0, \
/* .is = */ 1, \
\
/* .scalar = */ { 0.0, 0.0 }, \
\
/* .m_padded = */ 0, \
/* .n_padded = */ 0, \
/* .ps = */ 0, \
/* .pd = */ 0, \
/* .m_panel = */ 0, \
/* .n_panel = */ 0, \
}
// Define these macros here since they must be updated if contents of
// obj_t changes.
// Make b a full shallow copy of a: every member of obj_t is duplicated,
// including the root and buffer pointers (the memory they point to is shared,
// not cloned).
//   a - object to copy from (not modified)
//   b - object to overwrite
BLIS_INLINE void bli_obj_init_full_shallow_copy_of( const obj_t* a, obj_t* b )
{
	// Every member of obj_t takes part in the copy, so a whole-struct
	// assignment transfers exactly the same set of values as copying the
	// members one at a time.
	*b = *a;
}
// Initialize b as a subpartition view of a: every member of obj_t is copied
// from a except dim[], which is left untouched in b because the caller is
// expected to overwrite both dimensions afterwards.
// NOTE: The member list must be kept in sync with the definition of obj_t.
//   a - parent object (not modified)
//   b - object receiving the copied members
BLIS_INLINE void bli_obj_init_subpart_from( const obj_t* a, obj_t* b )
{
	// Identity and position. dim[0]/dim[1] are deliberately skipped.
	b->root = a->root;
	for ( int k = 0; k < 2; ++k )
	{
		b->off[k] = a->off[k];
	}
	b->diag_off = a->diag_off;

	// Property bitfields.
	b->info  = a->info;
	b->info2 = a->info2;

	// Storage description.
	b->buffer    = a->buffer;
	b->elem_size = a->elem_size;
	b->rs = a->rs;
	b->cs = a->cs;
	b->is = a->is;

	// Attached scalar.
	b->scalar = a->scalar;

	// Pack-related members.
	b->m_padded = a->m_padded;
	b->n_padded = a->n_padded;
	b->m_panel  = a->m_panel;
	b->n_panel  = a->n_panel;
	b->ps = a->ps;
	b->pd = a->pd;
}
// Initializers for global scalar constants.
// NOTE: These must remain cpp macros since they are initializer
// expressions, not functions.
//
// bli_obj_init_const( buffer0 ) expands to a designated initializer for a
// 1x1 obj_t of constant datatype (BLIS_BITVAL_CONST_TYPE) whose buffer is
// buffer0 and whose element size is sizeof( constdata_t ). All strides are 1.
// Members that are not named (scalar and the pack-related members) are
// zero-initialized by the usual rules for designated initializers.
#define bli_obj_init_const( buffer0 ) \
{ \
.root = NULL, \
\
.off = { 0, 0 }, \
.dim = { 1, 1 }, \
.diag_off = 0, \
\
.info = 0x0 | BLIS_BITVAL_CONST_TYPE | \
BLIS_BITVAL_DENSE | \
BLIS_BITVAL_GENERAL, \
.info2 = 0x0, \
.elem_size = sizeof( constdata_t ), \
\
.buffer = buffer0, \
.rs = 1, \
.cs = 1, \
.is = 1 \
}
// Expands to a designated initializer for a constdata_t in which every
// representation holds the real value `val`: s and d receive val converted to
// float and double, c and z receive val as their real part with a zero
// imaginary part, and i receives val converted to gint_t.
// `val` is parenthesized before each cast so that the cast applies to the
// whole argument expression (e.g. `7 / 2`) rather than only to its first
// operand.
#define bli_obj_init_constdata( val ) \
{ \
.s = ( float )( val ), \
.d = ( double )( val ), \
.c = { .real = ( float )( val ), .imag = 0.0f }, \
.z = { .real = ( double )( val ), .imag = 0.0 }, \
.i = ( gint_t )( val ), \
}
// Expands to a designated initializer for a constdata_t holding the complex
// value (valr, vali): the real-only members s, d and i receive valr, while
// c and z receive valr as their real part and vali as their imaginary part.
// Both arguments are parenthesized before each cast so that the cast applies
// to the whole argument expression rather than only to its first operand.
#define bli_obj_init_constdata_ri( valr, vali ) \
{ \
.s = ( float )( valr ), \
.d = ( double )( valr ), \
.c = { .real = ( float )( valr ), .imag = ( float )( vali ) }, \
.z = { .real = ( double )( valr ), .imag = ( double )( vali ) }, \
.i = ( gint_t )( valr ), \
}
// -- Stack type --
// NB: stack_t is already taken by <signal.h>
// A stack container with its own lock.
// NOTE(review): the layout suggests elements are stored in separately
// allocated blocks (blocks[]) of block_len elements each, up to max_blocks
// blocks -- confirm against the stack implementation.
typedef struct
{
siz_t elem_size;
siz_t block_len;
siz_t max_blocks;
siz_t size;
siz_t capacity;
void** blocks;
bli_pthread_mutex_t lock;
} stck_t;
// -- Context type --
// Context: six stck_t containers, one each for blocksizes, blocksize
// multiples, single-type kernels, two-type kernels, kernel preferences and
// level-3 sup handlers (per the member names).
// NOTE(review): the containers are presumably indexed by bszid_t, ukr_t,
// ukr_pref_t and opid_t respectively -- confirm.
typedef struct cntx_s
{
stck_t blkszs;
stck_t bmults;
stck_t ukrs;
stck_t ukr2s;
stck_t ukr_prefs;
stck_t l3_sup_handlers;
} cntx_t;
// -- Runtime type --
// NOTE: The order of these fields must be kept consistent with the definition
// of the BLIS_RNTM_INITIALIZER macro in bli_rntm.h.
// Runtime object: threading implementation, thread counts and the
// packing/sup switches. thrloop[] has one entry per parallelizable loop
// (BLIS_NUM_LOOPS entries).
typedef struct rntm_s
{
// "External" fields: these may be queried by the end-user.
timpl_t thread_impl;
bool auto_factor;
dim_t num_threads;
dim_t thrloop[ BLIS_NUM_LOOPS ];
bool pack_a; // enable/disable packing of left-hand matrix A.
bool pack_b; // enable/disable packing of right-hand matrix B.
bool l3_sup; // enable/disable small matrix handling in level-3 ops.
} rntm_t;
// -- Error types --
// Error-checking level: either no checking (0) or full checking (1).
typedef enum
{
	BLIS_NO_ERROR_CHECKING   = 0,
	BLIS_FULL_ERROR_CHECKING = 1
} errlev_t;
// Status/error codes. Every enumerator, including BLIS_SUCCESS, is negative.
// Specific error codes occupy the values strictly between
// BLIS_ERROR_CODE_MIN (-9) and BLIS_ERROR_CODE_MAX (-170), grouped in
// decades by category.
typedef enum
{
	// Generic error codes
	BLIS_SUCCESS                              =   -1,
	BLIS_FAILURE                              =   -2,
	BLIS_ERROR_CODE_MIN                       =   -9,

	// General errors
	BLIS_INVALID_ERROR_CHECKING_LEVEL         =  -10,
	BLIS_UNDEFINED_ERROR_CODE                 =  -11,
	BLIS_NULL_POINTER                         =  -12,
	BLIS_NOT_YET_IMPLEMENTED                  =  -13,
	BLIS_OUT_OF_BOUNDS                        =  -14,
	BLIS_LOCK_FAILURE                         =  -15,

	// Parameter-specific errors
	BLIS_INVALID_SIDE                         =  -20,
	BLIS_INVALID_UPLO                         =  -21,
	BLIS_INVALID_TRANS                        =  -22,
	BLIS_INVALID_CONJ                         =  -23,
	BLIS_INVALID_DIAG                         =  -24,
	BLIS_INVALID_MACHVAL                      =  -25,
	BLIS_EXPECTED_NONUNIT_DIAG                =  -26,

	// Datatype-specific errors
	BLIS_INVALID_DATATYPE                     =  -30,
	BLIS_EXPECTED_FLOATING_POINT_DATATYPE     =  -31,
	BLIS_EXPECTED_NONINTEGER_DATATYPE         =  -32,
	BLIS_EXPECTED_NONCONSTANT_DATATYPE        =  -33,
	BLIS_EXPECTED_REAL_DATATYPE               =  -34,
	BLIS_EXPECTED_INTEGER_DATATYPE            =  -35,
	BLIS_INCONSISTENT_DATATYPES               =  -36,
	BLIS_EXPECTED_REAL_PROJ_OF                =  -37,
	BLIS_EXPECTED_REAL_VALUED_OBJECT          =  -38,
	BLIS_INCONSISTENT_PRECISIONS              =  -39,

	// Dimension-specific errors
	BLIS_NONCONFORMAL_DIMENSIONS              =  -40,
	BLIS_EXPECTED_SCALAR_OBJECT               =  -41,
	BLIS_EXPECTED_VECTOR_OBJECT               =  -42,
	BLIS_UNEQUAL_VECTOR_LENGTHS               =  -43,
	BLIS_EXPECTED_SQUARE_OBJECT               =  -44,
	BLIS_UNEXPECTED_OBJECT_LENGTH             =  -45,
	BLIS_UNEXPECTED_OBJECT_WIDTH              =  -46,
	BLIS_UNEXPECTED_VECTOR_DIM                =  -47,
	BLIS_UNEXPECTED_DIAG_OFFSET               =  -48,
	BLIS_NEGATIVE_DIMENSION                   =  -49,

	// Stride-specific errors
	BLIS_INVALID_ROW_STRIDE                   =  -50,
	BLIS_INVALID_COL_STRIDE                   =  -51,
	BLIS_INVALID_DIM_STRIDE_COMBINATION       =  -52,

	// Structure-specific errors
	BLIS_EXPECTED_GENERAL_OBJECT              =  -60,
	BLIS_EXPECTED_HERMITIAN_OBJECT            =  -61,
	BLIS_EXPECTED_SYMMETRIC_OBJECT            =  -62,
	BLIS_EXPECTED_TRIANGULAR_OBJECT           =  -63,

	// Storage-specific errors
	BLIS_EXPECTED_UPPER_OR_LOWER_OBJECT       =  -70,

	// Partitioning-specific errors
	BLIS_INVALID_3x1_SUBPART                  =  -80,
	BLIS_INVALID_1x3_SUBPART                  =  -81,
	BLIS_INVALID_3x3_SUBPART                  =  -82,

	// Control tree-specific errors
	BLIS_UNEXPECTED_NULL_CONTROL_TREE         =  -90,

	// Packing-specific errors
	BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK = -100,
	BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_PART   = -101,

	// Buffer-specific errors
	BLIS_EXPECTED_NONNULL_OBJECT_BUFFER       = -110,

	// Memory errors
	BLIS_MALLOC_RETURNED_NULL                 = -120,

	// Internal memory pool errors
	BLIS_INVALID_PACKBUF                      = -130,
	BLIS_EXHAUSTED_CONTIG_MEMORY_POOL         = -131,
	BLIS_INSUFFICIENT_STACK_BUF_SIZE          = -132,
	BLIS_ALIGNMENT_NOT_POWER_OF_TWO           = -133,
	BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE       = -134,

	// Object-related errors
	BLIS_EXPECTED_OBJECT_ALIAS                = -140,

	// Architecture-related errors
	BLIS_INVALID_ARCH_ID                      = -150,
	BLIS_UNINITIALIZED_GKS_CNTX               = -151,
	BLIS_INVALID_UKR_ID                       = -152,

	// Blocksize-related errors
	BLIS_MC_DEF_NONMULTIPLE_OF_MR             = -160,
	BLIS_MC_MAX_NONMULTIPLE_OF_MR             = -161,
	BLIS_NC_DEF_NONMULTIPLE_OF_NR             = -162,
	BLIS_NC_MAX_NONMULTIPLE_OF_NR             = -163,
	BLIS_KC_DEF_NONMULTIPLE_OF_KR             = -164,
	BLIS_KC_MAX_NONMULTIPLE_OF_KR             = -165,
	BLIS_MR_NOT_EVEN_FOR_REAL_TYPE            = -166,
	BLIS_PACKMR_NOT_EVEN_FOR_REAL_TYPE        = -167,
	BLIS_NR_NOT_EVEN_FOR_REAL_TYPE            = -168,
	BLIS_PACKNR_NOT_EVEN_FOR_REAL_TYPE        = -169,

	BLIS_ERROR_CODE_MAX                       = -170
} err_t;
#endif
// end bli_type_defs.h
#line 18 "./frame/compat/cblas/src/cblas.h"
/*
* Enumerated and derived types
*/
/* Storage order of matrix arguments. */
enum CBLAS_ORDER
{
	CblasRowMajor = 101,
	CblasColMajor = 102
};

/* Operation applied to a matrix argument. */
enum CBLAS_TRANSPOSE
{
	CblasNoTrans   = 111,
	CblasTrans     = 112,
	CblasConjTrans = 113
};

/* Which triangle of a matrix argument is referenced. */
enum CBLAS_UPLO
{
	CblasUpper = 121,
	CblasLower = 122
};

/* Whether a triangular matrix has a non-unit or unit diagonal. */
enum CBLAS_DIAG
{
	CblasNonUnit = 131,
	CblasUnit    = 132
};

/* Side on which a matrix operand is applied. */
enum CBLAS_SIDE
{
	CblasLeft  = 141,
	CblasRight = 142
};
#ifdef __cplusplus
extern "C" {
#endif
/*
* ===========================================================================
* Prototypes for level 1 BLAS functions (complex are recast as routines)
* ===========================================================================
*/
/*
 * Real dot-product entry points (reference-BLAS ?dot naming); each returns
 * its result by value. X and Y are read-only vectors of N elements accessed
 * with strides incX and incY.
 * Per the signatures: sdsdot takes an extra float `alpha` and returns float;
 * dsdot takes float vectors but returns double.
 */
BLIS_EXPORT_BLAS float cblas_sdsdot(f77_int N, float alpha, const float *X,
f77_int incX, const float *Y, f77_int incY);
BLIS_EXPORT_BLAS double cblas_dsdot(f77_int N, const float *X, f77_int incX, const float *Y,
f77_int incY);
BLIS_EXPORT_BLAS float cblas_sdot(f77_int N, const float *X, f77_int incX,
const float *Y, f77_int incY);
BLIS_EXPORT_BLAS double cblas_ddot(f77_int N, const double *X, f77_int incX,
const double *Y, f77_int incY);
/*
* Functions having prefixes Z and C only
*/
/*
 * Complex dot products, "recast as routines": the result is not returned by
 * value but written through the trailing output pointer (dotu / dotc).
 * Complex data is passed as void pointers.
 * NOTE(review): by reference-BLAS naming, the "u" variants are unconjugated
 * and the "c" variants conjugate X -- confirm against the implementation.
 */
BLIS_EXPORT_BLAS void cblas_cdotu_sub(f77_int N, const void *X, f77_int incX,
const void *Y, f77_int incY, void *dotu);
BLIS_EXPORT_BLAS void cblas_cdotc_sub(f77_int N, const void *X, f77_int incX,
const void *Y, f77_int incY, void *dotc);
BLIS_EXPORT_BLAS void cblas_zdotu_sub(f77_int N, const void *X, f77_int incX,
const void *Y, f77_int incY, void *dotu);
BLIS_EXPORT_BLAS void cblas_zdotc_sub(f77_int N, const void *X, f77_int incX,
const void *Y, f77_int incY, void *dotc);
/*
* Functions having prefixes S D SC DZ
*/
/*
 * Vector reductions that return a real scalar by value (reference-BLAS
 * ?nrm2 / ?asum naming). The complex-input variants (sc*, dz*) take the
 * vector as a void pointer and still return a real float/double.
 */
BLIS_EXPORT_BLAS float cblas_snrm2(f77_int N, const float *X, f77_int incX);
BLIS_EXPORT_BLAS float cblas_sasum(f77_int N, const float *X, f77_int incX);
BLIS_EXPORT_BLAS double cblas_dnrm2(f77_int N, const double *X, f77_int incX);
BLIS_EXPORT_BLAS double cblas_dasum(f77_int N, const double *X, f77_int incX);
BLIS_EXPORT_BLAS float cblas_scnrm2(f77_int N, const void *X, f77_int incX);
BLIS_EXPORT_BLAS float cblas_scasum(f77_int N, const void *X, f77_int incX);
BLIS_EXPORT_BLAS double cblas_dznrm2(f77_int N, const void *X, f77_int incX);
BLIS_EXPORT_BLAS double cblas_dzasum(f77_int N, const void *X, f77_int incX);
/*
* Functions having standard 4 prefixes (S D C Z)
*/
/*
 * Index-returning reductions (reference-BLAS i?amax naming); the result is
 * an f77_int returned by value.
 * NOTE(review): CBLAS conventionally returns a zero-based index here --
 * confirm against the implementation.
 */
BLIS_EXPORT_BLAS f77_int cblas_isamax(f77_int N, const float *X, f77_int incX);
BLIS_EXPORT_BLAS f77_int cblas_idamax(f77_int N, const double *X, f77_int incX);
BLIS_EXPORT_BLAS f77_int cblas_icamax(f77_int N, const void *X, f77_int incX);
BLIS_EXPORT_BLAS f77_int cblas_izamax(f77_int N, const void *X, f77_int incX);
/*
* ===========================================================================
* Prototypes for level 1 BLAS routines
* ===========================================================================
*/
/*
* Routines with standard 4 prefixes (s, d, c, z)
*/
/*
 * swap / copy / axpy for each of the four datatypes.
 * Per the const qualifiers: swap may write both X and Y; copy and axpy read
 * X and write Y. For the complex variants, vectors and the axpy scalar
 * `alpha` are passed as void pointers.
 */
void BLIS_EXPORT_BLAS cblas_sswap(f77_int N, float *X, f77_int incX,
float *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_scopy(f77_int N, const float *X, f77_int incX,
float *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_saxpy(f77_int N, float alpha, const float *X,
f77_int incX, float *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_dswap(f77_int N, double *X, f77_int incX,
double *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_dcopy(f77_int N, const double *X, f77_int incX,
double *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_daxpy(f77_int N, double alpha, const double *X,
f77_int incX, double *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_cswap(f77_int N, void *X, f77_int incX,
void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_ccopy(f77_int N, const void *X, f77_int incX,
void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_caxpy(f77_int N, const void *alpha, const void *X,
f77_int incX, void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_zswap(f77_int N, void *X, f77_int incX,
void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_zcopy(f77_int N, const void *X, f77_int incX,
void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_zaxpy(f77_int N, const void *alpha, const void *X,
f77_int incX, void *Y, f77_int incY);
/*
* Routines with S and D prefix only
*/
/*
 * Rotation routines (reference-BLAS rotg / rotmg / rot / rotm naming), real
 * types only.
 * Per the signatures: rotg and rotmg take all-scalar arguments through
 * non-const pointers (except b2, passed by value); rot and rotm update the
 * vectors X and Y in place. `P` is written by rotmg and read-only in rotm.
 * NOTE(review): the required length of `P` is not visible here (5 elements
 * in the reference BLAS) -- confirm.
 */
void BLIS_EXPORT_BLAS cblas_srotg(float *a, float *b, float *c, float *s);
void BLIS_EXPORT_BLAS cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P);
void BLIS_EXPORT_BLAS cblas_srot(f77_int N, float *X, f77_int incX,
float *Y, f77_int incY, const float c, const float s);
void BLIS_EXPORT_BLAS cblas_srotm(f77_int N, float *X, f77_int incX,
float *Y, f77_int incY, const float *P);
void BLIS_EXPORT_BLAS cblas_drotg(double *a, double *b, double *c, double *s);
void BLIS_EXPORT_BLAS cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P);
void BLIS_EXPORT_BLAS cblas_drot(f77_int N, double *X, f77_int incX,
double *Y, f77_int incY, const double c, const double s);
void BLIS_EXPORT_BLAS cblas_drotm(f77_int N, double *X, f77_int incX,
double *Y, f77_int incY, const double *P);
/*
* Routines with S D C Z CS and ZD prefixes
*/
/*
 * In-place scaling of X by alpha. cscal/zscal take a complex alpha through a
 * void pointer; csscal/zdscal take a real alpha by value while X is complex
 * (void pointer).
 */
void BLIS_EXPORT_BLAS cblas_sscal(f77_int N, float alpha, float *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_dscal(f77_int N, double alpha, double *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_cscal(f77_int N, const void *alpha, void *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_zscal(f77_int N, const void *alpha, void *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_csscal(f77_int N, float alpha, void *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_zdscal(f77_int N, double alpha, void *X, f77_int incX);
/*
* ===========================================================================
* Prototypes for level 2 BLAS
* ===========================================================================
*/
/*
* Routines with standard 4 prefixes (S, D, C, Z)
*/
/*
 * Single-precision real level-2 routines: general (gemv), general banded
 * (gbmv), and triangular full/banded/packed multiply (trmv/tbmv/tpmv) and
 * solve (trsv/tbsv/tpsv).
 * Per the signatures: gemv/gbmv read A and X and write Y; the triangular
 * routines read A (or packed Ap, which has no lda) and update X in place.
 */
void BLIS_EXPORT_BLAS cblas_sgemv(enum CBLAS_ORDER order,
enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N,
float alpha, const float *A, f77_int lda,
const float *X, f77_int incX, float beta,
float *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_sgbmv(enum CBLAS_ORDER order,
enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N,
f77_int KL, f77_int KU, float alpha,
const float *A, f77_int lda, const float *X,
f77_int incX, float beta, float *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const float *A, f77_int lda,
float *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, f77_int K, const float *A, f77_int lda,
float *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const float *Ap, float *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const float *A, f77_int lda, float *X,
f77_int incX);
void BLIS_EXPORT_BLAS cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, f77_int K, const float *A, f77_int lda,
float *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const float *Ap, float *X, f77_int incX);
/*
 * Double-precision real level-2 routines: general (gemv), general banded
 * (gbmv), and triangular full/banded/packed multiply (trmv/tbmv/tpmv) and
 * solve (trsv/tbsv/tpsv).
 * Per the signatures: gemv/gbmv read A and X and write Y; the triangular
 * routines read A (or packed Ap, which has no lda) and update X in place.
 */
void BLIS_EXPORT_BLAS cblas_dgemv(enum CBLAS_ORDER order,
enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N,
double alpha, const double *A, f77_int lda,
const double *X, f77_int incX, double beta,
double *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_dgbmv(enum CBLAS_ORDER order,
enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N,
f77_int KL, f77_int KU, double alpha,
const double *A, f77_int lda, const double *X,
f77_int incX, double beta, double *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const double *A, f77_int lda,
double *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, f77_int K, const double *A, f77_int lda,
double *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const double *Ap, double *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const double *A, f77_int lda, double *X,
f77_int incX);
void BLIS_EXPORT_BLAS cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, f77_int K, const double *A, f77_int lda,
double *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const double *Ap, double *X, f77_int incX);
/*
 * Single-precision complex level-2 routines, same set as the real variants.
 * All complex data, including the scalars alpha and beta, is passed through
 * void pointers; only the const qualifier distinguishes inputs from the
 * updated operand (Y for gemv/gbmv, X for the triangular routines).
 */
void BLIS_EXPORT_BLAS cblas_cgemv(enum CBLAS_ORDER order,
enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N,
const void *alpha, const void *A, f77_int lda,
const void *X, f77_int incX, const void *beta,
void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_cgbmv(enum CBLAS_ORDER order,
enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N,
f77_int KL, f77_int KU, const void *alpha,
const void *A, f77_int lda, const void *X,
f77_int incX, const void *beta, void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const void *A, f77_int lda,
void *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, f77_int K, const void *A, f77_int lda,
void *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const void *Ap, void *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const void *A, f77_int lda, void *X,
f77_int incX);
void BLIS_EXPORT_BLAS cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, f77_int K, const void *A, f77_int lda,
void *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const void *Ap, void *X, f77_int incX);
/*
 * Double-precision complex level-2 routines, same set as the real variants.
 * All complex data, including the scalars alpha and beta, is passed through
 * void pointers; only the const qualifier distinguishes inputs from the
 * updated operand (Y for gemv/gbmv, X for the triangular routines).
 */
void BLIS_EXPORT_BLAS cblas_zgemv(enum CBLAS_ORDER order,
enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N,
const void *alpha, const void *A, f77_int lda,
const void *X, f77_int incX, const void *beta,
void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_zgbmv(enum CBLAS_ORDER order,
enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N,
f77_int KL, f77_int KU, const void *alpha,
const void *A, f77_int lda, const void *X,
f77_int incX, const void *beta, void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const void *A, f77_int lda,
void *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, f77_int K, const void *A, f77_int lda,
void *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const void *Ap, void *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const void *A, f77_int lda, void *X,
f77_int incX);
void BLIS_EXPORT_BLAS cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, f77_int K, const void *A, f77_int lda,
void *X, f77_int incX);
void BLIS_EXPORT_BLAS cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
f77_int N, const void *Ap, void *X, f77_int incX);
/*
* Routines with S and D prefixes only
*/
/*
 * Single-precision symmetric matrix-vector products (symv/sbmv/spmv) and
 * rank-1 / rank-2 updates (ger, syr, spr, syr2, spr2).
 * Per the const qualifiers: the *mv routines write Y; the update routines
 * read X (and Y) and write the matrix A / Ap.
 * NOTE(review): cblas_sspr2 names its packed output `A` (no lda), whereas
 * the sibling packed routines sspmv/sspr use `Ap`; the name is cosmetic in a
 * prototype but is inconsistent.
 */
void BLIS_EXPORT_BLAS cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, float alpha, const float *A,
f77_int lda, const float *X, f77_int incX,
float beta, float *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, f77_int K, float alpha, const float *A,
f77_int lda, const float *X, f77_int incX,
float beta, float *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, float alpha, const float *Ap,
const float *X, f77_int incX,
float beta, float *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_sger(enum CBLAS_ORDER order, f77_int M, f77_int N,
float alpha, const float *X, f77_int incX,
const float *Y, f77_int incY, float *A, f77_int lda);
void BLIS_EXPORT_BLAS cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, float alpha, const float *X,
f77_int incX, float *A, f77_int lda);
void BLIS_EXPORT_BLAS cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, float alpha, const float *X,
f77_int incX, float *Ap);
void BLIS_EXPORT_BLAS cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, float alpha, const float *X,
f77_int incX, const float *Y, f77_int incY, float *A,
f77_int lda);
void BLIS_EXPORT_BLAS cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, float alpha, const float *X,
f77_int incX, const float *Y, f77_int incY, float *A);
/*
 * Double-precision symmetric matrix-vector products (symv/sbmv/spmv) and
 * rank-1 / rank-2 updates (ger, syr, spr, syr2, spr2).
 * Per the const qualifiers: the *mv routines write Y; the update routines
 * read X (and Y) and write the matrix A / Ap.
 * NOTE(review): cblas_dspr2 names its packed output `A` (no lda), whereas
 * the sibling packed routines dspmv/dspr use `Ap`; the name is cosmetic in a
 * prototype but is inconsistent.
 */
void BLIS_EXPORT_BLAS cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, double alpha, const double *A,
f77_int lda, const double *X, f77_int incX,
double beta, double *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, f77_int K, double alpha, const double *A,
f77_int lda, const double *X, f77_int incX,
double beta, double *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, double alpha, const double *Ap,
const double *X, f77_int incX,
double beta, double *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_dger(enum CBLAS_ORDER order, f77_int M, f77_int N,
double alpha, const double *X, f77_int incX,
const double *Y, f77_int incY, double *A, f77_int lda);
void BLIS_EXPORT_BLAS cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, double alpha, const double *X,
f77_int incX, double *A, f77_int lda);
void BLIS_EXPORT_BLAS cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, double alpha, const double *X,
f77_int incX, double *Ap);
void BLIS_EXPORT_BLAS cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, double alpha, const double *X,
f77_int incX, const double *Y, f77_int incY, double *A,
f77_int lda);
void BLIS_EXPORT_BLAS cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, double alpha, const double *X,
f77_int incX, const double *Y, f77_int incY, double *A);
/*
* Routines with C and Z prefixes only
*/
/*
 * Single-precision complex Hermitian matrix-vector products
 * (hemv/hbmv/hpmv) and rank-1 / rank-2 updates (geru, gerc, her, hpr, her2,
 * hpr2). Complex data is passed through void pointers.
 * Per the signatures: her and hpr take a real `float alpha` by value, while
 * every other routine here takes alpha (and beta) as a pointer to a complex
 * value.
 * NOTE(review): cblas_chpr names its packed output `A` (no lda), whereas
 * chpmv/chpr2 use `Ap`; the name is cosmetic in a prototype but is
 * inconsistent.
 */
void BLIS_EXPORT_BLAS cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, const void *alpha, const void *A,
f77_int lda, const void *X, f77_int incX,
const void *beta, void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, f77_int K, const void *alpha, const void *A,
f77_int lda, const void *X, f77_int incX,
const void *beta, void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, const void *alpha, const void *Ap,
const void *X, f77_int incX,
const void *beta, void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_cgeru(enum CBLAS_ORDER order, f77_int M, f77_int N,
const void *alpha, const void *X, f77_int incX,
const void *Y, f77_int incY, void *A, f77_int lda);
void BLIS_EXPORT_BLAS cblas_cgerc(enum CBLAS_ORDER order, f77_int M, f77_int N,
const void *alpha, const void *X, f77_int incX,
const void *Y, f77_int incY, void *A, f77_int lda);
void BLIS_EXPORT_BLAS cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, float alpha, const void *X, f77_int incX,
void *A, f77_int lda);
void BLIS_EXPORT_BLAS cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, float alpha, const void *X,
f77_int incX, void *A);
void BLIS_EXPORT_BLAS cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N,
const void *alpha, const void *X, f77_int incX,
const void *Y, f77_int incY, void *A, f77_int lda);
void BLIS_EXPORT_BLAS cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N,
const void *alpha, const void *X, f77_int incX,
const void *Y, f77_int incY, void *Ap);
/*
 * Double-precision complex Hermitian matrix-vector products
 * (hemv/hbmv/hpmv) and rank-1 / rank-2 updates (geru, gerc, her, hpr, her2,
 * hpr2). Complex data is passed through void pointers.
 * Per the signatures: her and hpr take a real `double alpha` by value, while
 * every other routine here takes alpha (and beta) as a pointer to a complex
 * value.
 * NOTE(review): cblas_zhpr names its packed output `A` (no lda), whereas
 * zhpmv/zhpr2 use `Ap`; the name is cosmetic in a prototype but is
 * inconsistent.
 */
void BLIS_EXPORT_BLAS cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, const void *alpha, const void *A,
f77_int lda, const void *X, f77_int incX,
const void *beta, void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, f77_int K, const void *alpha, const void *A,
f77_int lda, const void *X, f77_int incX,
const void *beta, void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, const void *alpha, const void *Ap,
const void *X, f77_int incX,
const void *beta, void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_zgeru(enum CBLAS_ORDER order, f77_int M, f77_int N,
const void *alpha, const void *X, f77_int incX,
const void *Y, f77_int incY, void *A, f77_int lda);
void BLIS_EXPORT_BLAS cblas_zgerc(enum CBLAS_ORDER order, f77_int M, f77_int N,
const void *alpha, const void *X, f77_int incX,
const void *Y, f77_int incY, void *A, f77_int lda);
void BLIS_EXPORT_BLAS cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, double alpha, const void *X, f77_int incX,
void *A, f77_int lda);
void BLIS_EXPORT_BLAS cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
f77_int N, double alpha, const void *X,
f77_int incX, void *A);
void BLIS_EXPORT_BLAS cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N,
const void *alpha, const void *X, f77_int incX,
const void *Y, f77_int incY, void *A, f77_int lda);
void BLIS_EXPORT_BLAS cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N,
const void *alpha, const void *X, f77_int incX,
const void *Y, f77_int incY, void *Ap);
/*
* ===========================================================================
* Prototypes for level 3 BLAS
* ===========================================================================
*/
/*
* Routines with standard 4 prefixes (S, D, C, Z)
*/
/*
 * Single-precision real level-3 routines: gemm, symm, syrk, syr2k, trmm,
 * trsm.
 * Per the const qualifiers: gemm/symm/syrk/syr2k read A (and B) and write C;
 * trmm/trsm read A and update B in place (there is no C or beta argument).
 */
void BLIS_EXPORT_BLAS cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N,
f77_int K, float alpha, const float *A,
f77_int lda, const float *B, f77_int ldb,
float beta, float *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, f77_int M, f77_int N,
float alpha, const float *A, f77_int lda,
const float *B, f77_int ldb, float beta,
float *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K,
float alpha, const float *A, f77_int lda,
float beta, float *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K,
float alpha, const float *A, f77_int lda,
const float *B, f77_int ldb, float beta,
float *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, f77_int M, f77_int N,
float alpha, const float *A, f77_int lda,
float *B, f77_int ldb);
void BLIS_EXPORT_BLAS cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, f77_int M, f77_int N,
float alpha, const float *A, f77_int lda,
float *B, f77_int ldb);
/*
 * Double-precision real level-3 routines: gemm, symm, syrk, syr2k, trmm,
 * trsm.
 * Per the const qualifiers: gemm/symm/syrk/syr2k read A (and B) and write C;
 * trmm/trsm read A and update B in place (there is no C or beta argument).
 */
void BLIS_EXPORT_BLAS cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N,
f77_int K, double alpha, const double *A,
f77_int lda, const double *B, f77_int ldb,
double beta, double *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, f77_int M, f77_int N,
double alpha, const double *A, f77_int lda,
const double *B, f77_int ldb, double beta,
double *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K,
double alpha, const double *A, f77_int lda,
double beta, double *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K,
double alpha, const double *A, f77_int lda,
const double *B, f77_int ldb, double beta,
double *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, f77_int M, f77_int N,
double alpha, const double *A, f77_int lda,
double *B, f77_int ldb);
void BLIS_EXPORT_BLAS cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, f77_int M, f77_int N,
double alpha, const double *A, f77_int lda,
double *B, f77_int ldb);
/*
 * Single-precision complex level-3 routines: gemm, symm, syrk, syr2k, trmm,
 * trsm. Matrices and the scalars alpha/beta are passed through void
 * pointers; the non-const pointer (C, or B for trmm/trsm) is the operand
 * that is written.
 */
void BLIS_EXPORT_BLAS cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N,
f77_int K, const void *alpha, const void *A,
f77_int lda, const void *B, f77_int ldb,
const void *beta, void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, f77_int M, f77_int N,
const void *alpha, const void *A, f77_int lda,
const void *B, f77_int ldb, const void *beta,
void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K,
const void *alpha, const void *A, f77_int lda,
const void *beta, void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K,
const void *alpha, const void *A, f77_int lda,
const void *B, f77_int ldb, const void *beta,
void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, f77_int M, f77_int N,
const void *alpha, const void *A, f77_int lda,
void *B, f77_int ldb);
void BLIS_EXPORT_BLAS cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, f77_int M, f77_int N,
const void *alpha, const void *A, f77_int lda,
void *B, f77_int ldb);
/*
 * Double-precision complex level-3 routines: gemm, symm, syrk, syr2k, trmm,
 * trsm. Matrices and the scalars alpha/beta are passed through void
 * pointers; the non-const pointer (C, or B for trmm/trsm) is the operand
 * that is written.
 */
void BLIS_EXPORT_BLAS cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N,
f77_int K, const void *alpha, const void *A,
f77_int lda, const void *B, f77_int ldb,
const void *beta, void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, f77_int M, f77_int N,
const void *alpha, const void *A, f77_int lda,
const void *B, f77_int ldb, const void *beta,
void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K,
const void *alpha, const void *A, f77_int lda,
const void *beta, void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K,
const void *alpha, const void *A, f77_int lda,
const void *B, f77_int ldb, const void *beta,
void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, f77_int M, f77_int N,
const void *alpha, const void *A, f77_int lda,
void *B, f77_int ldb);
void BLIS_EXPORT_BLAS cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_DIAG Diag, f77_int M, f77_int N,
const void *alpha, const void *A, f77_int lda,
void *B, f77_int ldb);
/*
* Routines with prefixes C and Z only
*/
/*
 * Single-precision complex Hermitian level-3 routines: hemm, herk, her2k.
 * Scalar types differ by routine (per the signatures): hemm takes complex
 * alpha and beta by pointer; herk takes real `float` alpha and beta by
 * value; her2k takes complex alpha by pointer but real `float` beta by
 * value.
 */
void BLIS_EXPORT_BLAS cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, f77_int M, f77_int N,
const void *alpha, const void *A, f77_int lda,
const void *B, f77_int ldb, const void *beta,
void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K,
float alpha, const void *A, f77_int lda,
float beta, void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K,
const void *alpha, const void *A, f77_int lda,
const void *B, f77_int ldb, float beta,
void *C, f77_int ldc);
/*
 * Double-precision complex Hermitian level-3 routines: hemm, herk, her2k.
 * Scalar types differ by routine (per the signatures): hemm takes complex
 * alpha and beta by pointer; herk takes real `double` alpha and beta by
 * value; her2k takes complex alpha by pointer but real `double` beta by
 * value.
 */
void BLIS_EXPORT_BLAS cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
enum CBLAS_UPLO Uplo, f77_int M, f77_int N,
const void *alpha, const void *A, f77_int lda,
const void *B, f77_int ldb, const void *beta,
void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K,
double alpha, const void *A, f77_int lda,
double beta, void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K,
const void *alpha, const void *A, f77_int lda,
const void *B, f77_int ldb, double beta,
void *C, f77_int ldc);
/*
 * Error-reporting hook. Variadic: `form` is followed by further arguments.
 * NOTE(review): presumably `p` is the offending parameter position, `rout`
 * the routine name, and `form` a printf-style format for the trailing
 * arguments -- confirm against the implementation.
 */
void BLIS_EXPORT_BLAS cblas_xerbla(f77_int p, const char *rout, const char *form, ...);
/*
* ===========================================================================
* BLAS Extension prototypes
* ===========================================================================
*/
// -- APIs to operations unique to BLIS --
/*
 * axpby for each datatype: like the axpy prototypes but with an additional
 * scalar `beta` ahead of Y. X is read-only; Y is written. Complex scalars
 * and vectors are passed through void pointers.
 */
void BLIS_EXPORT_BLAS cblas_saxpby(f77_int N, float alpha, const float *X,
f77_int incX, float beta, float *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_daxpby(f77_int N, double alpha, const double *X,
f77_int incX, double beta, double *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_caxpby(f77_int N, const void *alpha,
const void *X, f77_int incX, const void* beta,
void *Y, f77_int incY);
void BLIS_EXPORT_BLAS cblas_zaxpby(f77_int N, const void *alpha,
const void *X, f77_int incX, const void *beta,
void *Y, f77_int incY);
// -- APIs to level-3-like operations --
/*
 * gemmt for each datatype. Compared with the gemm prototypes, these take an
 * additional Uplo selector and a single dimension N in place of M and N
 * (so C is addressed as N-by-N with leading dimension ldc).
 * NOTE(review): presumably only the Uplo triangle of C is updated --
 * confirm against the implementation.
 */
void BLIS_EXPORT_BLAS cblas_sgemmt(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB,
f77_int N, f77_int K, float alpha, const float *A,
f77_int lda, const float *B, f77_int ldb,
float beta, float *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_dgemmt(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB,
f77_int N, f77_int K, double alpha, const double *A,
f77_int lda, const double *B, f77_int ldb,
double beta, double *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_cgemmt(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB,
f77_int N, f77_int K, const void *alpha, const void *A,
f77_int lda, const void *B, f77_int ldb,
const void *beta, void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_zgemmt(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB,
f77_int N, f77_int K, const void *alpha, const void *A,
f77_int lda, const void *B, f77_int ldb,
const void *beta, void *C, f77_int ldc);
// -- Batch APIs --
/*
 * Batched gemm for each datatype. Order is a single value; every other gemm
 * parameter is supplied as an array (the *_array arguments), matrices are
 * arrays of pointers (A, B, C), and the batch is described by `group_count`
 * and the `group_size` array.
 * Only the scalar arrays and matrix-pointer targets are const-qualified; the
 * option/dimension arrays and group_size are non-const in these prototypes.
 * NOTE(review): presumably the *_array arguments and group_size each hold
 * group_count entries and A/B/C hold one pointer per matrix across all
 * groups -- confirm against the implementation.
 */
void BLIS_EXPORT_BLAS cblas_sgemm_batch(enum CBLAS_ORDER Order,
enum CBLAS_TRANSPOSE *TransA_array,
enum CBLAS_TRANSPOSE *TransB_array,
f77_int *M_array, f77_int *N_array,
f77_int *K_array, const float *alpha_array, const float **A,
f77_int *lda_array, const float **B, f77_int *ldb_array,
const float *beta_array, float **C, f77_int *ldc_array,
f77_int group_count, f77_int *group_size);
void BLIS_EXPORT_BLAS cblas_dgemm_batch(enum CBLAS_ORDER Order,
enum CBLAS_TRANSPOSE *TransA_array,
enum CBLAS_TRANSPOSE *TransB_array,
f77_int *M_array, f77_int *N_array,
f77_int *K_array, const double *alpha_array,
const double **A,f77_int *lda_array,
const double **B, f77_int *ldb_array,
const double *beta_array, double **C, f77_int *ldc_array,
f77_int group_count, f77_int *group_size);
void BLIS_EXPORT_BLAS cblas_cgemm_batch(enum CBLAS_ORDER Order,
enum CBLAS_TRANSPOSE *TransA_array,
enum CBLAS_TRANSPOSE *TransB_array,
f77_int *M_array, f77_int *N_array,
f77_int *K_array, const void *alpha_array, const void **A,
f77_int *lda_array, const void **B, f77_int *ldb_array,
const void *beta_array, void **C, f77_int *ldc_array,
f77_int group_count, f77_int *group_size);
void BLIS_EXPORT_BLAS cblas_zgemm_batch(enum CBLAS_ORDER Order,
enum CBLAS_TRANSPOSE *TransA_array,
enum CBLAS_TRANSPOSE *TransB_array,
f77_int *M_array, f77_int *N_array,
f77_int *K_array, const void *alpha_array, const void **A,
f77_int *lda_array, const void **B, f77_int *ldb_array,
const void *beta_array, void **C, f77_int *ldc_array,
f77_int group_count, f77_int *group_size);
// -- 3m APIs --
/*
 * Complex gemm3m entry points. Their parameter lists are identical to those
 * of cblas_cgemm / cblas_zgemm.
 * NOTE(review): presumably these compute the same product using the "3m"
 * (three real multiplications) scheme, which can differ numerically from
 * the standard complex gemm -- confirm against the implementation.
 */
void BLIS_EXPORT_BLAS cblas_cgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N,
f77_int K, const void *alpha, const void *A,
f77_int lda, const void *B, f77_int ldb,
const void *beta, void *C, f77_int ldc);
void BLIS_EXPORT_BLAS cblas_zgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA,
enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N,
f77_int K, const void *alpha, const void *A,
f77_int lda, const void *B, f77_int ldb,
const void *beta, void *C, f77_int ldc);
#ifdef __cplusplus
}
#endif
#endif