compact_topology_optimization/3d/util_mtimesx/mtimesx.c


								/*************************************************************************************

								 *

								 * MATLAB (R) is a trademark of The Mathworks (R) Corporation

								 *

								 * Function:    mtimesx

								 * Filename:    mtimesx.c

								 * Programmer:  James Tursa

								 * Version:     1.40

								 * Date:        October 4, 2010

								 * Copyright:   (c) 2009, 2010 by James Tursa, All Rights Reserved

								 *

								 *  This code uses the BSD License:

								 *

								 *  Redistribution and use in source and binary forms, with or without

								 *  modification, are permitted provided that the following conditions are

								 *  met:

								 *

								 *     * Redistributions of source code must retain the above copyright

								 *       notice, this list of conditions and the following disclaimer.

								 *     * Redistributions in binary form must reproduce the above copyright

								 *       notice, this list of conditions and the following disclaimer in

								 *       the documentation and/or other materials provided with the distribution

								 *

								 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

								 *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

								 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

								 *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE

								 *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

								 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

								 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

								 *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

								 *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

								 *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

								 *  POSSIBILITY OF SUCH DAMAGE.

								 *

								 * Requires: mtimesx_RealTimesReal.c

								 *

								 * mtimesx is a multiply function that utilizes BLAS calls and custom code for

								 * matrix-matrix, matrix-vector, vector-vector, scalar-vector, scalar-matrix,

								 * and scalar-array calculations. Full support is given for transpose, conjugate

								 * transpose, and conjugate operations. Operands can be single, double, or

								 * sparse double.  Sparse double matrices have direct support for scalar-matrix

								 * operations ... all other computations involving sparse matrices are done

								 * with calls to the MATLAB intrinsic functions.

								 *

								 * Building:

								 *

								 * mtimesx is typically self building. That is, the first time you call mtimesx,

								 * the mtimesx.m file recognizes that the mex routine needs to be compiled and

								 * then the compilation and linkage to the BLAS library will happen automatically.

								 * If the automatic compilation does not work, then you can do it manually as

								 * follows:

								 * - Put all of the files in one directory that is on the MATLAB path (but don't

								 *   put them in a toolbox directory)

								 * - Make that directory the current directory.

								 * - Create a string variable with the pathname and filename of the BLAS library

								 *   file to use. e.g. for microsoft it may be something like the following:

								 *

								 * - Issue the following command at the MATLAB prompt.

								 *   >> mex('mtimesx.c',lib_blas)

								 *

								 *   And if you are on a linux machine then you may need to add the string

								 *   '-DEFINEUNIX' to the mex command argument list.

								 *

								 * The usage is as follows (arguments in brackets [ ] are optional):

								 *

								 * Syntax

								 *

								 * M = mtimesx( [mode] )

								 * C = mtimesx(A [,transa] ,B [,transb] [,mode])

								 *

								 * Description

								 *

								 * mtimesx performs the matrix calculation C = op(A) * op(B), where:

								 *    A = A single or double or sparse scalar, matrix, or array.

								 *    B = A single or double or sparse scalar, matrix, or array.

								 *    transa = A character indicating a pre-operation on A (optional)

								 *    transb = A character indicating a pre-operation on B (optional)

								 *             The pre-operation can be any of:

								 *             'N' or 'n' = No operation (the default if trans_ is missing)

								 *             'T' or 't' = Transpose

								 *             'C' or 'c' = Conjugate Transpose

								 *             'G' or 'g' = Conjugate (no transpose)

								 *    mode = 'MATLAB' or 'SPEED' (sets mode for current and future calculations,

								 *                                case insensitive, optional)

								 *    M is a string indicating the current calculation mode, before setting the new one.

								 *    C is the result of the matrix multiply operation.

								 *

								 *  mtimesx uses a combination of BLAS library routine calls and custom code

								 *  to optimize the matrix or scalar multiplies with respect to speed

								 *  without sacrificing accuracy. As a result, some calculations are done in

								 *  a different way than the equivalent MATLAB calculation, resulting in

								 *  slightly different answers. Although slightly different, the mtimesx

								 *  results are just as accurate as the MATLAB results. The general matrix

								 *  multiply calculation uses the same BLAS calls as MATLAB, so there is no

								 *  difference in speed or results for this case. For the scalar * matrix,

								 *  scalar * vector, scalar * array, and matrix * vector cases there

								 *  generally *will* be a difference between mtimesx and the MATLAB built in

								 *  mtimes function. Usually, mtimesx will be faster that the equivalent

								 *  MATLAB function, but some of these cases are slightly slower.

								 *

								 *  M = mtimesx returns a string with the current calculation mode. The string

								 *      will either be 'MATLAB' or 'SPEED' or one of the OMP modes.

								 *

								 *  M = mtimesx(mode) sets the calculation mode to mode. The mode variable

								 *      must be either the string 'MATLAB' or the string 'SPEED'. The return

								 *      variable M is the previous mode setting prior to setting the new mode.

								 *      The mode is case insensitive (lower or upper case is OK). You can also

								 *      set one of the OMP modes if you have compiled with an OpenMP compiler.

								 *

								 * Mode:

								 *

								 * The 'MATLAB' mode uses the same sequence of BLAS calls that MATLAB does,

								 * and uses scalar multiply code that generates the same results as MATLAB.

								 * The purpose of this mode is to reproduce the exact same results as the

								 * same MATLAB operation. As such, there will often be no speed improvement

								 * by using mtimesx vs using the MATLAB intrinsic mtimes.

								 *

								 * The 'SPEED' mode uses different sequences of BLAS calls than MATLAB does

								 * whenever a speed improvement might be realized. Custom code for dot product

								 * calculations is used, and certain matrix-matrix and matrix-vector operations

								 * involving conjugates and/or transposes uses a series of dot product type

								 * calculations instead of calling BLAS routines if a speed improvement can

								 * be realized. This mode produces results that will sometimes differ slightly

								 * from the same MATLAB operation, but the results will still be accurate.

								 *

								 * The 'OMP' modes are basically the same as 'SPEED' mode except that some

								 * of the calculations are multi-threaded using OpenMP directives.

								 *

								 * Examples:

								 *

								 *  C = mtimesx(A,B)         % performs the calculation C = A * B

								 *  C = mtimesx(A,'T',B)     % performs the calculation C = A.' * B

								 *  C = mtimesx(A,B,'G')     % performs the calculation C = A * conj(B)

								 *  C = mtimesx(A,'C',B,'C') % performs the calculation C = A' * B'

								 *  mtimesx                  % returns the current calculation mode

								 *  mtimesx('MATLAB')        % sets calculation mode to match MATLAB

								 *

								 * Note: You cannot combine double sparse and single inputs, since MATLAB does not

								 * support a single sparse result. You also cannot combine sparse inputs with full

								 * nD (n > 2) inputs, since MATLAB does not support a sparse nD result. The only

								 * exception is a sparse scalar times an nD full array. In that special case,

								 * mtimesx will treat the sparse scalar as a full scalar and return a full nD result.

								 *

								 * Note: The �N�, �T�, and �C� have the same meanings as the direct inputs to the BLAS

								 * routines. The �G� input has no direct BLAS counterpart, but was relatively easy to

								 * implement in mtimesx and saves time (as opposed to computing conj(A) or conj(B)

								 * explicitly before calling mtimesx).

								 *

								 * mtimesx supports nD inputs. For these cases, the first two dimensions specify the

								 * matrix multiply involved. The remaining dimensions are duplicated and specify the

								 * number of individual matrix multiplies to perform for the result. i.e., mtimesx

								 * treats these cases as arrays of 2D matrices and performs the operation on the

								 * associated parings. For example:

								 *

								 *     If A is (2,3,4,5) and B is (3,6,4,5), then

								 *     mtimesx(A,B) would result in C(2,6,4,5)

								 *     where C(:,:,i,j) = A(:,:,i,j) * B(:,:,i,j), i=1:4, j=1:5

								 *

								 *     which would be equivalent to the MATLAB m-code:

								 *     C = zeros(2,6,4,5);

								 *     for m=1:4

								 *         for n=1:5

								 *             C(:,:,m,n) = A(:,:,m,n) * B(:,:,m,n);

								 *         end

								 *     end

								 *

								 * The first two dimensions must conform using the standard matrix multiply rules

								 * taking the transa and transb pre-operations into account, and dimensions 3:end

								 * must match exactly or be singleton (equal to 1). If a dimension is singleton

								 * then it is virtually expanded to the required size (i.e., equivalent to a

								 * repmat operation to get it to a conforming size but without the actual data

								 * copy). For example:

								 *

								 *     If A is (2,3,4,5) and B is (3,6,1,5), then

								 *     mtimesx(A,B) would result in C(2,6,4,5)

								 *     where C(:,:,i,j) = A(:,:,i,j) * B(:,:,1,j), i=1:4, j=1:5

								 *

								 *     which would be equivalent to the MATLAB m-code:

								 *     C = zeros(2,6,4,5);

								 *     for m=1:4

								 *         for n=1:5

								 *             C(:,:,m,n) = A(:,:,m,n) * B(:,:,1,n);

								 *         end

								 *     end

								 *

								 * When a transpose (or conjugate transpose) is involved, the first two dimensions

								 * are transposed in the multiply as you would expect. For example:

								 *

								 *     If A is (3,2,4,5) and B is (3,6,4,5), then

								 *     mtimesx(A,'C',B,'G') would result in C(2,6,4,5)

								 *     where C(:,:,i,j) = A(:,:,i,j)' * conj( B(:,:,i,j) ), i=1:4, j=1:5

								 *

								 *     which would be equivalent to the MATLAB m-code:

								 *     C = zeros(2,6,4,5);

								 *     for m=1:4

								 *         for n=1:5

								 *             C(:,:,m,n) = A(:,:,m,n)' * conj( B(:,:,m,n) );

								 *         end

								 *     end

								 *

								 *     If A is a scalar (1,1) and B is (3,6,4,5), then

								 *     mtimesx(A,'G',B,'C') would result in C(6,3,4,5)

								 *     where C(:,:,i,j) = conj(A) * B(:,:,i,j)', i=1:4, j=1:5

								 *

								 *     which would be equivalent to the MATLAB m-code:

								 *     C = zeros(6,3,4,5);

								 *     for m=1:4

								 *         for n=1:5

								 *             C(:,:,m,n) = conj(A) * B(:,:,m,n)';

								 *         end

								 *     end

								 *

								 * Change Log:

								 * 2009/Sep/27 --> 1.00, Initial Release

								 * 2009/Dec/10 --> 1.11, Fixed bug for empty transa & transb inputs

								 * 2010/Feb/23 --> 1.20, Fixed bug for dgemv and sgemv calls

								 * 2010/Aug/02 --> 1.30, Added (nD scalar) * (nD array) capability

								 *                       Replaced buggy mxRealloc with custom routine

								 * 2010/Oct/04 --> 1.40, Added OpenMP support for custom code

								 *                       Expanded sparse * single and sparse * nD support

								 *                       Fixed (nD complex scalar)C * (nD array) bug

								 *

								 ****************************************************************************/


								/* OpenMP ------------------------------------------------------------- */


								#ifdef _OPENMP

								#include <omp.h>

								#endif


								/* Includes ----------------------------------------------------------- */


								#include <string.h>

								#include <stddef.h>

								#include <ctype.h>

								#include <math.h>

								#include <time.h>

								#include "mex.h"


								/* Macros ------------------------------------------------------------- */


								#ifndef MWSIZE_MAX

								#define  mwIndex        int

								#define  mwSignedIndex  int

								#define  mwSize         int

								#define  mwSize_t       size_t

								#else

								#define  mwSize_t       mwSize

								#endif


								#define  METHOD_BLAS       0

								#define  METHOD_LOOPS      1

								#define  METHOD_LOOPS_OMP  2


								#define  MTIMESX_NOT_SET    0

								#define  MTIMESX_BLAS       1

								#define  MTIMESX_LOOPS      2

								#define  MTIMESX_LOOPS_OMP  3

								#define  MTIMESX_MATLAB     4

								#define  MTIMESX_SPEED      5

								#define  MTIMESX_SPEED_OMP  6


								#define  STRINGIZE(name)  #name

								#define  TOKENSTRING(name)  STRINGIZE(name)


								#define  DIRECTIVE_MAX  1000


								#ifndef  COMPILER

								#define  COMPILER  (unknown)

								#endif


								/* Prototypes --------------------------------------------------------- */


								mxArray *DoubleTimesDouble(mxArray *, char, mxArray *, char);

								mxArray *FloatTimesFloat(mxArray *, char, mxArray *, char);

								mxArray *mxCreateSharedDataCopy(const mxArray *pr);

								char mxArrayToTrans(const mxArray *mx);

								void *myRealloc(void *vp, mwSize_t n);

								void mtimesx_logo(void);

								mxArray *modestring(int);


								/* Global Variables --------------------------------------------------- */


								int mtimesx_mode  = MTIMESX_MATLAB;

								int max_threads   = 0;

								int threads_used  = 0;

								int debug         = 0;

								int debug_message = 0;


								/* Functions ect for OpenMP implementations ------- */


								#ifdef _OPENMP


								#define  OPENMP_ENABLED   1.0


								/* Functions etc for non OpenMP implementations ----------------- */


								/* The omp_get_num_procs() function is courtesy of Dirk-Jan Kroon */

								/* and is based on his FEX submission maxNumCompThreads --------- */


								#else


								#define  OPENMP_ENABLED   0.0

								#define  omp_get_wtick()  0.0

								#define  omp_get_wtime()  ((double)clock()/((double)CLOCKS_PER_SEC))


								#if defined(_WIN32) || defined(_WIN64)


								#include <windows.h>

								int omp_get_num_procs( ) {

								    SYSTEM_INFO sysinfo;

								    GetSystemInfo(&sysinfo);

								    return sysinfo.dwNumberOfProcessors;

								}


								#elif MACOS


								#include <sys/param.h>

								#include <sys/sysctl.h>

								int omp_get_num_procs( ) {

								    int nm[2];

								    size_t len = 4;

								    uint32_t count;


								    nm[0] = CTL_HW; nm[1] = HW_AVAILCPU;

								    sysctl(nm, 2, &count, &len, NULL, 0);


								    if(count < 1) {

								        nm[1] = HW_NCPU;

								        sysctl(nm, 2, &count, &len, NULL, 0);

								        if(count < 1) { count = 1; }

								    }

								    return count;

								}

								#else


								#include <unistd.h>

								int omp_get_num_procs( ) {

								    return sysconf(_SC_NPROCESSORS_ONLN);

								}


								#endif


								#endif


								/*------------------------------------------------------------------------ */

								/*------------------------------------------------------------------------ */

								/*------------------------------------------------------------------------ */

								/*------------------------------------------------------------------------ */


								void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[])

								{

								    mxArray *A, *B, *C, *Araw, *Braw;

								    mxArray *rhs[4];

								    char transa, transb;

								    char *directive, *cp;

								    char transstring[2] = "_";

								    int unsupported = 0;

								    int i, j, k, got_directive, nrhs_old = nrhs;

									int mtimesx_mode_new, max_threads_new,

										already_set_mode, already_set_debug, already_set_threads;

									mxArray *ans;

									double threads;


								/*-------------------------------------------------------------------------

								 * Check for proper number of inputs and outputs

								 *------------------------------------------------------------------------- */


								    if( nlhs > 1 ) {

								        mexErrMsgTxt("Must have at most 1 output.");

								    }


								/*-------------------------------------------------------------------------

								 * If no inputs, just return the current mode

								 *------------------------------------------------------------------------- */


								    if( nrhs == 0 ) {

										plhs[0] = modestring(mtimesx_mode);

								        return;

								    }


								/*-------------------------------------------------------------------------

								 * Find out if any input is a directive

								 *------------------------------------------------------------------------- */


									i = 0;

									mtimesx_mode_new    = MTIMESX_NOT_SET;

									max_threads_new     = 0;

									already_set_mode    = 0;

									already_set_debug   = 0;

									already_set_threads = 0;

									while( i < nrhs ) {

									if( mxIsChar(prhs[i]) ) {

										if( mxGetNumberOfElements(prhs[i]) > DIRECTIVE_MAX ) {

											mexErrMsgTxt("Unknown directive.");

										} else if( cp = directive = mxArrayToString(prhs[i]) ) {

											j = 0;

								            while( *cp ) {

												if( *cp == '(' ) j++;

												if( *cp == ')' ) j--;

												if( j == 0 ) {

								                    *cp = toupper( *cp );

												}

								                cp++;

								            }

											k = cp - directive;

											got_directive = 1;

								            if( strcmp(directive,"MATLAB") == 0 ) {

												if( already_set_mode ) {

													mexErrMsgTxt("Cannot set mode twice in same call.");

												}

												already_set_mode = 1;

								                mtimesx_mode_new = MTIMESX_MATLAB;

								            } else if( strcmp(directive,"SPEED") == 0 ) {

												if( already_set_mode ) {

													mexErrMsgTxt("Cannot set mode twice in same call.");

												}

												already_set_mode = 1;

								                mtimesx_mode_new = MTIMESX_SPEED;

								            } else if( strcmp(directive,"BLAS") == 0 ) {

												if( already_set_mode ) {

													mexErrMsgTxt("Cannot set mode twice in same call.");

												}

												already_set_mode = 1;

								                mtimesx_mode_new = MTIMESX_BLAS;

								            } else if( strcmp(directive,"LOOPS") == 0 ) {

												if( already_set_mode ) {

													mexErrMsgTxt("Cannot set mode twice in same call.");

												}

												already_set_mode = 1;

								                mtimesx_mode_new = MTIMESX_LOOPS;

								            } else if( strcmp(directive,"LOGO") == 0 ) {

												if( nrhs_old > 1 ) {

													mexErrMsgTxt("Cannot combine LOGO directive with other arguments.");

												}

								                mtimesx_logo();

												return;

								            } else if( strcmp(directive,"HELP") == 0 ) {

												if( nrhs_old > 1 ) {

													mexErrMsgTxt("Cannot combine HELP directive with other arguments.");

												}

								                mexEvalString("help mtimesx;");

												return;

								            } else if( strcmp(directive,"DEBUG") == 0 ) {

												if( already_set_debug ) {

													mexErrMsgTxt("Cannot set DEBUG/NODEBUG twice in same call.");

												}

												already_set_debug = 1;

								                debug = 1;

								            } else if( strcmp(directive,"NODEBUG") == 0 ) {

												if( already_set_debug ) {

													mexErrMsgTxt("Cannot set DEBUG/NODEBUG twice in same call.");

												}

												already_set_debug = 1;

								                debug = 0;

								            } else if( strcmp(directive,"OMP_GET_NUM_PROCS") == 0 ||

								                       strcmp(directive,"OMP_GET_NUM_PROCS()") == 0 ) {

												if( nrhs > 1 ) {

													mexErrMsgTxt("Cannot combine OMP_GET_NUM_PROCS directive with other arguments.");

												}

												plhs[0] = mxCreateDoubleScalar(omp_get_num_procs());

												return;

								            } else if( strcmp(directive,"OMP_GET_MAX_THREADS") == 0 ||

								                       strcmp(directive,"OMP_GET_MAX_THREADS()") == 0 ) {

												if( nrhs > 1 ) {

													mexErrMsgTxt("Cannot combine OMP_GET_MAX_THREADS directive with other arguments.");

												}

												plhs[0] = mxCreateDoubleScalar(max_threads);

												return;

								            } else if( strcmp(directive,"OMP_GET_NUM_THREADS") == 0 ||

								                       strcmp(directive,"OMP_GET_NUM_THREADS") == 0 ) {

												if( nrhs > 1 ) {

													mexErrMsgTxt("Cannot combine OMP_GET_NUM_THREADS directive with other arguments.");

												}

												plhs[0] = mxCreateDoubleScalar(threads_used);

												return;

								            } else if( strcmp(directive,"COMPILER") == 0 ) {

												if( nrhs > 1 ) {

													mexErrMsgTxt("Cannot combine COMPILER directive with other arguments.");

												}

												plhs[0] = mxCreateString(TOKENSTRING(COMPILER));

												return;

								            } else if( strcmp(directive,"OPENMP") == 0 ) {

												if( nrhs > 1 ) {

													mexErrMsgTxt("Cannot combine OPENMP directive with other arguments.");

												}

												plhs[0] = mxCreateDoubleScalar(OPENMP_ENABLED);

												return;

								            } else if( strcmp(directive,"SPEEDOMP") == 0 ) {

												if( already_set_mode ) {

													mexErrMsgTxt("Cannot set mode twice in same call.");

												}

												already_set_mode = 1;

								#ifdef _OPENMP

								                mtimesx_mode_new = MTIMESX_SPEED_OMP;

												if( max_threads == 0 ) {

								 				    max_threads_new = omp_get_num_procs();

												}

								#else

								                mtimesx_mode_new = MTIMESX_SPEED;

								#endif

								            } else if( strcmp(directive,"LOOPSOMP") == 0 ) {

												if( already_set_mode ) {

													mexErrMsgTxt("Cannot set mode twice in same call.");

												}

												already_set_mode = 1;

								#ifdef _OPENMP

								                mtimesx_mode_new = MTIMESX_LOOPS_OMP;

												if( max_threads == 0 ) {

								 				    max_threads_new = omp_get_num_procs();

												}

								#else

								                mtimesx_mode_new = MTIMESX_LOOPS;

								#endif

								            } else if( strcmp(directive,"OMP_GET_WTICK") == 0 ||

								                       strcmp(directive,"OMP_GET_WTICK()") == 0 ) {

												if( nrhs > 1 ) {

													mexErrMsgTxt("Cannot combine OMP_GET_WTICK directive with other arguments.");

												}

												plhs[0] = mxCreateDoubleScalar(omp_get_wtick());

												return;

								            } else if( strcmp(directive,"OMP_GET_WTIME") == 0 ||

								                       strcmp(directive,"OMP_GET_WTIME()") == 0 ) {

												if( nrhs > 1 ) {

													mexErrMsgTxt("Cannot combine OMP_GET_WTIME directive with other arguments.");

												}

												plhs[0] = mxCreateDoubleScalar(omp_get_wtime());

												return;

								            } else if( strcmp(directive,"OMP") == 0 ) {

								#ifdef _OPENMP

												if( already_set_mode ) {

													mexErrMsgTxt("Cannot set mode twice in same call.");

												}

												if( already_set_threads ) {

													mexErrMsgTxt("Cannot set threads twice in same call.");

												}

												already_set_mode = 1;

												already_set_threads = 1;

												max_threads_new = omp_get_num_procs();

								                mtimesx_mode_new = MTIMESX_SPEED_OMP;

								#else

												if( already_set_mode ) {

													mexErrMsgTxt("Cannot set mode twice in same call.");

												}

												already_set_mode = 1;

								                mtimesx_mode_new = MTIMESX_SPEED;

								#endif

								            } else if( strcmp(directive,"OMP_SET_NUM_THREADS") == 0 ||

								                       strcmp(directive,"OMP_SET_MAX_THREADS") == 0 ) {

												if( already_set_threads ) {

													mexErrMsgTxt("Cannot set threads twice in same call.");

												}

												already_set_threads = 1;

								#ifdef _OPENMP

												max_threads_new = omp_get_num_procs();

								#else

												max_threads_new = -1;

								#endif

											} else if( k > 20 && directive[19] == '(' ) {

												directive[19] = '\0';

												if( strcmp(directive,"OMP_SET_NUM_THREADS") == 0 ||

												    strcmp(directive,"OMP_SET_MAX_THREADS") == 0 ) {

												    if( already_set_threads ) {

													    mexErrMsgTxt("Cannot set threads twice in same call.");

												    }

								 				    already_set_threads = 1;

													directive[ 8] = 'N';

													directive[ 9] = 'U';

													directive[10] = 'M';

								 				    directive[19] = '=';

													cp = directive + 20;

													j = 1;

													while( *cp ) {

														if( *cp == '(' ) {

															j++;

														} else if( *cp == ')' ) {

															j--;

															if( !j ) {

																*cp = ';';

																break;

															}

														}

														cp++;

													}

													if( j ) {

													    mexErrMsgTxt("Expression or statement is incorrect--possibly unbalanced (");

													}

													if( mexEvalString(directive) ) {

														mexErrMsgTxt("Unable to evaluate expression for number of threads");

													}

													ans = mexGetVariablePtr("caller","OMP_SET_NUM_THREADS");

													if( ans == NULL ) {

														mexErrMsgTxt("Unable to evaluate expression for number of threads");

													}

													j = threads = mxGetScalar(ans);

													mexEvalString("clear OMP_SET_NUM_THREADS;");

													if( j < 1 || j != threads ) {

														mexErrMsgTxt("Number of threads must be a positive integer.");

													}

													if( j > omp_get_num_procs() ) j = omp_get_num_procs();

								#ifdef _OPENMP

												    max_threads_new = j;

								#else

												    max_threads_new = -1;

								#endif

												} else {

													got_directive = 0;

												}

											} else {

												got_directive = 0;

								            }

								            mxFree(directive);

								        } else {

								            mexErrMsgTxt("Error allocating memory for directive string");

								        }

										if( got_directive ) {

											for( j=i; j<nrhs-1; j++ ) {

												prhs[j] = prhs[j+1];

											}

											nrhs--;

										} else {

											i++;

										}

									} else {

										i++;

									}

								    }

									if( mtimesx_mode_new > 0 ) {

										mtimesx_mode = mtimesx_mode_new;

									}

									if( max_threads_new > 0 ) {

										max_threads = max_threads_new;

									}

								    if( nrhs == 0 ) {

										if( mtimesx_mode_new || !max_threads_new ) {

								   		    plhs[0] = modestring(mtimesx_mode);

										} else {

								   		    plhs[0] = mxCreateDoubleScalar(max_threads);

										}

								        return;

								    }


								    if( nrhs < 2 || nrhs > 4 ) {

								        mexErrMsgTxt("Must have 2 - 4 non-directive inputs for multiply function");

								    }


								/*-------------------------------------------------------------------------

								 * Pick off the transpose character inputs. If they are missing or empty

								 * or blank, then use 'N' by default.

								 *------------------------------------------------------------------------- */


								    Araw = (mxArray *) prhs[0];

								    if( nrhs == 2 ) {

								        transa = 'N';

								        Braw = (mxArray *) prhs[1];

								        transb = 'N';

								    } else if( nrhs == 3 ) {

								        if( mxIsChar(prhs[1]) ) {

								            transa = mxArrayToTrans(prhs[1]);

								            Braw = (mxArray *) prhs[2];

								            transb = 'N';

								        } else if( mxIsChar(prhs[2]) ) {

								            transa = 'N';

								            Braw = (mxArray *) prhs[1];

								            transb = mxArrayToTrans(prhs[2]);

								        } else {

								            mexErrMsgTxt("2nd or 3rd input must be char.");

								        }

								    } else { /* nrhs == 4 */

								        if( mxIsChar(prhs[1]) && mxIsChar(prhs[3]) ) {

								            transa = mxArrayToTrans(prhs[1]);

								            Braw = (mxArray *) prhs[2];

								            transb = mxArrayToTrans(prhs[3]);

								        } else {

								            mexErrMsgTxt("2nd and 4th inputs must be char.");

								        }

								    }


								/*-----------------------------------------------------------------------------

								 * Check for valid TRANS characters

								 *----------------------------------------------------------------------------- */


								    if( transa == 'n' ) transa = 'N';

								    if( transa == 't' ) transa = 'T';

								    if( transa == 'c' ) transa = 'C';

								    if( transa == 'g' ) transa = 'G';

								    if( transb == 'n' ) transb = 'N';

								    if( transb == 't' ) transb = 'T';

								    if( transb == 'c' ) transb = 'C';

								    if( transb == 'g' ) transb = 'G';

								    if( (transa != 'N' && transa != 'T' && transa != 'C' && transa != 'G') ||

								        (transb != 'N' && transb != 'T' && transb != 'C' && transb != 'G') ) {

								        mexErrMsgTxt("Invalid TRANS character. Expected N, T, C, or G.");

								    }


								/*-----------------------------------------------------------------------------

								 * Check for proper input type and call the appropriate multiply routine.

								 * To be similar to MATLAB for mixed single-double operations, convert

								 * single inputs to double, do the calc, then convert back to single.

								 *----------------------------------------------------------------------------- */


								/* Future Enhancement: Put in special code for scalar multiplies, do calc in double */


								    if( mxIsDouble(Araw) ) {

								        if( mxIsDouble(Braw) ) {

											/*

								            if( mxIsSparse(Araw) && !mxIsSparse(Braw)  &&

								                (mxGetNumberOfElements(Araw) != 1 || mxGetNumberOfDimensions(Braw) == 2) ) {

								                k = mexCallMATLAB(1, &B, 1, &Braw, "sparse");

								                plhs[0] = DoubleTimesDouble(Araw, transa, B, transb);

								                mxDestroyArray(B);

								            } else if( !mxIsSparse(Araw) && mxIsSparse(Braw) &&

								                (mxGetNumberOfElements(Braw) != 1 || mxGetNumberOfDimensions(Araw) == 2) ) {

								                k = mexCallMATLAB(1, &A, 1, &Araw, "sparse");

								                plhs[0] = DoubleTimesDouble(A, transa, Braw, transb);

								                mxDestroyArray(A);

								            } else {

								                plhs[0] = DoubleTimesDouble(Araw, transa, Braw, transb);

								            }

											*/

								            if( mxIsSparse(Araw) && !mxIsSparse(Braw)  &&

								                mxGetNumberOfElements(Araw) == 1 && mxGetNumberOfDimensions(Braw) == 2 ) {

								                k = mexCallMATLAB(1, &B, 1, &Braw, "sparse");

								                plhs[0] = DoubleTimesDouble(Araw, transa, B, transb);

								                mxDestroyArray(B);

								            } else if( !mxIsSparse(Araw) && mxIsSparse(Braw) &&

								                mxGetNumberOfElements(Braw) == 1 && mxGetNumberOfDimensions(Araw) == 2 ) {

								                k = mexCallMATLAB(1, &A, 1, &Araw, "sparse");

								                plhs[0] = DoubleTimesDouble(A, transa, Braw, transb);

								                mxDestroyArray(A);

								            } else {

								                plhs[0] = DoubleTimesDouble(Araw, transa, Braw, transb);

								            }

								        } else if( mxIsSingle(Braw) ) {

								            if( mxIsSparse(Araw) ) {

												if( mxGetNumberOfElements(Araw) == 1 ) {

									                k = mexCallMATLAB(1, &C, 1, &Araw, "full");

									                k = mexCallMATLAB(1, &A, 1, &C, "single");

													mxDestroyArray(C);

									                plhs[0] = FloatTimesFloat(A, transa, Braw, transb);

													mxDestroyArray(A);

												} else {

													k = mexCallMATLAB(1, &B, 1, &Braw, "double");

												    plhs[0] = DoubleTimesDouble(Araw, transa, B, transb);

											        mxDestroyArray(B);

												}

								            } else if( mxGetNumberOfElements(Araw) == 1 || mxGetNumberOfElements(Braw) == 1 ) {

								                k = mexCallMATLAB(1, &B, 1, &Braw, "double");

								                C = DoubleTimesDouble(Araw, transa, B, transb);

								                mxDestroyArray(B);

								                k = mexCallMATLAB(1, plhs, 1, &C, "single");

								                mxDestroyArray(C);

								            } else {

								                k = mexCallMATLAB(1, &A, 1, &Araw, "single");

								                plhs[0] = FloatTimesFloat(A, transa, Braw, transb);

								                mxDestroyArray(A);

								            }

								        } else {

								            unsupported = 1;

								        }

								    } else if( mxIsSingle(Araw) ) {

								        if( mxIsDouble(Braw) ) {

								            if( mxIsSparse(Braw) ) {

												if( mxGetNumberOfElements(Braw) == 1 ) {

									                k = mexCallMATLAB(1, &C, 1, &Braw, "full");

									                k = mexCallMATLAB(1, &B, 1, &C, "single");

													mxDestroyArray(C);

									                plhs[0] = FloatTimesFloat(Araw, transa, B, transb);

													mxDestroyArray(B);

												} else {

													k = mexCallMATLAB(1, &A, 1, &Araw, "double");

												    plhs[0] = DoubleTimesDouble(A, transa, Braw, transb);

											        mxDestroyArray(A);

												}

								            } else if( mxGetNumberOfElements(Araw) == 1 || mxGetNumberOfElements(Braw) == 1 ) {

								                k = mexCallMATLAB(1, &A, 1, &Araw, "double");

								                C = DoubleTimesDouble(A, transa, Braw, transb);

								                mxDestroyArray(A);

								                k = mexCallMATLAB(1, plhs, 1, &C, "single");

								                mxDestroyArray(C);

								            } else {

								                k = mexCallMATLAB(1, &B, 1, &Braw, "single");

								                plhs[0] = FloatTimesFloat(Araw, transa, B, transb);

								                mxDestroyArray(B);

								            }

								        } else if( mxIsSingle(Braw) ) {

								            plhs[0] = FloatTimesFloat(Araw, transa, Braw, transb);

								        } else {

								            unsupported = 1;

								        }

								    } else {

								        unsupported = 1;

								    }

								    if( unsupported ) {

										if( debug ) {

											mexPrintf("MTIMESX: Unsupported types ... calling MATLAB intrinsic function mtimes\n");

										}

								        rhs[0] = Araw;

								        transstring[0] = transa;

								        rhs[1] = mxCreateString(transstring);

								        rhs[2] = Braw;

								        transstring[0] = transb;

								        rhs[3] = mxCreateString(transstring);

								        k = mexCallMATLAB(1, plhs, 4, rhs, "mtimesx_sparse");

								        mxDestroyArray(rhs[3]);

								        mxDestroyArray(rhs[1]);

										threads_used = 0;

								    }


								    return;


								}


								/*-------------------------------------------------------------------------------

								 *

								 * Convert mxArray char input into trans character.

								 *

								 *------------------------------------------------------------------------------- */


								char mxArrayToTrans(const mxArray *mx)

								{

								    mwSize n;

								    char *cp;

								    char trans;


								    if( cp = mxArrayToString(mx) ) {

								        n = mxGetNumberOfElements(mx);

								        if( n == 0 ) {

								            trans = 'N';  /* Treat empty char strings the same as 'N' */

								        } else {

								            trans = *cp;  /* Pick off only the first character */

								        }

								        mxFree(cp);

								    } else {

								        mexErrMsgTxt("Error allocating memory for trans string");

								    }

								    return trans;

								}


								/*-------------------------------------------------------------------------------

								 *

								 * Clean a real sparse matrix of zeros. The code is a bit ugly because it was

								 * highly optimized for speed.

								 *

								 *------------------------------------------------------------------------------- */


								mwIndex spcleanreal(mxArray *mx)

								{

								    mwSize n, nrow;

								    mwIndex *ir, *jc, *iz;

								    mwIndex j, x, y, diff;

								    double *pr, *pz;


								    n = mxGetN(mx);

								    pz = pr = mxGetData(mx);

								    ir = mxGetIr(mx);

								    jc = mxGetJc(mx);


								    diff = 0;

								    for( y=0; y<n; y++ ) {

								        nrow = jc[y+1] - jc[y];

								        for( x=0; x<nrow; x++ ) {

								            if( *pr == 0.0 ) {

								                iz = (ir += (pr - pz));

								                pz = pr;

								                j = y + 1;

								                goto copydata;

								            }

								            pr++;

								        }

								    }

								    return jc[n];


								    for( y=0; y<n; y++ ) {

								        j = y + 1;

								        nrow = (jc[j] - diff) - jc[y];

								        for( x=0; x<nrow; x++ ) {

								            if( *pr != 0.0 ) {

								                *pz++ = *pr;

								                *iz++ = *ir;

								            } else {

								                copydata:

								                diff++;

								            }

								            pr++;

								            ir++;

								        }

								        jc[j] -= diff;

								    }

								    return jc[n];

								}


								/*-------------------------------------------------------------------------------

								 *

								 * Clean a complex sparse matrix of zeros. The code is a bit ugly because it was

								 * highly optimized for speed.

								 *

								 *------------------------------------------------------------------------------- */


								mwIndex spcleancomplex(mxArray *mx)

								{

								    mwSize n, nrow;

								    mwIndex *ir, *jc, *iz;

								    mwIndex j, x, y, diff;

								    double *pr, *pz;

								    double *pi, *py;


								    n = mxGetN(mx);

								    pz = pr = mxGetData(mx);

								    py = pi = mxGetImagData(mx);

								    ir = mxGetIr(mx);

								    jc = mxGetJc(mx);


								    diff = 0;

								    for( y=0; y<n; y++ ) {

								        nrow = jc[y+1] - jc[y];

								        for( x=0; x<nrow; x++ ) {

								            if( *pr == 0.0 && *pi == 0.0 ) {

								                iz = (ir += (pr - pz));

								                pz = pr;

								                py = pi;

								                j = y + 1;

								                goto copydata;

								            }

								            pr++;

								            pi++;

								        }

								    }

								    return jc[n];


								    for( y=0; y<n; y++ ) {

								        j = y + 1;

								        nrow = (jc[j] - diff) - jc[y];

								        for( x=0; x<nrow; x++ ) {

								            if( *pr != 0.0 || *pi != 0.0 ) {

								                *pz++ = *pr;

								                *py++ = *pi;

								                *iz++ = *ir;

								            } else {

								                copydata:

								                diff++;

								            }

								            pr++;

								            pi++;

								            ir++;

								        }

								        jc[j] -= diff;

								    }

								    return jc[n];

								}


								/*------------------------------------------------------------------------------- */


								mwIndex spclean(mxArray *mx)

								{

								    if( mxIsComplex(mx) ) {

								        return spcleancomplex(mx);

								    } else {

								        return spcleanreal(mx);

								    }

								}


								/*-------------------------------------------------------------------------------

								 * myRealloc for use only in cases of decreasing a block size. Avoids using the

								 * buggy mxRealloc function for this case.

								 *------------------------------------------------------------------------------- */


								void *myRealloc(void *vp, mwSize_t n)

								{

								    void *xp;


								    if( xp = mxMalloc(n) ) { /* not really a necessary test for a mex routine */

								        memcpy(xp, vp, n);

								        mxFree(vp);

								        return xp;

								    } else {

								        return vp;

								    }

								}


								/*-------------------------------------------------------------------------------

								 * Logo.

								 *------------------------------------------------------------------------------- */


								void mtimesx_logo(void)

								{

								    mxArray *rhs[9];

								    double *x, *y, *z;

								    double r;

								    mwSize i, j;


								    /* fh = figure */

								    mexCallMATLAB(1, rhs, 0, NULL,"figure");


								    /* set(fh, 'Color', [1 1 1]) */

								    rhs[1] = mxCreateString("Color");

								    rhs[2] = mxCreateDoubleMatrix(1, 3, mxREAL);

								    x = mxGetPr(rhs[2]);

								    x[0] = x[1] = x[2] = 1.0;

								    mexCallMATLAB(0, NULL, 3, rhs, "set");

								    mxDestroyArray(rhs[0]);

								    mxDestroyArray(rhs[1]);

								    mxDestroyArray(rhs[2]);


									/* [X Y] = meshgrid(-8:0.0625:8); */

								    /* R = sqrt(X.^2 + Y.^2) + eps; */

								    /* Z = sin(R)./R; */

								    rhs[0] = mxCreateDoubleMatrix(257, 257, mxREAL);

								    rhs[1] = mxCreateDoubleMatrix(257, 257, mxREAL);

								    rhs[2] = mxCreateDoubleMatrix(257, 257, mxREAL);

								    x = mxGetPr(rhs[0]);

								    y = mxGetPr(rhs[1]);

								    z = mxGetPr(rhs[2]);

								    for( i=0; i<257; i++ ) {

								        for( j=0; j<257; j++ ) {

								            *x = -8.0 + i * 0.0625;

								            *y = -8.0 + j * 0.0625;

								            r = sqrt((*x)*(*x) + (*y)*(*y)) + 2.22e-16;

								            *z++ = sin(r) / r;

								            x++;

								            y++;

								        }

								    }


								    /* surf(X,Y,Z,'FaceColor','interp',... */

									/* 'EdgeColor','none',... */

									/* 'FaceLighting','phong') */

								    rhs[3] = mxCreateString("FaceColor");

								    rhs[4] = mxCreateString("interp");

								    rhs[5] = mxCreateString("EdgeColor");

								    rhs[6] = mxCreateString("none");

								    rhs[7] = mxCreateString("FaceLighting");

								    rhs[8] = mxCreateString("phong");

								    mexCallMATLAB(0, NULL, 9, rhs, "surf");

								    for( i=0; i<9; i++ ) {

								        mxDestroyArray(rhs[i]);

								    }


								    /* daspect([5 5 1]) */

								    rhs[0] = mxCreateDoubleMatrix(1, 3, mxREAL);

								    x = mxGetPr(rhs[0]);

								    x[0] = 5.0;

								    x[1] = 5.0;

								    x[2] = 1.0;

								    mexCallMATLAB(0, NULL, 1, rhs, "daspect");

								    mxDestroyArray(rhs[0]);


									/* axis('tight') */

								    rhs[0] = mxCreateString("tight");

								    mexCallMATLAB(0, NULL, 1, rhs, "axis");

								    mxDestroyArray(rhs[0]);


									/* axis('off') */

								    rhs[0] = mxCreateString("off");

								    mexCallMATLAB(0, NULL, 1, rhs, "axis");

								    mxDestroyArray(rhs[0]);


									/* view([-50 30]) */

								    rhs[0] = mxCreateDoubleMatrix(1, 2, mxREAL);

								    x = mxGetPr(rhs[0]);

								    x[0] = -50.0;

								    x[1] =  30.0;

								    mexCallMATLAB(0, NULL, 1, rhs, "view");

								    mxDestroyArray(rhs[0]);


									/* camlight('left') */

								    rhs[0] = mxCreateString("left");

								    mexCallMATLAB(0, NULL, 1, rhs, "camlight");

								    mxDestroyArray(rhs[0]);

								}


								/*-------------------------------------------------------------------------------

								 * Construct mode string.

								 *------------------------------------------------------------------------------- */


								mxArray *modestring(int m)

								{


									switch( m )

									{

									case MTIMESX_BLAS:

								        return mxCreateString("BLAS");

									case MTIMESX_LOOPS:

								        return mxCreateString("LOOPS");

									case MTIMESX_LOOPS_OMP:

								        return mxCreateString("LOOPSOMP");

									case MTIMESX_MATLAB:

								        return mxCreateString("MATLAB");

									case MTIMESX_SPEED:

								        return mxCreateString("SPEED");

									case MTIMESX_SPEED_OMP:

								        return mxCreateString("SPEEDOMP");

									}

									return mxCreateString("");

								}


								/*-------------------------------------------------------------------------------

								 * Construct thread string.

								 *------------------------------------------------------------------------------- */


								mxArray *threadstring(int t)

								{

									char ompstring[] = "OMP_SET_NUM_THREADS(___)";

									int k;


									if( t > 999 ) {

										return mxCreateString("OMP_SET_NUM_THREADS");

									} else {

										k = 20;

										if( max_threads > 99 ) {

											ompstring[k++] = (t / 100) + '0';

											t /= 10;

										}

										if( t > 9 ) {

											ompstring[k++] = (t / 10) + '0';

											t /= 10;

										}

										ompstring[k++] = t  + '0';

										ompstring[k++] = ')';

										ompstring[k  ] = '\0';

										return mxCreateString(ompstring);

									}

								}


								/*-------------------------------------------------------------------------

								 * Macros to define CAPITAL letters as the address of the lower case letter

								 * This will make the function calls look much cleaner

								 *------------------------------------------------------------------------- */


								#define  M         &m

								#define  K         &k

								#define  L         &l

								#define  N         &n

								#define  M1        &m1

								#define  N1        &n1

								#define  M2        &m2

								#define  N2        &n2

								#define  AR        &ar

								#define  AI        &ai

								#define  BI        &bi

								#define  AIBI      &aibi

								#define  LDA       &lda

								#define  LDB       &ldb

								#define  LDC       &ldc

								#define  ZERO      &Zero

								#define  ONE       &One

								#define  MINUSONE  &Minusone

								#define  TRANSA    &transa

								#define  TRANSB    &transb

								#define  TRANS     &trans

								#define  PTRANSA   &ptransa

								#define  PTRANSB   &ptransb

								#define  INCX      &inc

								#define  INCY      &inc

								#define  ALPHA     &alpha

								#define  UPLO      &uplo


								/*-------------------------------------------------------------------------

								 * Number of bytes that a sparse matrix result has to drop by before a

								 * realloc will be performed on the data area(s).

								 *------------------------------------------------------------------------- */


								#define  REALLOCTOL  1024


								/*-------------------------------------------------------------------------

								 * Number of elements in a dot product must be at least this much before

								 * the calculation will be multi-threaded even when OMP is enabled.

								 *------------------------------------------------------------------------- */


								#ifdef __LCC__

								#define  OMP_DOT_SMALL  182000

								#else

								#define  OMP_DOT_SMALL  126000

								#endif


								/*-------------------------------------------------------------------------

								 * Number of elements in the temporary dot product storage array. Used to

								 * ensure multi-threaded results are consistent from run to run. Prefer

								 * using this as opposed to using atomic or reduction methods which would

								 * not guarantee consistent results from run to run.

								 *------------------------------------------------------------------------- */


								#define  OMP_DOT_ARRAY_SIZE  256


								/*-------------------------------------------------------------------------

								 * Number of elements in a scalar product must be at least this much before

								 * the calculation will be multi-threaded even when OMP is enabled.

								 *------------------------------------------------------------------------- */


								#define  OMP_SCALAR_SMALL  100000


								/*-------------------------------------------------------------------------

								 * Number of elements in an outer product must be at least this much before

								 * the calculation will be multi-threaded even when OMP is enabled.

								 *------------------------------------------------------------------------- */


								#define  OMP_OUTER_SMALL  100000


								/*-------------------------------------------------------------------------

								 * Number of loops in special multiply cases must be at least this much

								 * before the calculation will be multi-threaded even when OMP is enabled.

								 *------------------------------------------------------------------------- */


								#define  OMP_SPECIAL_SMALL  1000


								/*-------------------------------------------------------------------------

								 * Macros for the double * double function. All of the generic function

								 * names and variable types in the file mtimesx_RealTimesReal.c are to be

								 * replace with these names specific to the double type.

								 *------------------------------------------------------------------------- */


								#define  MatlabReturnType  mxDOUBLE_CLASS

								#define  RealTimesReal  DoubleTimesDouble

								#define  RealTimesScalar  DoubleTimesScalar

								#define  RealTimesScalarX DoubleTimesScalarX

								#define  AllRealZero  AllDoubleZero

								#define  RealKindEqP1P0TimesRealKindN  DoubleImagEqP1P0TimesDoubleImagN

								#define  RealKindEqP1P0TimesRealKindG  DoubleImagEqP1P0TimesDoubleImagG

								#define  RealKindEqP1P0TimesRealKindT  DoubleImagEqP1P0TimesDoubleImagT

								#define  RealKindEqP1P0TimesRealKindC  DoubleImagEqP1P0TimesDoubleImagC

								#define  RealKindEqP1P1TimesRealKindN  DoubleImagEqP1P1TimesDoubleImagN

								#define  RealKindEqP1P1TimesRealKindG  DoubleImagEqP1P1TimesDoubleImagG

								#define  RealKindEqP1P1TimesRealKindT  DoubleImagEqP1P1TimesDoubleImagT

								#define  RealKindEqP1P1TimesRealKindC  DoubleImagEqP1P1TimesDoubleImagC

								#define  RealKindEqP1M1TimesRealKindN  DoubleImagEqP1M1TimesDoubleImagN

								#define  RealKindEqP1M1TimesRealKindG  DoubleImagEqP1M1TimesDoubleImagG

								#define  RealKindEqP1M1TimesRealKindT  DoubleImagEqP1M1TimesDoubleImagT

								#define  RealKindEqP1M1TimesRealKindC  DoubleImagEqP1M1TimesDoubleImagC

								#define  RealKindEqP1PxTimesRealKindN  DoubleImagEqP1PxTimesDoubleImagN

								#define  RealKindEqP1PxTimesRealKindG  DoubleImagEqP1PxTimesDoubleImagG

								#define  RealKindEqP1PxTimesRealKindT  DoubleImagEqP1PxTimesDoubleImagT

								#define  RealKindEqP1PxTimesRealKindC  DoubleImagEqP1PxTimesDoubleImagC

								#define  RealKindEqM1P1TimesRealKindN  DoubleImagEqM1P1TimesDoubleImagN

								#define  RealKindEqM1P1TimesRealKindG  DoubleImagEqM1P1TimesDoubleImagG

								#define  RealKindEqM1P1TimesRealKindT  DoubleImagEqM1P1TimesDoubleImagT

								#define  RealKindEqM1P1TimesRealKindC  DoubleImagEqM1P1TimesDoubleImagC

								#define  RealKindEqM1M1TimesRealKindN  DoubleImagEqM1M1TimesDoubleImagN

								#define  RealKindEqM1M1TimesRealKindG  DoubleImagEqM1M1TimesDoubleImagG

								#define  RealKindEqM1M1TimesRealKindT  DoubleImagEqM1M1TimesDoubleImagT

								#define  RealKindEqM1M1TimesRealKindC  DoubleImagEqM1M1TimesDoubleImagC

								#define  RealKindEqM1PxTimesRealKindN  DoubleImagEqM1PxTimesDoubleImagN

								#define  RealKindEqM1PxTimesRealKindG  DoubleImagEqM1PxTimesDoubleImagG

								#define  RealKindEqM1PxTimesRealKindT  DoubleImagEqM1PxTimesDoubleImagT

								#define  RealKindEqM1PxTimesRealKindC  DoubleImagEqM1PxTimesDoubleImagC

								#define  RealKindEqM1P0TimesRealKindN  DoubleImagEqM1P0TimesDoubleImagN

								#define  RealKindEqM1P0TimesRealKindG  DoubleImagEqM1P0TimesDoubleImagG

								#define  RealKindEqM1P0TimesRealKindT  DoubleImagEqM1P0TimesDoubleImagT

								#define  RealKindEqM1P0TimesRealKindC  DoubleImagEqM1P0TimesDoubleImagC

								#define  RealKindEqPxP1TimesRealKindN  DoubleImagEqPxP1TimesDoubleImagN

								#define  RealKindEqPxP1TimesRealKindG  DoubleImagEqPxP1TimesDoubleImagG

								#define  RealKindEqPxP1TimesRealKindT  DoubleImagEqPxP1TimesDoubleImagT

								#define  RealKindEqPxP1TimesRealKindC  DoubleImagEqPxP1TimesDoubleImagC

								#define  RealKindEqPxM1TimesRealKindN  DoubleImagEqPxM1TimesDoubleImagN

								#define  RealKindEqPxM1TimesRealKindG  DoubleImagEqPxM1TimesDoubleImagG

								#define  RealKindEqPxM1TimesRealKindT  DoubleImagEqPxM1TimesDoubleImagT

								#define  RealKindEqPxM1TimesRealKindC  DoubleImagEqPxM1TimesDoubleImagC

								#define  RealKindEqPxP0TimesRealKindN  DoubleImagEqPxP0TimesDoubleImagN

								#define  RealKindEqPxP0TimesRealKindG  DoubleImagEqPxP0TimesDoubleImagG

								#define  RealKindEqPxP0TimesRealKindT  DoubleImagEqPxP0TimesDoubleImagT

								#define  RealKindEqPxP0TimesRealKindC  DoubleImagEqPxP0TimesDoubleImagC

								#define  RealKindEqPxPxTimesRealKindN  DoubleImagEqPxPxTimesDoubleImagN

								#define  RealKindEqPxPxTimesRealKindG  DoubleImagEqPxPxTimesDoubleImagG

								#define  RealKindEqPxPxTimesRealKindT  DoubleImagEqPxPxTimesDoubleImagT

								#define  RealKindEqPxPxTimesRealKindC  DoubleImagEqPxPxTimesDoubleImagC


								#if defined(_WIN32) || defined(_WIN64)

								#define  xDOT      ddot

								#define  xGEMV     dgemv

								#define  xGEMM     dgemm

								#define  xSYRK     dsyrk

								#define  xSYR2K    dsyr2k

								#define  xAXPY     daxpy

								#define  xGER      dger

								#define  xSYR      dsyr

								#define  xSYR2     dsyr2

								#else

								#define  xDOT      ddot_

								#define  xGEMV     dgemv_

								#define  xGEMM     dgemm_

								#define  xSYRK     dsyrk_

								#define  xSYR2K    dsyr2k_

								#define  xAXPY     daxpy_

								#define  xGER      dger_

								#define  xSYR      dsyr_

								#define  xSYR2     dsyr2_

								#endif


								#define  xFILLPOS  dfillpos

								#define  xFILLNEG  dfillneg

								#define  zero      0.0

								#define  one       1.0

								#define  minusone -1.0

								#define  RealKind              double

								#define  RealKindComplex       doublecomplex

								#define  RealKindDotProduct    doublecomplexdotproduct

								#define  RealKindDotProductX   doublecomplexdotproductx

								#define  RealKindOuterProduct  doublecomplexouterproduct

								#define  RealKindOuterProductX doublecomplexouterproductx

								#define  RealScalarTimesReal   doublescalartimesdouble

								#define  RealKindZZ            doubleZZ


								#include  "mtimesx_RealTimesReal.c"


								/*----------------------------------------------------------------------

								 * Undefine all of the double specific macros

								 *---------------------------------------------------------------------- */


								#undef  MatlabReturnType

								#undef  RealTimesReal

								#undef  RealTimesScalar

								#undef  RealTimesScalarX

								#undef  AllRealZero

								#undef  RealKindEqP1P0TimesRealKindN

								#undef  RealKindEqP1P0TimesRealKindG

								#undef  RealKindEqP1P0TimesRealKindT

								#undef  RealKindEqP1P0TimesRealKindC

								#undef  RealKindEqP1P1TimesRealKindN

								#undef  RealKindEqP1P1TimesRealKindG

								#undef  RealKindEqP1P1TimesRealKindT

								#undef  RealKindEqP1P1TimesRealKindC

								#undef  RealKindEqP1M1TimesRealKindN

								#undef  RealKindEqP1M1TimesRealKindG

								#undef  RealKindEqP1M1TimesRealKindT

								#undef  RealKindEqP1M1TimesRealKindC

								#undef  RealKindEqP1PxTimesRealKindN

								#undef  RealKindEqP1PxTimesRealKindG

								#undef  RealKindEqP1PxTimesRealKindT

								#undef  RealKindEqP1PxTimesRealKindC

								#undef  RealKindEqM1P1TimesRealKindN

								#undef  RealKindEqM1P1TimesRealKindG

								#undef  RealKindEqM1P1TimesRealKindT

								#undef  RealKindEqM1P1TimesRealKindC

								#undef  RealKindEqM1M1TimesRealKindN

								#undef  RealKindEqM1M1TimesRealKindG

								#undef  RealKindEqM1M1TimesRealKindT

								#undef  RealKindEqM1M1TimesRealKindC

								#undef  RealKindEqM1P0TimesRealKindN

								#undef  RealKindEqM1P0TimesRealKindG

								#undef  RealKindEqM1P0TimesRealKindT

								#undef  RealKindEqM1P0TimesRealKindC

								#undef  RealKindEqM1PxTimesRealKindN

								#undef  RealKindEqM1PxTimesRealKindG

								#undef  RealKindEqM1PxTimesRealKindT

								#undef  RealKindEqM1PxTimesRealKindC

								#undef  RealKindEqPxP1TimesRealKindN

								#undef  RealKindEqPxP1TimesRealKindG

								#undef  RealKindEqPxP1TimesRealKindT

								#undef  RealKindEqPxP1TimesRealKindC

								#undef  RealKindEqPxM1TimesRealKindN

								#undef  RealKindEqPxM1TimesRealKindG

								#undef  RealKindEqPxM1TimesRealKindT

								#undef  RealKindEqPxM1TimesRealKindC

								#undef  RealKindEqPxP0TimesRealKindN

								#undef  RealKindEqPxP0TimesRealKindG

								#undef  RealKindEqPxP0TimesRealKindT

								#undef  RealKindEqPxP0TimesRealKindC

								#undef  RealKindEqPxPxTimesRealKindN

								#undef  RealKindEqPxPxTimesRealKindG

								#undef  RealKindEqPxPxTimesRealKindT

								#undef  RealKindEqPxPxTimesRealKindC

								#undef  xDOT

								#undef  xGEMV

								#undef  xGEMM

								#undef  xSYRK

								#undef  xSYR2K

								#undef  xAXPY

								#undef  xGER

								#undef  xSYR

								#undef  xSYR2

								#undef  xFILLPOS

								#undef  xFILLNEG

								#undef  zero

								#undef  one

								#undef  minusone

								#undef  RealKind

								#undef  RealKindComplex

								#undef  RealKindDotProduct

								#undef  RealKindDotProductX

								#undef  RealKindOuterProduct

								#undef  RealKindOuterProductX

								#undef  RealScalarTimesReal

								#undef  RealKindZZ


								/*-------------------------------------------------------------------------

								 * Macros for the single * single function. All of the generic function

								 * names and variable types in the file mtimesx_RealTimesReal.c are to be

								 * replaced with these names specific to the single type.

								 *------------------------------------------------------------------------- */


								#define  MatlabReturnType  mxSINGLE_CLASS

								#define  RealTimesReal  FloatTimesFloat

								#define  RealTimesScalar  FloatTimesScalar

								#define  RealTimesScalarX FloatTimesScalarX

								#define  AllRealZero  AllFloatZero

								#define  RealKindEqP1P0TimesRealKindN  FloatImagEqP1P0TimesFloatImagN

								#define  RealKindEqP1P0TimesRealKindG  FloatImagEqP1P0TimesFloatImagG

								#define  RealKindEqP1P0TimesRealKindT  FloatImagEqP1P0TimesFloatImagT

								#define  RealKindEqP1P0TimesRealKindC  FloatImagEqP1P0TimesFloatImagC

								#define  RealKindEqP1P1TimesRealKindN  FloatImagEqP1P1TimesFloatImagN

								#define  RealKindEqP1P1TimesRealKindG  FloatImagEqP1P1TimesFloatImagG

								#define  RealKindEqP1P1TimesRealKindT  FloatImagEqP1P1TimesFloatImagT

								#define  RealKindEqP1P1TimesRealKindC  FloatImagEqP1P1TimesFloatImagC

								#define  RealKindEqP1M1TimesRealKindN  FloatImagEqP1M1TimesFloatImagN

								#define  RealKindEqP1M1TimesRealKindG  FloatImagEqP1M1TimesFloatImagG

								#define  RealKindEqP1M1TimesRealKindT  FloatImagEqP1M1TimesFloatImagT

								#define  RealKindEqP1M1TimesRealKindC  FloatImagEqP1M1TimesFloatImagC

								#define  RealKindEqP1PxTimesRealKindN  FloatImagEqP1PxTimesFloatImagN

								#define  RealKindEqP1PxTimesRealKindG  FloatImagEqP1PxTimesFloatImagG

								#define  RealKindEqP1PxTimesRealKindT  FloatImagEqP1PxTimesFloatImagT

								#define  RealKindEqP1PxTimesRealKindC  FloatImagEqP1PxTimesFloatImagC

								#define  RealKindEqM1P1TimesRealKindN  FloatImagEqM1P1TimesFloatImagN

								#define  RealKindEqM1P1TimesRealKindG  FloatImagEqM1P1TimesFloatImagG

								#define  RealKindEqM1P1TimesRealKindT  FloatImagEqM1P1TimesFloatImagT

								#define  RealKindEqM1P1TimesRealKindC  FloatImagEqM1P1TimesFloatImagC

								#define  RealKindEqM1M1TimesRealKindN  FloatImagEqM1M1TimesFloatImagN

								#define  RealKindEqM1M1TimesRealKindG  FloatImagEqM1M1TimesFloatImagG

								#define  RealKindEqM1M1TimesRealKindT  FloatImagEqM1M1TimesFloatImagT

								#define  RealKindEqM1M1TimesRealKindC  FloatImagEqM1M1TimesFloatImagC

								#define  RealKindEqM1P0TimesRealKindN  FloatImagEqM1P0TimesFloatImagN

								#define  RealKindEqM1P0TimesRealKindG  FloatImagEqM1P0TimesFloatImagG

								#define  RealKindEqM1P0TimesRealKindT  FloatImagEqM1P0TimesFloatImagT

								#define  RealKindEqM1P0TimesRealKindC  FloatImagEqM1P0TimesFloatImagC

								#define  RealKindEqM1PxTimesRealKindN  FloatImagEqM1PxTimesFloatImagN

								#define  RealKindEqM1PxTimesRealKindG  FloatImagEqM1PxTimesFloatImagG

								#define  RealKindEqM1PxTimesRealKindT  FloatImagEqM1PxTimesFloatImagT

								#define  RealKindEqM1PxTimesRealKindC  FloatImagEqM1PxTimesFloatImagC

								#define  RealKindEqPxP1TimesRealKindN  FloatImagEqPxP1TimesFloatImagN

								#define  RealKindEqPxP1TimesRealKindG  FloatImagEqPxP1TimesFloatImagG

								#define  RealKindEqPxP1TimesRealKindT  FloatImagEqPxP1TimesFloatImagT

								#define  RealKindEqPxP1TimesRealKindC  FloatImagEqPxP1TimesFloatImagC

								#define  RealKindEqPxM1TimesRealKindN  FloatImagEqPxM1TimesFloatImagN

								#define  RealKindEqPxM1TimesRealKindG  FloatImagEqPxM1TimesFloatImagG

								#define  RealKindEqPxM1TimesRealKindT  FloatImagEqPxM1TimesFloatImagT

								#define  RealKindEqPxM1TimesRealKindC  FloatImagEqPxM1TimesFloatImagC

								#define  RealKindEqPxP0TimesRealKindN  FloatImagEqPxP0TimesFloatImagN

								#define  RealKindEqPxP0TimesRealKindG  FloatImagEqPxP0TimesFloatImagG

								#define  RealKindEqPxP0TimesRealKindT  FloatImagEqPxP0TimesFloatImagT

								#define  RealKindEqPxP0TimesRealKindC  FloatImagEqPxP0TimesFloatImagC

								#define  RealKindEqPxPxTimesRealKindN  FloatImagEqPxPxTimesFloatImagN

								#define  RealKindEqPxPxTimesRealKindG  FloatImagEqPxPxTimesFloatImagG

								#define  RealKindEqPxPxTimesRealKindT  FloatImagEqPxPxTimesFloatImagT

								#define  RealKindEqPxPxTimesRealKindC  FloatImagEqPxPxTimesFloatImagC


								#if defined(_WIN32) || defined(_WIN64)

								#define  xDOT      sdot

								#define  xGEMV     sgemv

								#define  xGEMM     sgemm

								#define  xSYRK     ssyrk

								#define  xSYR2K    ssyr2k

								#define  xAXPY     saxpy

								#define  xGER      sger

								#define  xSYR      ssyr

								#define  xSYR2     ssyr2

								#else

								#define  xDOT      sdot_

								#define  xGEMV     sgemv_

								#define  xGEMM     sgemm_

								#define  xSYRK     ssyrk_

								#define  xSYR2K    ssyr2k_

								#define  xAXPY     saxpy_

								#define  xGER      sger_

								#define  xSYR      ssyr_

								#define  xSYR2     ssyr2_

								#endif


								#define  xFILLPOS  sfillpos

								#define  xFILLNEG  sfillneg

								#define  zero       0.0f

								#define  one        1.0f

								#define  minusone  -1.0f

								#define  RealKind              float

								#define  RealKindComplex       floatcomplex

								#define  RealKindDotProduct    floatcomplexdotproduct

								#define  RealKindDotProductX   floatcomplexdotproductx

								#define  RealKindOuterProduct  floatcomplexouterproduct

								#define  RealKindOuterProductX floatcomplexouterproductx

								#define  RealScalarTimesReal   floatscalartimesfloat

								#define  RealKindZZ            floatZZ


								#include  "mtimesx_RealTimesReal.c"