/*
* Copyright (c) 2012 Hypertriton, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Operations on vectors in R^3 using Streaming SIMD Extensions.
*/
#include
#ifdef HAVE_SSE
#include
#include
const M_VectorOps3 mVecOps3_SSE = {
"sse",
M_VectorZero3_SSE, /* -16 clks */
M_VectorGet3_SSE, /* = (sets w=0) */
M_VectorSet3_SSE, /* = (sets w=0) */
M_VectorCopy3_SSE, /* = */
M_VectorFlip3_SSE, /* = */
M_VectorLen3_SSE, /* = */
M_VectorLen3p_SSE, /* -3 clks */
M_VectorDot3_SSE, /* +14 clks (SSE3) */
M_VectorDot3p_SSE, /* -4 clks (SSE3) */
M_VectorDistance3_SSE, /* -55 clks */
M_VectorDistance3p_SSE, /* -120 clks */
M_VectorNorm3_SSE, /* -105 clks */
M_VectorNorm3p_SSE, /* -87 clks */
M_VectorNorm3v_SSE, /* -67 clks */
M_VectorCross3_SSE, /* = */
M_VectorCross3p_SSE, /* -20 clks */
M_VectorNormCross3_SSE, /* -42 clks */
M_VectorNormCross3p_SSE, /* -40 clks */
M_VectorScale3_SSE, /* -27 clks */
M_VectorScale3p_SSE, /* -15 clks */
M_VectorScale3v_SSE, /* -29 clks */
M_VectorAdd3_SSE, /* -29 clks */
M_VectorAdd3p_SSE, /* -15 clks */
M_VectorAdd3v_SSE, /* -3 clks */
M_VectorSum3_SSE, /* -58 clks (100 vecs) */
M_VectorSub3_SSE, /* -29 clks */
M_VectorSub3p_SSE, /* -15 clks */
M_VectorSub3v_SSE, /* -3 clks */
M_VectorAvg3_SSE, /* +11 clks */
M_VectorAvg3p_SSE, /* -9 clks */
M_VectorLERP3_SSE, /* */
M_VectorLERP3p_SSE, /* */
M_VectorElemPow3_SSE, /* */
M_VectorVecAngle3_SSE /* */
};
#endif /* HAVE_SSE */