// This code contains NVIDIA Confidential Information and is disclosed 
// under the Mutual Non-Disclosure Agreement. 
// 
// Notice 
// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES 
// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, 
// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
// 
// NVIDIA Corporation assumes no responsibility for the consequences of use of such 
// information or for any infringement of patents or other rights of third parties that may 
// result from its use. No license is granted by implication or otherwise under any patent 
// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless 
// expressly authorized by NVIDIA.  Details are subject to change without notice. 
// This code supersedes and replaces all information previously supplied. 
// NVIDIA Corporation products are not authorized for use as critical 
// components in life support devices or systems without express written approval of 
// NVIDIA Corporation. 
// 
// Copyright © 2008- 2013 NVIDIA Corporation. All rights reserved.
//
// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
// rights in and to this software and related documentation and any modifications thereto.
// Any use, reproduction, disclosure or distribution of this software and related
// documentation without an express license agreement from NVIDIA Corporation is
// strictly prohibited.
//

#ifndef _NVWAVEWORKS_FLOAT16_UTIL_H
#define _NVWAVEWORKS_FLOAT16_UTIL_H

#include <cstring>

#include "simd/Simd4f.h"
#include "simd/Simd4i.h"

namespace GFSDK_WaveWorks_Float16_Util
{
	inline void float16(gfsdk_U16* __restrict out, const float in)
	{
		// Non-SIMD implementation
		gfsdk_U32 fltInt32 = *((gfsdk_U32*)&in);
		gfsdk_U16 fltInt16 = (fltInt32 >> 31) << 5;
		gfsdk_U16 tmp = (fltInt32 >> 23) & 0xff;
		tmp = (tmp - 0x70) & (gfsdk_U32((int)(0x70 - tmp) >> 4) >> 27);
		fltInt16 = (fltInt16 | tmp) << 10;
		fltInt16 |= (fltInt32 >> 13) & 0x3ff;
		*((gfsdk_U16*)out) = (gfsdk_U16)fltInt16;
	};

	inline void float16x4(gfsdk_U16* __restrict out, const Simd4f in)
	{
		// SIMD implementation
		Simd4i fltInt32 = *((Simd4i*)&in);
		Simd4i fltInt16 = (fltInt32 >> 31) << 5;
		Simd4i tmp = (fltInt32 >> 23) & simd4i(0xff);
		Simd4i p = simd4i(0x70);
		Simd4i signmask_5bits = ((simdi::operator-(p,tmp)) >> 16) & simd4i(0x0000001f);
		tmp = (simdi::operator-(tmp,p)) & signmask_5bits;
		fltInt16 = (fltInt16 | tmp) << 10;
		fltInt16 = fltInt16 | ((fltInt32 >> 13) &  simd4i(0x3ff));
		gfsdk_U32* result = (gfsdk_U32*)&fltInt16;
		*((gfsdk_U16*)out + 0) = (gfsdk_U16)(*(result+0));
		*((gfsdk_U16*)out + 1) = (gfsdk_U16)(*(result+1));
		*((gfsdk_U16*)out + 2) = (gfsdk_U16)(*(result+2));
		*((gfsdk_U16*)out + 3) = (gfsdk_U16)(*(result+3));
	};

	inline float float32(const gfsdk_U16 in)
	{
		gfsdk_U32 fltInt16 = in;
		gfsdk_U32 fltInt32 = gfsdk_U32(fltInt16 >> 15) << 8;
		gfsdk_U32 tmp = (fltInt16 >> 10) & 0x1f;
		tmp = (tmp + 0x70);								// TODO: doesn't handle specials...
		fltInt32 = (fltInt32 | tmp) << 23;
		fltInt32 |= (fltInt16 << 13) & 0x7fffff;	

		float result;
		*((gfsdk_U32*)&result) = fltInt32;
		return result;
	}

	inline gfsdk_float4 float32x4(const gfsdk_U16* __restrict in)
	{
		gfsdk_float4 result;
		result.x = float32(in[0]);
		result.y = float32(in[1]);
		result.z = float32(in[2]);
		result.w = float32(in[3]);
		return result;
	}
};

#endif	// _NVWAVEWORKS_FLOAT16_UTIL_H