1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
// This code contains NVIDIA Confidential Information and is disclosed
// under the Mutual Non-Disclosure Agreement.
//
// Notice
// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
//
// NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
// expressly authorized by NVIDIA. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (C) 2008-2013 NVIDIA Corporation. All rights reserved.
//
// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
// rights in and to this software and related documentation and any modifications thereto.
// Any use, reproduction, disclosure or distribution of this software and related
// documentation without an express license agreement from NVIDIA Corporation is
// strictly prohibited.
//
#ifndef _NVWAVEWORKS_FLOAT16_UTIL_H
#define _NVWAVEWORKS_FLOAT16_UTIL_H
#include <cstring>

#include "simd/Simd4f.h"
#include "simd/Simd4i.h"
namespace GFSDK_WaveWorks_Float16_Util
{
// Scalar (non-SIMD) conversion of one 32-bit float to a 16-bit half-float bit
// pattern (1 sign bit, 5 exponent bits, 10 mantissa bits).
//
//   out - receives the 16-bit result in out[0].
//   in  - value to convert.
//
// The mantissa is truncated (not rounded) to 10 bits. Range handling:
//   - NaN stays NaN, +/-Inf stays +/-Inf;
//   - magnitudes too large for a half (>= 65536) become +/-Inf;
//   - magnitudes below the smallest normal half (2^-14) become a half
//     denormal, or signed zero when they are below 2^-24.
// For inputs inside the normal half range the produced bits are the same as
// the plain "rebias exponent, truncate mantissa" computation.
inline void float16(gfsdk_U16* __restrict out, const float in)
{
    // Copy the bit pattern out of the float; memcpy avoids reading a float
    // object through an integer pointer.
    gfsdk_U32 bits32;
    std::memcpy(&bits32, &in, sizeof(bits32));

    const gfsdk_U32 sign16 = (bits32 >> 16) & 0x8000u;   // float bit 31 -> half bit 15
    const gfsdk_U32 exp32  = (bits32 >> 23) & 0xffu;     // biased float exponent
    const gfsdk_U32 mant32 = bits32 & 0x7fffffu;         // 23-bit float mantissa

    gfsdk_U32 magnitude16;
    if (exp32 == 0xffu)
    {
        // Inf or NaN. If any mantissa bit was set, force one of the surviving
        // bits on so truncation cannot turn a NaN into an infinity.
        magnitude16 = 0x7c00u | (mant32 >> 13) | (mant32 != 0 ? 0x200u : 0u);
    }
    else if (exp32 > 0x8eu)
    {
        // Rebased exponent would not fit in 5 bits: saturate to infinity.
        magnitude16 = 0x7c00u;
    }
    else if (exp32 > 0x70u)
    {
        // Normal half: exponent bias changes from 127 to 15 (difference 0x70).
        magnitude16 = ((exp32 - 0x70u) << 10) | (mant32 >> 13);
    }
    else if (exp32 >= 0x67u)
    {
        // Half denormal: make the implicit leading 1 explicit and shift it
        // down to the fixed 2^-24 scale used by denormal halves.
        magnitude16 = (mant32 | 0x800000u) >> (0x7eu - exp32);
    }
    else
    {
        // Too small for even the smallest half denormal.
        magnitude16 = 0u;
    }

    *out = static_cast<gfsdk_U16>(sign16 | magnitude16);
}
// SIMD conversion of four 32-bit floats to four 16-bit half-float bit patterns.
//
//   out - receives the four 16-bit results in out[0..3], one per lane.
//   in  - the four values to convert.
//
// Per lane: the sign bit is moved next to a 5-bit exponent field, the float
// exponent is rebased by 0x70 and the top 10 mantissa bits are kept.
// The rebased exponent is zeroed when the float exponent is <= 0x70 and is
// otherwise only masked to 5 bits (no clamping), so values outside the normal
// half range, Inf and NaN are not given any special treatment.
// NOTE(review): relies on Simd4i having four 32-bit lanes and on the shift
// operators acting per lane - confirm against simd/Simd4i.h.
inline void float16x4(gfsdk_U16* __restrict out, const Simd4f in)
{
    // Reinterpret the float lanes as integer lanes.
    Simd4i bits32 = *((Simd4i*)&in);
    Simd4i rebias = simd4i(0x70);

    // Exponent field: rebase it, forcing it to zero when it would not be positive.
    Simd4i exponent = (bits32 >> 23) & simd4i(0xff);
    Simd4i positiveMask = ((simdi::operator-(rebias, exponent)) >> 16) & simd4i(0x0000001f);
    exponent = (simdi::operator-(exponent, rebias)) & positiveMask;

    // Assemble sign | exponent | mantissa in the low 16 bits of each lane.
    Simd4i packed = (bits32 >> 31) << 5;
    packed = (packed | exponent) << 10;
    packed = packed | ((bits32 >> 13) & simd4i(0x3ff));

    // Store the low 16 bits of every lane.
    gfsdk_U32* lanes = (gfsdk_U32*)&packed;
    out[0] = (gfsdk_U16)(lanes[0]);
    out[1] = (gfsdk_U16)(lanes[1]);
    out[2] = (gfsdk_U16)(lanes[2]);
    out[3] = (gfsdk_U16)(lanes[3]);
}
inline float float32(const gfsdk_U16 in)
{
gfsdk_U32 fltInt16 = in;
gfsdk_U32 fltInt32 = gfsdk_U32(fltInt16 >> 15) << 8;
gfsdk_U32 tmp = (fltInt16 >> 10) & 0x1f;
tmp = (tmp + 0x70); // TODO: doesn't handle specials...
fltInt32 = (fltInt32 | tmp) << 23;
fltInt32 |= (fltInt16 << 13) & 0x7fffff;
float result;
*((gfsdk_U32*)&result) = fltInt32;
return result;
}
// Expands four consecutive 16-bit half-float bit patterns to floats.
//
//   in      - points at four values; in[0..3] are read.
//   returns - a gfsdk_float4 whose x, y, z, w hold float32() of in[0], in[1],
//             in[2], in[3] respectively.
inline gfsdk_float4 float32x4(const gfsdk_U16* __restrict in)
{
    const float first  = float32(in[0]);
    const float second = float32(in[1]);
    const float third  = float32(in[2]);
    const float fourth = float32(in[3]);

    gfsdk_float4 expanded;
    expanded.x = first;
    expanded.y = second;
    expanded.z = third;
    expanded.w = fourth;
    return expanded;
}
};
#endif // _NVWAVEWORKS_FLOAT16_UTIL_H
|