Commit 2e60eef8 authored by Danillo Bracco Graziosi's avatar Danillo Bracco Graziosi Committed by David Flynn
Browse files

raht/m44486: fixed-point implementation

This replaces the previous floating point transform implementation with
a fixed-point alternative with essentially identical compression
performance.
parent cecfbf46
......@@ -40,6 +40,7 @@
#include "entropy.h"
#include "io_hls.h"
#include "RAHT.h"
#include "FixedPoint.h"
namespace pcc {
......@@ -352,8 +353,11 @@ AttributeDecoder::decodeReflectancesRaht(
PCCPointSet3& pointCloud)
{
const int voxelCount = int(pointCloud.getPointCount());
uint64_t* weight = new uint64_t[voxelCount];
int* binaryLayer = new int[voxelCount];
std::vector<MortonCodeWithIndex> packedVoxel(voxelCount);
for (int n = 0; n < voxelCount; n++) {
weight[n] = 1;
const auto position = pointCloud[n];
int x = int(position[0]);
int y = int(position[1]);
......@@ -369,55 +373,34 @@ AttributeDecoder::decodeReflectancesRaht(
}
sort(packedVoxel.begin(), packedVoxel.end());
// Moroton codes
// Morton codes
long long* mortonCode = new long long[voxelCount];
for (int n = 0; n < voxelCount; n++) {
mortonCode[n] = packedVoxel[n].mortonCode;
}
// Re-obtain weights at the decoder by calling RAHT without any attributes.
float* weight = new float[voxelCount];
int* binaryLayer = new int[voxelCount];
regionAdaptiveHierarchicalTransform(
mortonCode, nullptr, weight, binaryLayer, 0, voxelCount, aps.raht_depth);
// Sort integerized attributes by weight
std::vector<WeightWithIndex> sortedWeight(voxelCount);
for (int n = 0; n < voxelCount; n++) {
sortedWeight[n].weight = weight[n];
sortedWeight[n].index = n;
}
sort(sortedWeight.begin(), sortedWeight.end());
// Entropy decode
int* sortedIntegerizedAttributes = new int[voxelCount];
const int attribCount = 1;
uint32_t value;
int* integerizedAttributes = new int[attribCount * voxelCount];
for (int n = 0; n < voxelCount; ++n) {
const uint32_t attValue0 = decoder.decode();
sortedIntegerizedAttributes[n] = UIntToInt(attValue0);
value = decoder.decode();
integerizedAttributes[n] = UIntToInt(value);
}
// Unsort integerized attributes by weight.
int* integerizedAttributes = new int[voxelCount];
for (int n = 0; n < voxelCount; n++) {
integerizedAttributes[sortedWeight[n].index] =
sortedIntegerizedAttributes[n];
}
// Inverse Quantize.
float* attributes = new float[voxelCount];
for (int n = 0; n < voxelCount; n++) {
attributes[n] = integerizedAttributes[n] * aps.quant_step_size_luma;
}
FixedPoint* attributes = new FixedPoint[attribCount * voxelCount];
regionAdaptiveHierarchicalInverseTransform(
mortonCode, attributes, 1, voxelCount, aps.raht_depth);
FixedPoint(aps.quant_step_size_luma), mortonCode, attributes, weight,
attribCount, voxelCount, integerizedAttributes);
const int maxReflectance = (1 << desc.attr_bitdepth) - 1;
const int minReflectance = 0;
const int64_t maxReflectance = (1 << desc.attr_bitdepth) - 1;
const int64_t minReflectance = 0;
for (int n = 0; n < voxelCount; n++) {
const int reflectance =
PCCClip((int)round(attributes[n]), minReflectance, maxReflectance);
pointCloud.setReflectance(packedVoxel[n].index, uint16_t(reflectance));
int64_t val = attributes[attribCount * n].round();
const uint16_t reflectance =
(uint16_t)PCCClip(val, minReflectance, maxReflectance);
pointCloud.setReflectance(packedVoxel[n].index, reflectance);
}
// De-allocate arrays.
......@@ -425,7 +408,6 @@ AttributeDecoder::decodeReflectancesRaht(
delete[] mortonCode;
delete[] attributes;
delete[] integerizedAttributes;
delete[] sortedIntegerizedAttributes;
delete[] weight;
}
......@@ -439,8 +421,11 @@ AttributeDecoder::decodeColorsRaht(
PCCPointSet3& pointCloud)
{
const int voxelCount = int(pointCloud.getPointCount());
uint64_t* weight = new uint64_t[voxelCount];
int* binaryLayer = new int[voxelCount];
std::vector<MortonCodeWithIndex> packedVoxel(voxelCount);
for (int n = 0; n < voxelCount; n++) {
weight[n] = 1;
const auto position = pointCloud[n];
int x = int(position[0]);
int y = int(position[1]);
......@@ -456,74 +441,35 @@ AttributeDecoder::decodeColorsRaht(
}
sort(packedVoxel.begin(), packedVoxel.end());
// Moroton codes
// Morton codes
long long* mortonCode = new long long[voxelCount];
for (int n = 0; n < voxelCount; n++) {
mortonCode[n] = packedVoxel[n].mortonCode;
}
// Re-obtain weights at the decoder by calling RAHT without any attributes.
float* weight = new float[voxelCount];
int* binaryLayer = new int[voxelCount];
regionAdaptiveHierarchicalTransform(
mortonCode, nullptr, weight, binaryLayer, 0, voxelCount, aps.raht_depth);
// Sort integerized attributes by weight
std::vector<WeightWithIndex> sortedWeight(voxelCount);
for (int n = 0; n < voxelCount; n++) {
sortedWeight[n].weight = weight[n];
sortedWeight[n].index = n;
}
sort(sortedWeight.begin(), sortedWeight.end());
// Entropy decode
const int attribCount = 3;
uint32_t values[3];
int* sortedIntegerizedAttributes = new int[attribCount * voxelCount];
for (int n = 0; n < voxelCount; ++n) {
if (
binaryLayer[sortedWeight[n].index] >= aps.raht_binary_level_threshold) {
decoder.decode(values);
sortedIntegerizedAttributes[n] = UIntToInt(values[0]);
for (int d = 1; d < 3; ++d) {
sortedIntegerizedAttributes[voxelCount * d + n] = UIntToInt(values[d]);
}
} else {
values[0] = decoder.decode();
sortedIntegerizedAttributes[n] = UIntToInt(values[0]);
for (int d = 1; d < 3; d++) {
sortedIntegerizedAttributes[voxelCount * d + n] = 0;
}
}
}
// Unsort integerized attributes by weight.
int* integerizedAttributes = new int[attribCount * voxelCount];
for (int n = 0; n < voxelCount; n++) {
for (int k = 0; k < attribCount; k++) {
// Pull sorted integerized attributes out of column-major order.
integerizedAttributes[attribCount * sortedWeight[n].index + k] =
sortedIntegerizedAttributes[voxelCount * k + n];
}
}
// Inverse Quantize.
float* attributes = new float[attribCount * voxelCount];
for (int n = 0; n < voxelCount; n++) {
for (int k = 0; k < attribCount; k++) {
attributes[attribCount * n + k] =
integerizedAttributes[attribCount * n + k] * aps.quant_step_size_luma;
for (int n = 0; n < voxelCount; ++n) {
decoder.decode(values);
for (int d = 0; d < attribCount; ++d) {
integerizedAttributes[voxelCount * d + n] = UIntToInt(values[d]);
}
}
FixedPoint* attributes = new FixedPoint[attribCount * voxelCount];
regionAdaptiveHierarchicalInverseTransform(
mortonCode, attributes, attribCount, voxelCount, aps.raht_depth);
FixedPoint(aps.quant_step_size_luma), mortonCode, attributes, weight,
attribCount, voxelCount, integerizedAttributes);
const int clipMax = (1 << desc.attr_bitdepth) - 1;
for (int n = 0; n < voxelCount; n++) {
const int r = (int)round(attributes[attribCount * n]);
const int g = (int)round(attributes[attribCount * n + 1]);
const int b = (int)round(attributes[attribCount * n + 2]);
const int r = attributes[attribCount * n].round();
const int g = attributes[attribCount * n + 1].round();
const int b = attributes[attribCount * n + 2].round();
PCCColor3B color;
color[0] = uint8_t(PCCClip(r, 0, clipMax));
color[1] = uint8_t(PCCClip(g, 0, clipMax));
......@@ -536,7 +482,6 @@ AttributeDecoder::decodeColorsRaht(
delete[] mortonCode;
delete[] attributes;
delete[] integerizedAttributes;
delete[] sortedIntegerizedAttributes;
delete[] weight;
}
......
......@@ -40,6 +40,7 @@
#include "constants.h"
#include "entropy.h"
#include "RAHT.h"
#include "FixedPoint.h"
// todo(df): promote to per-attribute encoder parameter
static const float kAttrPredLambdaR = 0.01;
......@@ -628,13 +629,12 @@ AttributeEncoder::encodeReflectancesTransformRaht(
PCCResidualsEncoder& encoder)
{
const int voxelCount = int(pointCloud.getPointCount());
// Pack voxel into int64, sort in Morton order, and unpack.
std::vector<MortonCodeWithIndex> packedVoxel(voxelCount);
for (int n = 0; n < voxelCount; n++) {
const auto position = pointCloud[n];
const int x = int(position[0]);
const int y = int(position[1]);
const int z = int(position[2]);
int x = int(position[0]);
int y = int(position[1]);
int z = int(position[2]);
long long mortonCode = 0;
for (int b = 0; b < aps.raht_depth; b++) {
mortonCode |= (long long)((x >> b) & 1) << (3 * b + 2);
......@@ -648,77 +648,52 @@ AttributeEncoder::encodeReflectancesTransformRaht(
// Allocate arrays.
long long* mortonCode = new long long[voxelCount];
float* attributes = new float[voxelCount];
int* integerizedAttributes = new int[voxelCount];
int* sortedIntegerizedAttributes = new int[voxelCount];
float* weight = new float[voxelCount];
const int attribCount = 1;
FixedPoint* attributes = new FixedPoint[attribCount * voxelCount];
int* integerizedAttributes = new int[attribCount * voxelCount];
uint64_t* weight = new uint64_t[voxelCount];
int* binaryLayer = new int[voxelCount];
// Populate input arrays.
for (int n = 0; n < voxelCount; n++) {
weight[n] = 1;
mortonCode[n] = packedVoxel[n].mortonCode;
attributes[n] = pointCloud.getReflectance(packedVoxel[n].index);
const auto reflectance = pointCloud.getReflectance(packedVoxel[n].index);
attributes[attribCount * n] = reflectance;
}
// Transform.
regionAdaptiveHierarchicalTransform(
mortonCode, attributes, weight, binaryLayer, 1, voxelCount,
aps.raht_depth);
// Quantize.
for (int n = 0; n < voxelCount; n++) {
integerizedAttributes[n] =
int(round(attributes[n] / aps.quant_step_size_luma));
}
FixedPoint(aps.quant_step_size_luma), mortonCode, attributes, weight,
binaryLayer, attribCount, voxelCount, integerizedAttributes);
// Sort integerized attributes by weight.
std::vector<WeightWithIndex> sortedWeight(voxelCount);
for (int n = 0; n < voxelCount; n++) {
sortedWeight[n].weight = weight[n];
sortedWeight[n].index = n;
}
sort(sortedWeight.begin(), sortedWeight.end());
for (int n = 0; n < voxelCount; n++) {
// Put sorted integerized attributes into column-major order.
sortedIntegerizedAttributes[n] =
integerizedAttributes[sortedWeight[n].index];
}
// Entropy encode.
uint32_t value;
for (int n = 0; n < voxelCount; ++n) {
const int64_t detail = IntToUInt(sortedIntegerizedAttributes[n]);
const int64_t detail = IntToUInt(integerizedAttributes[n]);
assert(detail < std::numeric_limits<uint32_t>::max());
const uint32_t attValue0 = uint32_t(detail);
encoder.encode(attValue0);
value = uint32_t(detail);
encoder.encode(value);
}
// Re-obtain weights at the decoder by calling Raht without any attributes.
regionAdaptiveHierarchicalTransform(
mortonCode, nullptr, weight, binaryLayer, 0, voxelCount, aps.raht_depth);
// Sort integerized attributes by weight.
for (int n = 0; n < voxelCount; n++) {
sortedWeight[n].weight = weight[n];
sortedWeight[n].index = n;
}
sort(sortedWeight.begin(), sortedWeight.end());
// Unsort integerized attributes by weight.
// local decode
std::fill_n(attributes, attribCount * voxelCount, FixedPoint(0));
for (int n = 0; n < voxelCount; n++) {
// Pull sorted integerized attributes out of column-major order.
integerizedAttributes[sortedWeight[n].index] =
sortedIntegerizedAttributes[n];
}
// Inverse Quantize.
for (int n = 0; n < voxelCount; n++) {
attributes[n] = integerizedAttributes[n] * aps.quant_step_size_luma;
mortonCode[n] = packedVoxel[n].mortonCode;
weight[n] = 1;
}
regionAdaptiveHierarchicalInverseTransform(
mortonCode, attributes, 1, voxelCount, aps.raht_depth);
FixedPoint(aps.quant_step_size_luma), mortonCode, attributes, weight,
attribCount, voxelCount, integerizedAttributes);
const int maxReflectance = (1 << desc.attr_bitdepth) - 1;
const int minReflectance = 0;
const int64_t maxReflectance = (1 << desc.attr_bitdepth) - 1;
const int64_t minReflectance = 0;
for (int n = 0; n < voxelCount; n++) {
const int reflectance =
PCCClip((int)round(attributes[n]), minReflectance, maxReflectance);
pointCloud.setReflectance(packedVoxel[n].index, uint16_t(reflectance));
int64_t val = attributes[attribCount * n].round();
const uint16_t reflectance =
(uint16_t)PCCClip(val, minReflectance, maxReflectance);
pointCloud.setReflectance(packedVoxel[n].index, reflectance);
}
// De-allocate arrays.
......@@ -726,7 +701,6 @@ AttributeEncoder::encodeReflectancesTransformRaht(
delete[] mortonCode;
delete[] attributes;
delete[] integerizedAttributes;
delete[] sortedIntegerizedAttributes;
delete[] weight;
}
......@@ -760,14 +734,14 @@ AttributeEncoder::encodeColorsTransformRaht(
// Allocate arrays.
long long* mortonCode = new long long[voxelCount];
const int attribCount = 3;
float* attributes = new float[attribCount * voxelCount];
FixedPoint* attributes = new FixedPoint[attribCount * voxelCount];
int* integerizedAttributes = new int[attribCount * voxelCount];
int* sortedIntegerizedAttributes = new int[attribCount * voxelCount];
float* weight = new float[voxelCount];
uint64_t* weight = new uint64_t[voxelCount];
int* binaryLayer = new int[voxelCount];
// Populate input arrays.
for (int n = 0; n < voxelCount; n++) {
weight[n] = 1;
mortonCode[n] = packedVoxel[n].mortonCode;
const auto color = pointCloud.getColor(packedVoxel[n].index);
attributes[attribCount * n] = color[0];
......@@ -777,89 +751,37 @@ AttributeEncoder::encodeColorsTransformRaht(
// Transform.
regionAdaptiveHierarchicalTransform(
mortonCode, attributes, weight, binaryLayer, attribCount, voxelCount,
aps.raht_depth);
// Quantize.
for (int n = 0; n < voxelCount; n++) {
for (int k = 0; k < attribCount; k++) {
integerizedAttributes[attribCount * n + k] =
int(round(attributes[attribCount * n + k] / aps.quant_step_size_luma));
}
}
// Sort integerized attributes by weight.
std::vector<WeightWithIndex> sortedWeight(voxelCount);
for (int n = 0; n < voxelCount; n++) {
sortedWeight[n].weight = weight[n];
sortedWeight[n].index = n;
}
sort(sortedWeight.begin(), sortedWeight.end());
for (int n = 0; n < voxelCount; n++) {
for (int k = 0; k < attribCount; k++) {
// Put sorted integerized attributes into column-major order.
sortedIntegerizedAttributes[voxelCount * k + n] =
integerizedAttributes[attribCount * sortedWeight[n].index + k];
}
}
FixedPoint(aps.quant_step_size_luma), mortonCode, attributes, weight,
binaryLayer, attribCount, voxelCount, integerizedAttributes);
// Entropy encode.
uint32_t values[3];
uint32_t values[attribCount];
for (int n = 0; n < voxelCount; ++n) {
const int64_t detail = IntToUInt(sortedIntegerizedAttributes[n]);
assert(detail < std::numeric_limits<uint32_t>::max());
values[0] = uint32_t(detail);
if (
binaryLayer[sortedWeight[n].index] >= aps.raht_binary_level_threshold) {
for (int d = 1; d < 3; ++d) {
const int64_t detail =
IntToUInt(sortedIntegerizedAttributes[voxelCount * d + n]);
assert(detail < std::numeric_limits<uint32_t>::max());
values[d] = uint32_t(detail);
}
encoder.encode(values[0], values[1], values[2]);
} else {
for (int d = 1; d < 3; d++) {
sortedIntegerizedAttributes[voxelCount * d + n] = 0;
}
encoder.encode(values[0]);
for (int d = 0; d < attribCount; ++d) {
const int64_t detail =
IntToUInt(integerizedAttributes[voxelCount * d + n]);
assert(detail < std::numeric_limits<uint32_t>::max());
values[d] = uint32_t(detail);
}
encoder.encode(values[0], values[1], values[2]);
}
// Re-obtain weights at the decoder by calling RAHT without any attributes.
regionAdaptiveHierarchicalTransform(
mortonCode, nullptr, weight, binaryLayer, 0, voxelCount, aps.raht_depth);
// Sort integerized attributes by weight.
// local decode
std::fill_n(attributes, attribCount * voxelCount, FixedPoint(0));
for (int n = 0; n < voxelCount; n++) {
sortedWeight[n].weight = weight[n];
sortedWeight[n].index = n;
}
sort(sortedWeight.begin(), sortedWeight.end());
// Unsort integerized attributes by weight.
for (int n = 0; n < voxelCount; n++) {
for (int k = 0; k < attribCount; k++) {
// Pull sorted integerized attributes out of column-major order.
integerizedAttributes[attribCount * sortedWeight[n].index + k] =
sortedIntegerizedAttributes[voxelCount * k + n];
}
}
// Inverse Quantize.
for (int n = 0; n < voxelCount; n++) {
for (int k = 0; k < attribCount; k++) {
attributes[attribCount * n + k] =
integerizedAttributes[attribCount * n + k] * aps.quant_step_size_luma;
}
weight[n] = 1;
mortonCode[n] = packedVoxel[n].mortonCode;
}
regionAdaptiveHierarchicalInverseTransform(
mortonCode, attributes, attribCount, voxelCount, aps.raht_depth);
FixedPoint(aps.quant_step_size_luma), mortonCode, attributes, weight,
attribCount, voxelCount, integerizedAttributes);
const int clipMax = (1 << desc.attr_bitdepth) - 1;
for (size_t n = 0; n < voxelCount; n++) {
const int r = (int)round(attributes[attribCount * n]);
const int g = (int)round(attributes[attribCount * n + 1]);
const int b = (int)round(attributes[attribCount * n + 2]);
for (int n = 0; n < voxelCount; n++) {
const int r = attributes[attribCount * n].round();
const int g = attributes[attribCount * n + 1].round();
const int b = attributes[attribCount * n + 2].round();
PCCColor3B color;
color[0] = uint8_t(PCCClip(r, 0, clipMax));
color[1] = uint8_t(PCCClip(g, 0, clipMax));
......@@ -872,7 +794,6 @@ AttributeEncoder::encodeColorsTransformRaht(
delete[] mortonCode;
delete[] attributes;
delete[] integerizedAttributes;
delete[] sortedIntegerizedAttributes;
delete[] weight;
}
......
......@@ -61,6 +61,7 @@ file(GLOB PROJECT_INC_FILES
"BitReader.h"
"BitWriter.h"
"DualLutCoder.h"
"FixedPoint.h"
"OctreeNeighMap.h"
"PCCKdTree.h"
"PCCMath.h"
......@@ -100,6 +101,7 @@ file(GLOB PROJECT_CPP_FILES
"AttributeDecoder.cpp"
"AttributeEncoder.cpp"
"DualLutCoder.cpp"
"FixedPoint.cpp"
"OctreeNeighMap.cpp"
"RAHT.cpp"
"TMC3.cpp"
......
/* The copyright in this software is being made available under the BSD
* Licence, included below. This software may be subject to other third
* party and contributor rights, including patent rights, and no such
* rights are granted under this licence.
*
* Copyright (c) 2019, ISO/IEC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of the ISO/IEC nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "FixedPoint.h"
namespace pcc {
//============================================================================
void
FixedPoint::operator/=(const FixedPoint& that)
{
if (this->val < 0) {
if (that.val < 0)
this->val =
-(((-that.val) >> 1) + ((-this->val) << kFracBits)) / that.val;
else
this->val =
-(((+that.val) >> 1) + ((-this->val) << kFracBits)) / that.val;
} else {
if (that.val < 0)
this->val =
+(((-that.val) >> 1) + ((+this->val) << kFracBits)) / that.val;
else
this->val =
+(((+that.val) >> 1) + ((+this->val) << kFracBits)) / that.val;
}
}
static const uint32_t kSqrtLut[256] = {
1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16};
uint32_t
sqrtFixedpoint(uint64_t x)
{
uint32_t a;
// Initial guess
if (x >= ((int64_t)0x1 << 32)) {
if (x >= ((int64_t)0x1 << 48)) {
if (x >= ((int64_t)0x1 << 56))
a = (kSqrtLut[x >> 56] << 28) - 1;
else
a = kSqrtLut[x >> 48] << 24;
} else {
if (x >= ((int64_t)0x1 << 40))
a = kSqrtLut[x >> 40] << 20;
else
a = kSqrtLut[x >> 32] << 16;
}