Commit dce92811, authored by Toshiyasu Sugio, committed by David Flynn
Browse files

attr/m43665: configurable number of direct neighbour predictors

This commit adds the ability to vary the number of direct neighbours
used in the predicting transform.  Mode 0 corresponds to the existing
weighted averaging of all neighbours, and mode n (n > 0) selects the
n-th nearest neighbour for direct prediction.

The provided code has been reworked to:
 - rewrite the unary coding to be a little more obvious
 - tidy the mode decision path
 - add configuration for max_num_direct_predictors (renamed from
   MaxNumPredCand)
 - count the number of single predictors, rather than prediction
   modes.
parent cb261e2d
...@@ -49,14 +49,14 @@ namespace pcc { ...@@ -49,14 +49,14 @@ namespace pcc {
struct PCCResidualsDecoder { struct PCCResidualsDecoder {
EntropyDecoder arithmeticDecoder; EntropyDecoder arithmeticDecoder;
StaticBitModel binaryModel0; StaticBitModel binaryModel0;
AdaptiveBitModel binaryModelPred;
AdaptiveBitModel binaryModelDiff[7]; AdaptiveBitModel binaryModelDiff[7];
AdaptiveBitModel binaryModelIsZero[7]; AdaptiveBitModel binaryModelIsZero[7];
AdaptiveBitModel ctxPredMode[2];
DualLutCoder<false> symbolCoder[2]; DualLutCoder<false> symbolCoder[2];
void start(const char* buf, int buf_len); void start(const char* buf, int buf_len);
void stop(); void stop();
bool decodePred(); int decodePredMode(int max);
uint32_t decodeSymbol(int k1, int k2); uint32_t decodeSymbol(int k1, int k2);
void decode(uint32_t values[3]); void decode(uint32_t values[3]);
uint32_t decode(); uint32_t decode();
...@@ -81,10 +81,23 @@ PCCResidualsDecoder::stop() ...@@ -81,10 +81,23 @@ PCCResidualsDecoder::stop()
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
bool int
PCCResidualsDecoder::decodePred() PCCResidualsDecoder::decodePredMode(int maxMode)
{ {
return arithmeticDecoder.decode(binaryModelPred); int mode = 0;
if (maxMode == 0)
return mode;
int ctxIdx = 0;
while (arithmeticDecoder.decode(ctxPredMode[ctxIdx])) {
ctxIdx = 1;
mode++;
if (mode == maxMode)
break;
}
return mode;
} }
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
...@@ -204,11 +217,8 @@ AttributeDecoder::computeReflectancePredictionWeights( ...@@ -204,11 +217,8 @@ AttributeDecoder::computeReflectancePredictionWeights(
} }
const int64_t maxDiff = maxValue - minValue; const int64_t maxDiff = maxValue - minValue;
if (maxDiff > aps.adaptive_prediction_threshold) { if (maxDiff > aps.adaptive_prediction_threshold) {
const bool predIndex = decoder.decodePred(); predictor.predMode =
if (predIndex == 0) { decoder.decodePredMode(aps.max_num_direct_predictors);
predictor.neighborCount = 1;
predictor.neighbors[0].weight = 1.0;
}
} }
} }
} }
...@@ -278,11 +288,8 @@ AttributeDecoder::computeColorPredictionWeights( ...@@ -278,11 +288,8 @@ AttributeDecoder::computeColorPredictionWeights(
maxValue[2] - minValue[2], maxValue[2] - minValue[2],
(std::max)(maxValue[0] - minValue[0], maxValue[1] - minValue[1])); (std::max)(maxValue[0] - minValue[0], maxValue[1] - minValue[1]));
if (maxDiff > aps.adaptive_prediction_threshold) { if (maxDiff > aps.adaptive_prediction_threshold) {
const bool predIndex = decoder.decodePred(); predictor.predMode =
if (predIndex == 0) { decoder.decodePredMode(aps.max_num_direct_predictors);
predictor.neighborCount = 1;
predictor.neighbors[0].weight = 1.0;
}
} }
} }
} }
......
...@@ -41,6 +41,10 @@ ...@@ -41,6 +41,10 @@
#include "entropy.h" #include "entropy.h"
#include "RAHT.h" #include "RAHT.h"
// todo(df): promote to per-attribute encoder parameter
static const float kAttrPredLambdaR = 0.01;
static const float kAttrPredLambdaC = 0.01;
namespace pcc { namespace pcc {
//============================================================================ //============================================================================
// An encapsulation of the entropy coding methods used in attribute coding // An encapsulation of the entropy coding methods used in attribute coding
...@@ -48,14 +52,14 @@ namespace pcc { ...@@ -48,14 +52,14 @@ namespace pcc {
struct PCCResidualsEncoder { struct PCCResidualsEncoder {
EntropyEncoder arithmeticEncoder; EntropyEncoder arithmeticEncoder;
StaticBitModel binaryModel0; StaticBitModel binaryModel0;
AdaptiveBitModel binaryModelPred;
AdaptiveBitModel binaryModelDiff[7]; AdaptiveBitModel binaryModelDiff[7];
AdaptiveBitModel binaryModelIsZero[7]; AdaptiveBitModel binaryModelIsZero[7];
AdaptiveBitModel ctxPredMode[2];
DualLutCoder<false> symbolCoder[2]; DualLutCoder<false> symbolCoder[2];
void start(int numPoints); void start(int numPoints);
int stop(); int stop();
void encodePred(const bool value); void encodePredMode(int value, int max);
void encodeSymbol(uint32_t value, int k1, int k2); void encodeSymbol(uint32_t value, int k1, int k2);
void encode(uint32_t value0, uint32_t value1, uint32_t value2); void encode(uint32_t value0, uint32_t value1, uint32_t value2);
void encode(uint32_t value); void encode(uint32_t value);
...@@ -83,9 +87,21 @@ PCCResidualsEncoder::stop() ...@@ -83,9 +87,21 @@ PCCResidualsEncoder::stop()
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
void void
PCCResidualsEncoder::encodePred(const bool value) PCCResidualsEncoder::encodePredMode(int mode, int maxMode)
{ {
arithmeticEncoder.encode(value, binaryModelPred); // max = 0 => no direct predictors are used
if (maxMode == 0)
return;
int ctxIdx = 0;
for (int i = 0; i < mode; i++) {
arithmeticEncoder.encode(1, ctxPredMode[ctxIdx]);
ctxIdx = 1;
}
// Truncated unary
if (mode != maxMode)
arithmeticEncoder.encode(0, ctxPredMode[ctxIdx]);
} }
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
...@@ -360,26 +376,37 @@ AttributeEncoder::computeReflectancePredictionWeights( ...@@ -360,26 +376,37 @@ AttributeEncoder::computeReflectancePredictionWeights(
const int64_t maxDiff = maxValue - minValue; const int64_t maxDiff = maxValue - minValue;
if (maxDiff > aps.adaptive_prediction_threshold) { if (maxDiff > aps.adaptive_prediction_threshold) {
const int qs = aps.quant_step_size_luma; const int qs = aps.quant_step_size_luma;
const uint16_t reflectance = pointCloud.getReflectance(predictor.index); uint16_t attrValue = pointCloud.getReflectance(predictor.index);
const uint16_t reflectance0 =
pointCloud.getReflectance(predictor.neighbors[0].index); // base case: weighted average of n neighbours
const uint16_t predictedReflectance = predictor.predMode = 0;
predictor.predictReflectance(pointCloud); uint16_t attrPred = predictor.predictReflectance(pointCloud);
const int64_t residuals1 = int64_t attrResidualQuant =
computeReflectanceResidual(reflectance, predictedReflectance, qs); computeReflectanceResidual(attrValue, attrPred, qs);
const int64_t residuals0 =
computeReflectanceResidual(reflectance, reflectance0, qs); double best_score = attrResidualQuant + kAttrPredLambdaR * (double)qs;
const double bits1 = std::round(1000.0 * context.bits(residuals1));
const double bits0 = std::round(1000.0 * context.bits(residuals0)); for (int i = 0; i < predictor.neighborCount; i++) {
if ((bits1 - bits0) <= 1.0) { if (i == aps.max_num_direct_predictors)
context.update(residuals1); break;
encoder.encodePred(1);
} else { attrPred = pointCloud.getReflectance(predictor.neighbors[i].index);
context.update(residuals0); attrResidualQuant =
encoder.encodePred(0); computeReflectanceResidual(attrValue, attrPred, qs);
predictor.neighbors[0].weight = 1.0;
predictor.neighborCount = 1; double idxBits = i + (i == aps.max_num_direct_predictors - 1 ? 1 : 2);
double score = attrResidualQuant + idxBits * kAttrPredLambdaR * qs;
if (score < best_score) {
best_score = score;
predictor.predMode = i + 1;
// NB: setting predictor.neighborCount = 1 will cause issues
// with reconstruction.
}
} }
encoder.encodePredMode(
predictor.predMode, aps.max_num_direct_predictors);
} }
} }
} }
...@@ -487,27 +514,39 @@ AttributeEncoder::computeColorPredictionWeights( ...@@ -487,27 +514,39 @@ AttributeEncoder::computeColorPredictionWeights(
if (maxDiff > aps.adaptive_prediction_threshold) { if (maxDiff > aps.adaptive_prediction_threshold) {
const int qs = aps.quant_step_size_luma; const int qs = aps.quant_step_size_luma;
const int qs2 = aps.quant_step_size_chroma; const int qs2 = aps.quant_step_size_chroma;
const PCCColor3B color = pointCloud.getColor(predictor.index); PCCColor3B attrValue = pointCloud.getColor(predictor.index);
const PCCColor3B color0 =
pointCloud.getColor(predictor.neighbors[0].index); // base case: weighted average of n neighbours
const PCCColor3B predictedColor = predictor.predictColor(pointCloud); predictor.predMode = 0;
const PCCVector3<int64_t> residuals1 = PCCColor3B attrPred = predictor.predictColor(pointCloud);
computeColorResiduals(color, predictedColor, qs, qs2); PCCVector3<int64_t> attrResidualQuant =
const PCCVector3<int64_t> residuals0 = computeColorResiduals(attrValue, attrPred, qs, qs2);
computeColorResiduals(color, color0, qs, qs2);
const double bits1 = std::round( double best_score = attrResidualQuant[0] + attrResidualQuant[1]
1000.0 * context.bits(residuals1[0], residuals1[1], residuals1[2])); + attrResidualQuant[2] + kAttrPredLambdaC * (double)qs;
const double bits0 = std::round(
1000.0 * context.bits(residuals0[0], residuals0[1], residuals0[2])); for (int i = 0; i < predictor.neighborCount; i++) {
if ((bits1 - bits0) <= 1.0) { if (i == aps.max_num_direct_predictors)
context.update(residuals1[0], residuals1[1], residuals1[2]); break;
encoder.encodePred(1);
} else { attrPred = pointCloud.getColor(predictor.neighbors[i].index);
context.update(residuals0[0], residuals0[1], residuals0[2]); attrResidualQuant =
encoder.encodePred(0); computeColorResiduals(attrValue, attrPred, qs, qs2);
predictor.neighbors[0].weight = 1.0;
predictor.neighborCount = 1; double idxBits = i + (i == aps.max_num_direct_predictors - 1 ? 1 : 2);
double score = attrResidualQuant[0] + attrResidualQuant[1]
+ attrResidualQuant[2] + idxBits * kAttrPredLambdaC * qs;
if (score < best_score) {
best_score = score;
predictor.predMode = i + 1;
// NB: setting predictor.neighborCount = 1 will cause issues
// with reconstruction.
}
} }
encoder.encodePredMode(
predictor.predMode, aps.max_num_direct_predictors);
} }
} }
} }
......
...@@ -79,27 +79,45 @@ struct PCCPredictor { ...@@ -79,27 +79,45 @@ struct PCCPredictor {
size_t neighborCount; size_t neighborCount;
uint32_t index; uint32_t index;
PCCNeighborInfo neighbors[kAttributePredictionMaxNeighbourCount]; PCCNeighborInfo neighbors[kAttributePredictionMaxNeighbourCount];
int8_t predMode;
PCCColor3B predictColor(const PCCPointSet3& pointCloud) const PCCColor3B predictColor(const PCCPointSet3& pointCloud) const
{ {
PCCVector3D predicted(0.0); PCCVector3D predicted(0.0);
for (size_t i = 0; i < neighborCount; ++i) { if (predMode > neighborCount) {
const PCCColor3B color = pointCloud.getColor(neighbors[i].index); /* nop */
const double w = neighbors[i].weight; } else if (predMode > 0) {
const PCCColor3B color =
pointCloud.getColor(neighbors[predMode - 1].index);
for (size_t k = 0; k < 3; ++k) { for (size_t k = 0; k < 3; ++k) {
predicted[k] += w * color[k]; predicted[k] += color[k];
}
} else {
for (size_t i = 0; i < neighborCount; ++i) {
const PCCColor3B color = pointCloud.getColor(neighbors[i].index);
const double w = neighbors[i].weight;
for (size_t k = 0; k < 3; ++k) {
predicted[k] += w * color[k];
}
} }
} }
return PCCColor3B( return PCCColor3B(
uint8_t(std::round(predicted[0])), uint8_t(std::round(predicted[1])), uint8_t(std::round(predicted[0])), uint8_t(std::round(predicted[1])),
uint8_t(std::round(predicted[2]))); uint8_t(std::round(predicted[2])));
} }
uint16_t predictReflectance(const PCCPointSet3& pointCloud) const uint16_t predictReflectance(const PCCPointSet3& pointCloud) const
{ {
double predicted(0.0); double predicted(0.0);
for (size_t i = 0; i < neighborCount; ++i) { if (predMode > neighborCount) {
predicted += /* nop */
neighbors[i].weight * pointCloud.getReflectance(neighbors[i].index); } else if (predMode > 0) {
predicted = pointCloud.getReflectance(neighbors[predMode - 1].index);
} else {
for (size_t i = 0; i < neighborCount; ++i) {
predicted +=
neighbors[i].weight * pointCloud.getReflectance(neighbors[i].index);
}
} }
return uint16_t(std::round(predicted)); return uint16_t(std::round(predicted));
} }
...@@ -130,6 +148,7 @@ struct PCCPredictor { ...@@ -130,6 +148,7 @@ struct PCCPredictor {
neighbors[0].index = reference; neighbors[0].index = reference;
neighbors[0].predictorIndex = predictorIndex; neighbors[0].predictorIndex = predictorIndex;
neighbors[0].weight = 1.0; neighbors[0].weight = 1.0;
predMode = 0;
} }
}; };
inline int64_t inline int64_t
......
...@@ -388,6 +388,11 @@ ParseParameters(int argc, char* argv[], Parameters& params) ...@@ -388,6 +388,11 @@ ParseParameters(int argc, char* argv[], Parameters& params)
"single|multi predictors. Applies to transformType=2 only.\n" "single|multi predictors. Applies to transformType=2 only.\n"
" -1: auto = 2**(bitdepth-2)") " -1: auto = 2**(bitdepth-2)")
("max_num_direct_predictors",
params_attr.aps.max_num_direct_predictors, 3,
"Maximum number of nearest neighbour candidates used in direct"
"attribute prediction")
("levelOfDetailCount", ("levelOfDetailCount",
params_attr.aps.numDetailLevels, 1, params_attr.aps.numDetailLevels, 1,
"Attribute's number of levels of detail") "Attribute's number of levels of detail")
......
...@@ -246,6 +246,7 @@ struct AttributeParameterSet { ...@@ -246,6 +246,7 @@ struct AttributeParameterSet {
//--- lifting/predicting transform parameters //--- lifting/predicting transform parameters
int num_pred_nearest_neighbours; int num_pred_nearest_neighbours;
int max_num_direct_predictors;
int adaptive_prediction_threshold; int adaptive_prediction_threshold;
// NB: derived from num_detail_levels_minus1 // NB: derived from num_detail_levels_minus1
......
...@@ -261,6 +261,7 @@ write(const AttributeParameterSet& aps) ...@@ -261,6 +261,7 @@ write(const AttributeParameterSet& aps)
|| aps.attr_encoding == AttributeEncoding::kPredictingTransform; || aps.attr_encoding == AttributeEncoding::kPredictingTransform;
if (isLifting) { if (isLifting) {
bs.writeUe(aps.num_pred_nearest_neighbours); bs.writeUe(aps.num_pred_nearest_neighbours);
bs.writeUe(aps.max_num_direct_predictors);
bs.writeUe(aps.quant_step_size_luma); bs.writeUe(aps.quant_step_size_luma);
bs.writeUe(aps.quant_step_size_chroma); bs.writeUe(aps.quant_step_size_chroma);
...@@ -307,6 +308,7 @@ parseAps(const PayloadBuffer& buf) ...@@ -307,6 +308,7 @@ parseAps(const PayloadBuffer& buf)
|| aps.attr_encoding == AttributeEncoding::kPredictingTransform; || aps.attr_encoding == AttributeEncoding::kPredictingTransform;
if (isLifting) { if (isLifting) {
bs.readUe(&aps.num_pred_nearest_neighbours); bs.readUe(&aps.num_pred_nearest_neighbours);
bs.readUe(&aps.max_num_direct_predictors);
bs.readUe(&aps.quant_step_size_luma); bs.readUe(&aps.quant_step_size_luma);
bs.readUe(&aps.quant_step_size_chroma); bs.readUe(&aps.quant_step_size_chroma);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment