/* The copyright in this software is being made available under the BSD
 * Licence, included below.  This software may be subject to other third
 * party and contributor rights, including patent rights, and no such
 * rights are granted under this licence.
 *
 * Copyright (c) 2017-2018, ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 *
 * * Neither the name of the ISO/IEC nor the names of its contributors
 *   may be used to endorse or promote products derived from this
 *   software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "TMC3.h"

#include <cassert>
#include <chrono>
#include <fstream>
#include <functional>
#include <iostream>
#include <list>
#include <memory>
#include <string>

#include "program_options_lite.h"
#include "io_tlv.h"
#include "version.h"

using namespace std;
using namespace pcc;

//============================================================================

// The complete set of program parameters populated from the command line
// and/or configuration file by ParseParameters().
struct Parameters {
  // Selects the operating mode (the "mode" option): when true main() runs
  // Decompress(), otherwise Compress().
  bool isDecoder;

  // Path of the input point cloud read by Compress().
  std::string uncompressedDataPath;
  // Path of the bitstream: written by Compress(), read by Decompress().
  std::string compressedStreamPath;
  // Path of the reconstructed point cloud.  Always written by Decompress();
  // written by Compress() only when non-empty.
  std::string reconstructedDataPath;

  // Parameters handed to the encoder (used by Compress()).
  pcc::EncoderParams encoder;
  // Parameters handed to the decoder (used by Decompress()).
  pcc::DecoderParams decoder;

  // Colour transform applied to the input before encoding and inverted on
  // the reconstructed/decoded output.
  // todo(df): this should be per-attribute
  ColorTransform colorTransform;
};

//============================================================================

int
main(int argc, char* argv[])
{
65
  cout << "MPEG PCC tmc3 version " << ::pcc::version << endl;
66

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
67
68
69
70
  Parameters params;
  if (!ParseParameters(argc, argv, params)) {
    return -1;
  }
71
72
73
74
75
76
77

  // Timers to count elapsed wall/user time
  pcc::chrono::Stopwatch<std::chrono::steady_clock> clock_wall;
  pcc::chrono::Stopwatch<pcc::chrono::utime_inc_children_clock> clock_user;

  clock_wall.start();

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
78
  int ret = 0;
79
  if (params.isDecoder) {
80
    ret = Decompress(params, clock_user);
81
82
  } else {
    ret = Compress(params, clock_user);
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
83
84
  }

85
86
87
88
89
90
91
92
  clock_wall.stop();

  using namespace std::chrono;
  auto total_wall = duration_cast<milliseconds>(clock_wall.count()).count();
  auto total_user = duration_cast<milliseconds>(clock_user.count()).count();
  std::cout << "Processing time (wall): " << total_wall / 1000.0 << " s\n";
  std::cout << "Processing time (user): " << total_user / 1000.0 << " s\n";

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
93
94
95
  return ret;
}

//---------------------------------------------------------------------------
// :: Command line / config parsing helpers

// Extracts an unsigned integer from |in| and stores it in |val| after an
// explicit conversion to T (used to read enumerations by numeric value).
//
// No range checking is performed on the extracted value, and the stream
// state is not inspected: if extraction fails, |val| is set to T(0) and the
// failure remains visible to the caller through the returned stream.
template<typename T>
static std::istream&
readUInt(std::istream& in, T& val)
{
  // Zero-initialised so that |val| is well defined after a failed
  // extraction irrespective of the language revision in use.
  unsigned int tmp = 0;
  in >> tmp;
  val = T(tmp);
  return in;
}

109
110
111
static std::istream&
operator>>(std::istream& in, ColorTransform& val)
{
112
113
114
  return readUInt(in, val);
}

115
namespace pcc {
116
static std::istream&
117
operator>>(std::istream& in, AttributeEncoding& val)
118
{
119
  return readUInt(in, val);
120
121
}
}  // namespace pcc
122

123
namespace pcc {
124
125
126
static std::istream&
operator>>(std::istream& in, GeometryCodecType& val)
{
127
  return readUInt(in, val);
128
129
}
}  // namespace pcc
130

131
namespace pcc {
132
static std::ostream&
133
operator<<(std::ostream& out, const AttributeEncoding& val)
134
{
135
  switch (val) {
136
137
138
  case AttributeEncoding::kPredictingTransform: out << "0 (Pred)"; break;
  case AttributeEncoding::kRAHTransform: out << "1 (RAHT)"; break;
  case AttributeEncoding::kLiftingTransform: out << "2 (Lift)"; break;
139
140
  }
  return out;
141
142
}
}  // namespace pcc
143

144
namespace pcc {
145
146
147
static std::ostream&
operator<<(std::ostream& out, const GeometryCodecType& val)
{
148
  switch (val) {
149
  case GeometryCodecType::kOctree: out << "1 (TMC1 Octree)"; break;
150
151
152
  case GeometryCodecType::kTriSoup: out << "2 (TMC3 TriSoup)"; break;
  }
  return out;
153
154
}
}  // namespace pcc

//---------------------------------------------------------------------------
// :: Command line / config parsing

bool
ParseParameters(int argc, char* argv[], Parameters& params)
{
162
163
  namespace po = df::program_options_lite;

164
165
166
167
168
  struct {
    AttributeDescription desc;
    AttributeParameterSet aps;
  } params_attr;

169
170
171
172
173
174
175
176
177
178
179
180
181
  bool print_help = false;

  // a helper to set the attribute
  std::function<po::OptionFunc::Func> attribute_setter =
    [&](po::Options&, const std::string& name, po::ErrorReporter) {
      // copy the current state of parsed attribute parameters
      //
      // NB: this does not cause the default values of attr to be restored
      // for the next attribute block.  A side-effect of this is that the
      // following is allowed leading to attribute foo having both X=1 and
      // Y=2:
      //   "--attr.X=1 --attribute foo --attr.Y=2 --attribute foo"
      //
182
183
184
185
186
187
188
189
190
191
192
193
194
195

      // NB: insert returns any existing element
      const auto& it = params.encoder.attributeIdxMap.insert(
        {name, int(params.encoder.attributeIdxMap.size())});

      if (it.second) {
        params.encoder.sps.attributeSets.push_back(params_attr.desc);
        params.encoder.aps.push_back(params_attr.aps);
        return;
      }

      // update existing entry
      params.encoder.sps.attributeSets[it.first->second] = params_attr.desc;
      params.encoder.aps[it.first->second] = params_attr.aps;
196
197
    };

198
  /* clang-format off */
199
200
201
202
203
204
205
206
207
208
209
  // The definition of the program/config options, along with default values.
  //
  // NB: when updating the following tables:
  //      (a) please keep to 80-columns for easier reading at a glance,
  //      (b) do not vertically align values -- it breaks quickly
  //
  po::Options opts;
  opts.addOptions()
  ("help", print_help, false, "this help text")
  ("config,c", po::parseConfigFile, "configuration file name")

210
211
  (po::Section("General"))

212
  ("mode", params.isDecoder, false,
213
214
    "The encoding/decoding mode:\n"
    "  0: encode\n"
215
    "  1: decode")
216
217
218

  // i/o parameters
  ("reconstructedDataPath",
219
220
    params.reconstructedDataPath, {},
    "The ouput reconstructed pointcloud file path (decoder only)")
221
222

  ("uncompressedDataPath",
223
224
    params.uncompressedDataPath, {},
    "The input pointcloud file path")
225
226

  ("compressedStreamPath",
227
228
    params.compressedStreamPath, {},
    "The compressed bitstream path (encoder=output, decoder=input)")
229

230
  ("postRecolorPath",
231
    params.encoder.postRecolorPath, {},
232
    "Recolored pointcloud file path (encoder only)")
233
234

  ("preInvScalePath",
235
    params.decoder.preInvScalePath, {},
236
    "Pre inverse scaled pointcloud file path (decoder only)")
237

238
  // general
239
  // todo(df): this should be per-attribute
240
  ("colorTransform",
241
242
243
244
    params.colorTransform, COLOR_TRANSFORM_RGB_TO_YCBCR,
    "The colour transform to be applied:\n"
    "  0: none\n"
    "  1: RGB to YCbCr (Rec.709)")
245

246
247
248
  (po::Section("Decoder"))

  ("roundOutputPositions",
249
    params.decoder.roundOutputPositions, false,
250
251
252
253
    "todo(kmammou)")

  (po::Section("Encoder"))

254
  ("positionQuantizationScale",
255
    params.encoder.sps.seq_source_geom_scale_factor, 1.f,
256
    "Scale factor to be applied to point positions during quantization process")
257
258

  ("mergeDuplicatedPoints",
259
    params.encoder.gps.geom_unique_points_flag, true,
260
    "Enables removal of duplicated points")
261

262
  (po::Section("Geometry"))
263

264
  // tools
265
  ("geometryCodec",
266
    params.encoder.gps.geom_codec_type, GeometryCodecType::kOctree,
267
    "Controls the method used to encode geometry:\n"
268
269
    "  1: octree (TMC3)\n"
    "  2: trisoup (TMC1)")
270

271
  ("neighbourContextRestriction",
272
    params.encoder.gps.neighbour_context_restriction_flag, false,
273
    "Limit geometry octree occupancy contextualisation to sibling nodes")
274

275
  ("neighbourAvailBoundaryLog2",
276
    params.encoder.gps.neighbour_avail_boundary_log2, 0,
277
278
279
    "Defines the avaliability volume for neighbour occupancy lookups."
    " 0: unconstrained")

280
  ("inferredDirectCodingMode",
281
    params.encoder.gps.inferred_direct_coding_mode_enabled_flag, true,
282
    "Permits early termination of the geometry octree for isolated points")
283

284
285
  // (trisoup) geometry parameters
  ("triSoupDepth",  // log2(maxBB+1), where maxBB+1 is analogous to image width
286
    params.encoder.gps.trisoup_depth, 10,
287
    "Depth of voxels (reconstructed points) in trisoup geometry")
288
289

  ("triSoupLevel",
290
    params.encoder.gps.trisoup_triangle_level, 7,
291
    "Level of triangles (reconstructed surface) in trisoup geometry")
292
293

  ("triSoupIntToOrigScale",  // reciprocal of positionQuantizationScale
294
295
    params.encoder.sps.donotuse_trisoup_int_to_orig_scale, 1.f,
    "orig_coords = integer_coords * intToOrigScale")
296

297
298
  (po::Section("Attributes"))

299
300
301
  // attribute processing
  //   NB: Attribute options are special in the way they are applied (see above)
  ("attribute",
302
303
304
    attribute_setter,
    "Encode the given attribute (NB, must appear after the"
    "following attribute parameters)")
305

306
  ("transformType",
307
    params_attr.aps.attr_encoding, AttributeEncoding::kPredictingTransform,
308
    "Coding method to use for attribute:\n"
309
    "  0: Hierarchical neighbourhood prediction\n"
310
    "  1: Region Adaptive Hierarchical Transform (RAHT)\n"
311
    "  2: Hierarichical neighbourhood prediction as lifting transform")
312

313
  ("rahtLeafDecimationDepth",
314
    params_attr.aps.raht_binary_level_threshold, 3,
315
316
    "Sets coefficients to zero in the bottom n levels of RAHT tree. "
    "Used for chroma-subsampling in attribute=color only.")
317

318
  ("rahtQuantizationStep",
319
320
    params_attr.aps.quant_step_size_luma, {},
    "deprecated -- use quantizationStepsLuma")
321
322

  ("rahtDepth",
323
    params_attr.aps.raht_depth, 21,
324
325
    "Number of bits for morton representation of an RAHT co-ordinate"
    "component")
326

327
  ("numberOfNearestNeighborsInPrediction",
328
    params_attr.aps.num_pred_nearest_neighbours, 4,
329
    "Attribute's maximum number of nearest neighbors to be used for prediction")
330
331

  ("levelOfDetailCount",
332
    params_attr.aps.numDetailLevels, 1,
333
    "Attribute's number of levels of detail")
334
335

  ("quantizationSteps",
336
337
    params_attr.aps.quant_step_size_luma, {},
    "deprecated -- use quantizationStepsLuma")
338
339

  ("quantizationStepsLuma",
340
    params_attr.aps.quant_step_size_luma, {},
341
342
343
    "Attribute's luma quantization step sizes (one for each LoD)")

  ("quantizationStepsChroma",
344
    params_attr.aps.quant_step_size_chroma, {},
345
    "Attribute's chroma quantization step sizes (one for each LoD)")
346

347
348
  ("dist2",
    params_attr.aps.dist2, {},
349
    "Attribute's list of squared distances (one for each LoD)")
350
  ;
351
  /* clang-format on */
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366

  po::setDefaults(opts);
  po::ErrorReporter err;
  const list<const char*>& argv_unhandled =
    po::scanArgv(opts, argc, (const char**)argv, err);

  for (const auto arg : argv_unhandled) {
    err.warn() << "Unhandled argument ignored: " << arg << "\n";
  }

  if (argc == 1 || print_help) {
    po::doHelp(std::cout, opts, 78);
    return false;
  }

367
368
369
370
  if (int(params.encoder.gps.geom_codec_type) == 0) {
    err.error() << "Bypassed geometry coding is no longer supported\n";
  }

371
  // For trisoup, ensure that positionQuantizationScale is the exact inverse of intToOrigScale.
372
373
374
  if (params.encoder.gps.geom_codec_type == GeometryCodecType::kTriSoup) {
    params.encoder.sps.seq_source_geom_scale_factor =
      1.0f / params.encoder.sps.donotuse_trisoup_int_to_orig_scale;
375
376
  }

377
  // For RAHT, ensure that the unused lod count = 0 (prevents mishaps)
378
379
380
381
382
  for (const auto& it : params.encoder.attributeIdxMap) {
    auto& attr_aps = params.encoder.aps[it.second];

    if (attr_aps.attr_encoding == AttributeEncoding::kRAHTransform) {
      attr_aps.numDetailLevels = 0;
383
384
385
    }
  }

386
  // sanity checks
387
  //  - validate that quantizationStepsLuma/Chroma, dist2
388
  //    of each attribute contain levelOfDetailCount elements.
389
390
391
392
393
394
395
396
397
398
  for (const auto& it : params.encoder.attributeIdxMap) {
    const auto& attr_sps = params.encoder.sps.attributeSets[it.second];
    const auto& attr_aps = params.encoder.aps[it.second];

    bool isLifting =
      attr_aps.attr_encoding == AttributeEncoding::kPredictingTransform
      || attr_aps.attr_encoding == AttributeEncoding::kLiftingTransform;

    if (isLifting) {
      int lod = attr_aps.numDetailLevels;
399

400
      if (lod > 255) {
401
        err.error() << it.first
402
                    << ".levelOfDetailCount must be less than 256\n";
403
      }
404
      // todo(df): the following two checks are removed in m42640/2
405
406
      if (attr_aps.dist2.size() != lod) {
        err.error() << it.first << ".dist2 does not have " << lod
407
                    << " entries\n";
408
      }
409
410
      if (attr_aps.quant_step_size_luma.size() != lod) {
        err.error() << it.first << ".quantizationStepsLuma does not have "
411
412
                    << lod << " entries\n";
      }
413
414
415
416
      if (it.first == "color") {
        if (attr_aps.quant_step_size_chroma.size() != lod) {
          err.error() << it.first << ".quantizationStepsChroma does not have "
                      << lod << " entries\n";
417
        }
418
      }
419

420
      if (
421
        attr_aps.num_pred_nearest_neighbours
422
423
        > PCCTMC3MaxPredictionNearestNeighborCount) {
        err.error()
424
          << it.first
425
426
427
          << ".numberOfNearestNeighborsInPrediction must be less than "
          << PCCTMC3MaxPredictionNearestNeighborCount << "\n";
      }
428
    }
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
429
430
  }

431
432
  // check required arguments are specified

433
  if (!params.isDecoder && params.uncompressedDataPath.empty())
434
435
    err.error() << "uncompressedDataPath not set\n";

436
  if (params.isDecoder && params.reconstructedDataPath.empty())
437
438
439
440
441
442
443
444
445
446
    err.error() << "reconstructedDataPath not set\n";

  if (params.compressedStreamPath.empty())
    err.error() << "compressedStreamPath not set\n";

  // report the current configuration (only in the absence of errors so
  // that errors/warnings are more obvious and in the same place).
  if (err.is_errored)
    return false;

447
448
  // Dump the complete derived configuration
  cout << "+ Effective configuration parameters\n";
449

450
  po::dumpCfg(cout, opts, "General", 4);
451
  if (params.isDecoder) {
452
    po::dumpCfg(cout, opts, "Decoder", 4);
453
  } else {
454
455
456
    po::dumpCfg(cout, opts, "Encoder", 4);
    po::dumpCfg(cout, opts, "Geometry", 4);

457
    for (const auto& it : params.encoder.attributeIdxMap) {
458
      // NB: when dumping the config, opts references params_attr
459
460
      params_attr.desc = params.encoder.sps.attributeSets[it.second];
      params_attr.aps = params.encoder.aps[it.second];
461
462
463
      cout << "    " << it.first << "\n";
      po::dumpCfg(cout, opts, "Attributes", 8);
    }
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
464
465
  }

466
467
  cout << endl;

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
468
469
  return true;
}
470

471
int
472
Compress(Parameters& params, Stopwatch& clock)
473
{
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
474
  PCCPointSet3 pointCloud;
475
476
477
  if (
    !pointCloud.read(params.uncompressedDataPath)
    || pointCloud.getPointCount() == 0) {
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
478
479
480
481
    cout << "Error: can't open input file!" << endl;
    return -1;
  }

482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
  // Sanitise the input point cloud
  // todo(df): remove the following with generic handling of properties
  bool codeColour = params.encoder.attributeIdxMap.count("color");
  if (!codeColour)
    pointCloud.removeColors();
  assert(codeColour == pointCloud.hasColors());

  bool codeReflectance = params.encoder.attributeIdxMap.count("reflectance");
  if (!codeReflectance)
    pointCloud.removeReflectances();
  assert(codeReflectance == pointCloud.hasReflectances());

  ofstream fout(params.compressedStreamPath, ios::binary);
  if (!fout.is_open()) {
    return -1;
  }

499
500
  clock.start();

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
501
502
503
504
505
506
507
508
509
510
  if (params.colorTransform == COLOR_TRANSFORM_RGB_TO_YCBCR) {
    pointCloud.convertRGBToYUV();
  }
  PCCTMC3Encoder3 encoder;

  std::unique_ptr<PCCPointSet3> reconstructedPointCloud;
  if (!params.reconstructedDataPath.empty()) {
    reconstructedPointCloud.reset(new PCCPointSet3);
  }

511
  int ret = encoder.compress(
512
513
    pointCloud, &params.encoder,
    [&](const PayloadBuffer& buf) { writeTlv(buf, fout); },
514
    reconstructedPointCloud.get());
515
  if (ret) {
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
516
517
518
    cout << "Error: can't compress point cloud!" << endl;
    return -1;
  }
519

520
  std::cout << "Total bitstream size " << fout.tellp() << " B" << std::endl;
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
521
522
  fout.close();

523
524
  clock.stop();

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
525
526
527
528
529
530
531
532
533
  if (!params.reconstructedDataPath.empty()) {
    if (params.colorTransform == COLOR_TRANSFORM_RGB_TO_YCBCR) {
      reconstructedPointCloud->convertYUVToRGB();
    }
    reconstructedPointCloud->write(params.reconstructedDataPath, true);
  }

  return 0;
}
534
int
535
Decompress(Parameters& params, Stopwatch& clock)
536
{
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
537
538
539
540
541
  ifstream fin(params.compressedStreamPath, ios::binary);
  if (!fin.is_open()) {
    return -1;
  }

542
543
  clock.start();

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
544
545
  PCCTMC3Decoder3 decoder;
  PCCPointSet3 pointCloud;
546

547
  int ret = decoder.decompress(params.decoder, fin, pointCloud);
548
  if (ret) {
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
549
550
551
    cout << "Error: can't decompress point cloud!" << endl;
    return -1;
  }
552
  std::cout << "Total bitstream size " << fin.tellg() << " B" << std::endl;
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
553
554
555
556
557

  if (params.colorTransform == COLOR_TRANSFORM_RGB_TO_YCBCR) {
    pointCloud.convertYUVToRGB();
  }

558
559
  clock.stop();

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
560
561
562
563
  if (!pointCloud.write(params.reconstructedDataPath, true)) {
    cout << "Error: can't open output file!" << endl;
    return -1;
  }
564

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
565
566
  return 0;
}