TMC3.cpp 18 KB
Newer Older
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
1
/* The copyright in this software is being made available under the BSD
David Flynn's avatar
David Flynn committed
2
3
4
 * Licence, included below.  This software may be subject to other third
 * party and contributor rights, including patent rights, and no such
 * rights are granted under this licence.
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
5
 *
David Flynn's avatar
David Flynn committed
6
 * Copyright (c) 2017-2018, ISO/IEC
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
7
8
9
10
11
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
David Flynn's avatar
David Flynn committed
12
13
14
15
16
17
18
19
20
21
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 *
 * * Neither the name of the ISO/IEC nor the names of its contributors
 *   may be used to endorse or promote products derived from this
 *   software without specific prior written permission.
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
22
23
24
25
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
David Flynn's avatar
David Flynn committed
26
27
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
28
29
30
31
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
David Flynn's avatar
David Flynn committed
32
33
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
34
35
36
 */

#include "TMC3.h"
37
#include "program_options_lite.h"
38
#include "io_tlv.h"
39
#include "version.h"
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
40
41
42
43

using namespace std;
using namespace pcc;

44
45
46
47
48
49
50
51
52
//============================================================================

struct Parameters {
  bool isDecoder;

  std::string uncompressedDataPath;
  std::string compressedStreamPath;
  std::string reconstructedDataPath;

53
54
55
  // Filename for saving pre inverse scaled point cloud.
  std::string preInvScalePath;

56
57
58
59
60
61
62
63
64
  pcc::EncoderParams encoder;
  pcc::DecoderParams decoder;

  // todo(df): this should be per-attribute
  ColorTransform colorTransform;
};

//============================================================================

65
66
67
int
main(int argc, char* argv[])
{
68
  cout << "MPEG PCC tmc3 version " << ::pcc::version << endl;
69

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
70
71
72
73
  Parameters params;
  if (!ParseParameters(argc, argv, params)) {
    return -1;
  }
74
75
76
77
78
79
80

  // Timers to count elapsed wall/user time
  pcc::chrono::Stopwatch<std::chrono::steady_clock> clock_wall;
  pcc::chrono::Stopwatch<pcc::chrono::utime_inc_children_clock> clock_user;

  clock_wall.start();

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
81
  int ret = 0;
82
  if (params.isDecoder) {
83
    ret = Decompress(params, clock_user);
84
85
  } else {
    ret = Compress(params, clock_user);
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
86
87
  }

88
89
90
91
92
93
94
95
  clock_wall.stop();

  using namespace std::chrono;
  auto total_wall = duration_cast<milliseconds>(clock_wall.count()).count();
  auto total_user = duration_cast<milliseconds>(clock_user.count()).count();
  std::cout << "Processing time (wall): " << total_wall / 1000.0 << " s\n";
  std::cout << "Processing time (user): " << total_user / 1000.0 << " s\n";

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
96
97
98
  return ret;
}

99
100
101
//---------------------------------------------------------------------------
// :: Command line / config parsing helpers

102
103
104
105
template<typename T>
static std::istream&
readUInt(std::istream& in, T& val)
{
106
107
108
109
110
111
  unsigned int tmp;
  in >> tmp;
  val = T(tmp);
  return in;
}

112
113
114
static std::istream&
operator>>(std::istream& in, ColorTransform& val)
{
115
116
117
  return readUInt(in, val);
}

118
namespace pcc {
119
static std::istream&
120
operator>>(std::istream& in, AttributeEncoding& val)
121
{
122
  return readUInt(in, val);
123
124
}
}  // namespace pcc
125

126
namespace pcc {
127
128
129
static std::istream&
operator>>(std::istream& in, GeometryCodecType& val)
{
130
  return readUInt(in, val);
131
132
}
}  // namespace pcc
133

134
namespace pcc {
135
static std::ostream&
136
operator<<(std::ostream& out, const AttributeEncoding& val)
137
{
138
  switch (val) {
139
140
141
  case AttributeEncoding::kPredictingTransform: out << "0 (Pred)"; break;
  case AttributeEncoding::kRAHTransform: out << "1 (RAHT)"; break;
  case AttributeEncoding::kLiftingTransform: out << "2 (Lift)"; break;
142
143
  }
  return out;
144
145
}
}  // namespace pcc
146

147
namespace pcc {
148
149
150
static std::ostream&
operator<<(std::ostream& out, const GeometryCodecType& val)
{
151
  switch (val) {
152
  case GeometryCodecType::kOctree: out << "1 (TMC1 Octree)"; break;
153
154
155
  case GeometryCodecType::kTriSoup: out << "2 (TMC3 TriSoup)"; break;
  }
  return out;
156
157
}
}  // namespace pcc
158

159
160
161
//---------------------------------------------------------------------------
// :: Command line / config parsing

162
163
164
bool
ParseParameters(int argc, char* argv[], Parameters& params)
{
165
166
  namespace po = df::program_options_lite;

167
168
169
170
171
  struct {
    AttributeDescription desc;
    AttributeParameterSet aps;
  } params_attr;

172
173
174
175
176
177
178
179
180
181
182
183
184
  bool print_help = false;

  // a helper to set the attribute
  std::function<po::OptionFunc::Func> attribute_setter =
    [&](po::Options&, const std::string& name, po::ErrorReporter) {
      // copy the current state of parsed attribute parameters
      //
      // NB: this does not cause the default values of attr to be restored
      // for the next attribute block.  A side-effect of this is that the
      // following is allowed leading to attribute foo having both X=1 and
      // Y=2:
      //   "--attr.X=1 --attribute foo --attr.Y=2 --attribute foo"
      //
185
186
187
188
189
190
191
192
193
194
195
196
197
198

      // NB: insert returns any existing element
      const auto& it = params.encoder.attributeIdxMap.insert(
        {name, int(params.encoder.attributeIdxMap.size())});

      if (it.second) {
        params.encoder.sps.attributeSets.push_back(params_attr.desc);
        params.encoder.aps.push_back(params_attr.aps);
        return;
      }

      // update existing entry
      params.encoder.sps.attributeSets[it.first->second] = params_attr.desc;
      params.encoder.aps[it.first->second] = params_attr.aps;
199
200
    };

201
  /* clang-format off */
202
203
204
205
206
207
208
209
210
211
212
  // The definition of the program/config options, along with default values.
  //
  // NB: when updating the following tables:
  //      (a) please keep to 80-columns for easier reading at a glance,
  //      (b) do not vertically align values -- it breaks quickly
  //
  po::Options opts;
  opts.addOptions()
  ("help", print_help, false, "this help text")
  ("config,c", po::parseConfigFile, "configuration file name")

213
214
  (po::Section("General"))

215
  ("mode", params.isDecoder, false,
216
217
    "The encoding/decoding mode:\n"
    "  0: encode\n"
218
    "  1: decode")
219
220
221

  // i/o parameters
  ("reconstructedDataPath",
222
223
    params.reconstructedDataPath, {},
    "The ouput reconstructed pointcloud file path (decoder only)")
224
225

  ("uncompressedDataPath",
226
227
    params.uncompressedDataPath, {},
    "The input pointcloud file path")
228
229

  ("compressedStreamPath",
230
231
    params.compressedStreamPath, {},
    "The compressed bitstream path (encoder=output, decoder=input)")
232

233
  ("postRecolorPath",
234
    params.encoder.postRecolorPath, {},
235
    "Recolored pointcloud file path (encoder only)")
236
237

  ("preInvScalePath",
238
    params.preInvScalePath, {},
239
    "Pre inverse scaled pointcloud file path (decoder only)")
240

241
  // general
242
  // todo(df): this should be per-attribute
243
  ("colorTransform",
244
245
246
247
    params.colorTransform, COLOR_TRANSFORM_RGB_TO_YCBCR,
    "The colour transform to be applied:\n"
    "  0: none\n"
    "  1: RGB to YCbCr (Rec.709)")
248

249
250
251
  (po::Section("Decoder"))

  ("roundOutputPositions",
252
    params.decoder.roundOutputPositions, false,
253
254
255
256
    "todo(kmammou)")

  (po::Section("Encoder"))

257
  ("positionQuantizationScale",
258
    params.encoder.sps.seq_source_geom_scale_factor, 1.f,
259
    "Scale factor to be applied to point positions during quantization process")
260
261

  ("mergeDuplicatedPoints",
262
    params.encoder.gps.geom_unique_points_flag, true,
263
    "Enables removal of duplicated points")
264

265
  (po::Section("Geometry"))
266

267
  // tools
268
  ("geometryCodec",
269
    params.encoder.gps.geom_codec_type, GeometryCodecType::kOctree,
270
    "Controls the method used to encode geometry:\n"
271
272
    "  1: octree (TMC3)\n"
    "  2: trisoup (TMC1)")
273

274
  ("neighbourContextRestriction",
275
    params.encoder.gps.neighbour_context_restriction_flag, false,
276
    "Limit geometry octree occupancy contextualisation to sibling nodes")
277

278
  ("neighbourAvailBoundaryLog2",
279
    params.encoder.gps.neighbour_avail_boundary_log2, 0,
280
281
282
    "Defines the avaliability volume for neighbour occupancy lookups."
    " 0: unconstrained")

283
  ("inferredDirectCodingMode",
284
    params.encoder.gps.inferred_direct_coding_mode_enabled_flag, true,
285
    "Permits early termination of the geometry octree for isolated points")
286

287
288
  // (trisoup) geometry parameters
  ("triSoupDepth",  // log2(maxBB+1), where maxBB+1 is analogous to image width
289
    params.encoder.gps.trisoup_depth, 10,
290
    "Depth of voxels (reconstructed points) in trisoup geometry")
291
292

  ("triSoupLevel",
293
    params.encoder.gps.trisoup_triangle_level, 7,
294
    "Level of triangles (reconstructed surface) in trisoup geometry")
295
296

  ("triSoupIntToOrigScale",  // reciprocal of positionQuantizationScale
297
298
    params.encoder.sps.donotuse_trisoup_int_to_orig_scale, 1.f,
    "orig_coords = integer_coords * intToOrigScale")
299

300
301
  (po::Section("Attributes"))

302
303
304
  // attribute processing
  //   NB: Attribute options are special in the way they are applied (see above)
  ("attribute",
305
306
307
    attribute_setter,
    "Encode the given attribute (NB, must appear after the"
    "following attribute parameters)")
308

309
  ("transformType",
310
    params_attr.aps.attr_encoding, AttributeEncoding::kPredictingTransform,
311
    "Coding method to use for attribute:\n"
312
    "  0: Hierarchical neighbourhood prediction\n"
313
    "  1: Region Adaptive Hierarchical Transform (RAHT)\n"
314
    "  2: Hierarichical neighbourhood prediction as lifting transform")
315

316
  ("rahtLeafDecimationDepth",
317
    params_attr.aps.raht_binary_level_threshold, 3,
318
319
    "Sets coefficients to zero in the bottom n levels of RAHT tree. "
    "Used for chroma-subsampling in attribute=color only.")
320

321
  ("rahtQuantizationStep",
322
323
    params_attr.aps.quant_step_size_luma, {},
    "deprecated -- use quantizationStepsLuma")
324
325

  ("rahtDepth",
326
    params_attr.aps.raht_depth, 21,
327
328
    "Number of bits for morton representation of an RAHT co-ordinate"
    "component")
329

330
  ("numberOfNearestNeighborsInPrediction",
331
    params_attr.aps.num_pred_nearest_neighbours, 4,
332
    "Attribute's maximum number of nearest neighbors to be used for prediction")
333
334

  ("levelOfDetailCount",
335
    params_attr.aps.numDetailLevels, 1,
336
    "Attribute's number of levels of detail")
337
338

  ("quantizationSteps",
339
340
    params_attr.aps.quant_step_size_luma, {},
    "deprecated -- use quantizationStepsLuma")
341
342

  ("quantizationStepsLuma",
343
    params_attr.aps.quant_step_size_luma, {},
344
345
346
    "Attribute's luma quantization step sizes (one for each LoD)")

  ("quantizationStepsChroma",
347
    params_attr.aps.quant_step_size_chroma, {},
348
    "Attribute's chroma quantization step sizes (one for each LoD)")
349

350
351
  ("dist2",
    params_attr.aps.dist2, {},
352
    "Attribute's list of squared distances (one for each LoD)")
353
  ;
354
  /* clang-format on */
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369

  po::setDefaults(opts);
  po::ErrorReporter err;
  const list<const char*>& argv_unhandled =
    po::scanArgv(opts, argc, (const char**)argv, err);

  for (const auto arg : argv_unhandled) {
    err.warn() << "Unhandled argument ignored: " << arg << "\n";
  }

  if (argc == 1 || print_help) {
    po::doHelp(std::cout, opts, 78);
    return false;
  }

370
371
372
373
  if (int(params.encoder.gps.geom_codec_type) == 0) {
    err.error() << "Bypassed geometry coding is no longer supported\n";
  }

374
  // For trisoup, ensure that positionQuantizationScale is the exact inverse of intToOrigScale.
375
376
377
  if (params.encoder.gps.geom_codec_type == GeometryCodecType::kTriSoup) {
    params.encoder.sps.seq_source_geom_scale_factor =
      1.0f / params.encoder.sps.donotuse_trisoup_int_to_orig_scale;
378
379
  }

380
  // For RAHT, ensure that the unused lod count = 0 (prevents mishaps)
381
382
383
384
385
  for (const auto& it : params.encoder.attributeIdxMap) {
    auto& attr_aps = params.encoder.aps[it.second];

    if (attr_aps.attr_encoding == AttributeEncoding::kRAHTransform) {
      attr_aps.numDetailLevels = 0;
386
387
388
    }
  }

389
  // sanity checks
390
  //  - validate that quantizationStepsLuma/Chroma, dist2
391
  //    of each attribute contain levelOfDetailCount elements.
392
393
394
395
396
397
398
399
400
401
  for (const auto& it : params.encoder.attributeIdxMap) {
    const auto& attr_sps = params.encoder.sps.attributeSets[it.second];
    const auto& attr_aps = params.encoder.aps[it.second];

    bool isLifting =
      attr_aps.attr_encoding == AttributeEncoding::kPredictingTransform
      || attr_aps.attr_encoding == AttributeEncoding::kLiftingTransform;

    if (isLifting) {
      int lod = attr_aps.numDetailLevels;
402

403
      if (lod > 255) {
404
        err.error() << it.first
405
                    << ".levelOfDetailCount must be less than 256\n";
406
      }
407
      // todo(df): the following two checks are removed in m42640/2
408
409
      if (attr_aps.dist2.size() != lod) {
        err.error() << it.first << ".dist2 does not have " << lod
410
                    << " entries\n";
411
      }
412
413
      if (attr_aps.quant_step_size_luma.size() != lod) {
        err.error() << it.first << ".quantizationStepsLuma does not have "
414
415
                    << lod << " entries\n";
      }
416
417
418
419
      if (it.first == "color") {
        if (attr_aps.quant_step_size_chroma.size() != lod) {
          err.error() << it.first << ".quantizationStepsChroma does not have "
                      << lod << " entries\n";
420
        }
421
      }
422

423
      if (
424
        attr_aps.num_pred_nearest_neighbours
425
426
        > PCCTMC3MaxPredictionNearestNeighborCount) {
        err.error()
427
          << it.first
428
429
430
          << ".numberOfNearestNeighborsInPrediction must be less than "
          << PCCTMC3MaxPredictionNearestNeighborCount << "\n";
      }
431
    }
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
432
433
  }

434
435
  // check required arguments are specified

436
  if (!params.isDecoder && params.uncompressedDataPath.empty())
437
438
    err.error() << "uncompressedDataPath not set\n";

439
  if (params.isDecoder && params.reconstructedDataPath.empty())
440
441
442
443
444
445
446
447
448
449
    err.error() << "reconstructedDataPath not set\n";

  if (params.compressedStreamPath.empty())
    err.error() << "compressedStreamPath not set\n";

  // report the current configuration (only in the absence of errors so
  // that errors/warnings are more obvious and in the same place).
  if (err.is_errored)
    return false;

450
451
  // Dump the complete derived configuration
  cout << "+ Effective configuration parameters\n";
452

453
  po::dumpCfg(cout, opts, "General", 4);
454
  if (params.isDecoder) {
455
    po::dumpCfg(cout, opts, "Decoder", 4);
456
  } else {
457
458
459
    po::dumpCfg(cout, opts, "Encoder", 4);
    po::dumpCfg(cout, opts, "Geometry", 4);

460
    for (const auto& it : params.encoder.attributeIdxMap) {
461
      // NB: when dumping the config, opts references params_attr
462
463
      params_attr.desc = params.encoder.sps.attributeSets[it.second];
      params_attr.aps = params.encoder.aps[it.second];
464
465
466
      cout << "    " << it.first << "\n";
      po::dumpCfg(cout, opts, "Attributes", 8);
    }
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
467
468
  }

469
470
  cout << endl;

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
471
472
  return true;
}
473

474
int
475
Compress(Parameters& params, Stopwatch& clock)
476
{
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
477
  PCCPointSet3 pointCloud;
478
479
480
  if (
    !pointCloud.read(params.uncompressedDataPath)
    || pointCloud.getPointCount() == 0) {
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
481
482
483
484
    cout << "Error: can't open input file!" << endl;
    return -1;
  }

485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
  // Sanitise the input point cloud
  // todo(df): remove the following with generic handling of properties
  bool codeColour = params.encoder.attributeIdxMap.count("color");
  if (!codeColour)
    pointCloud.removeColors();
  assert(codeColour == pointCloud.hasColors());

  bool codeReflectance = params.encoder.attributeIdxMap.count("reflectance");
  if (!codeReflectance)
    pointCloud.removeReflectances();
  assert(codeReflectance == pointCloud.hasReflectances());

  ofstream fout(params.compressedStreamPath, ios::binary);
  if (!fout.is_open()) {
    return -1;
  }

502
503
  clock.start();

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
504
505
506
507
508
509
510
511
512
513
  if (params.colorTransform == COLOR_TRANSFORM_RGB_TO_YCBCR) {
    pointCloud.convertRGBToYUV();
  }
  PCCTMC3Encoder3 encoder;

  std::unique_ptr<PCCPointSet3> reconstructedPointCloud;
  if (!params.reconstructedDataPath.empty()) {
    reconstructedPointCloud.reset(new PCCPointSet3);
  }

514
  int ret = encoder.compress(
515
516
    pointCloud, &params.encoder,
    [&](const PayloadBuffer& buf) { writeTlv(buf, fout); },
517
    reconstructedPointCloud.get());
518
  if (ret) {
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
519
520
521
    cout << "Error: can't compress point cloud!" << endl;
    return -1;
  }
522

523
  std::cout << "Total bitstream size " << fout.tellp() << " B" << std::endl;
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
524
525
  fout.close();

526
527
  clock.stop();

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
528
529
530
531
532
533
534
535
536
  if (!params.reconstructedDataPath.empty()) {
    if (params.colorTransform == COLOR_TRANSFORM_RGB_TO_YCBCR) {
      reconstructedPointCloud->convertYUVToRGB();
    }
    reconstructedPointCloud->write(params.reconstructedDataPath, true);
  }

  return 0;
}
537
int
538
Decompress(Parameters& params, Stopwatch& clock)
539
{
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
540
541
542
543
544
  ifstream fin(params.compressedStreamPath, ios::binary);
  if (!fin.is_open()) {
    return -1;
  }

545
546
  clock.start();

547
  PayloadBuffer buf;
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
548
  PCCTMC3Decoder3 decoder;
549

550
551
552
  while (true) {
    PayloadBuffer* buf_ptr = &buf;
    readTlv(fin, &buf);
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
553

554
555
556
    // at end of file (or other error), flush decoder
    if (!fin)
      buf_ptr = nullptr;
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
557

558
559
560
    int ret = decoder.decompress(
      params.decoder, buf_ptr, [&](const PCCPointSet3& decodedPointCloud) {
        PCCPointSet3 pointCloud(decodedPointCloud);
561

562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
        if (params.colorTransform == COLOR_TRANSFORM_RGB_TO_YCBCR) {
          pointCloud.convertYUVToRGB();
        }

        // Dump the decoded colour using the pre inverse scaled geometry
        if (!params.preInvScalePath.empty()) {
          pointCloud.write(params.preInvScalePath);
        }

        decoder.inverseQuantization(
          pointCloud, params.decoder.roundOutputPositions);

        clock.stop();

        if (!pointCloud.write(params.reconstructedDataPath, true)) {
          cout << "Error: can't open output file!" << endl;
        }

        clock.start();
      });

    if (ret) {
      cout << "Error: can't decompress point cloud!" << endl;
      return -1;
    }

    if (!buf_ptr)
      break;
Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
590
  }
591

592
593
594
595
596
597
  fin.clear();
  fin.seekg(0, ios_base::end);
  std::cout << "Total bitstream size " << fin.tellg() << " B" << std::endl;

  clock.stop();

Khaled Mammou's avatar
TMC3v0  
Khaled Mammou committed
598
599
  return 0;
}