Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions dali/operators/audio/nonsilence_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
namespace dali {

DALI_SCHEMA(NonsilentRegion)
.DocStr(R"code(The operator performs leading and trailing silence detection in an audio buffer.
.DocStr(R"code(The operator performs leading and trailing silence detection in an audio buffer.
The operator returns the beginning and length of the non-silent region by comparing short term power of the signal
with a silence cut-off threshold. The signal is considered silence when ``short_term_power_db < cutoff_db`` with::

Expand All @@ -35,22 +35,24 @@ Inputs/Outputs
Remarks
- If ``Outputs[1] == 0``, ``Outputs[0]`` value is undefined
)code")
.NumInput(1)
.NumOutput(detail::kNumOutputs)
.AddOptionalArg("cutoff_db",
R"code(The threshold [dB], below which everything is considered as silence)code",
-60.f)
.AddOptionalArg("window_length", R"code(Size of a sliding window.
.NumInput(1)
.NumOutput(detail::kNumOutputs)
.InputDoc(0, "audio_buffer", "1D TensorList", "Batch of audio buffers")
.OutputDoc(0, "start", "TensorList of int",
"Start positions, in samples, of nonsilent regions.")
.OutputDoc(1, "length", "TensorList of int", "Lengths, in samples, of nonsilent regions.")
.AddOptionalArg("cutoff_db",
R"code(The threshold [dB], below which everything is considered as silence)code", -60.f)
.AddOptionalArg("window_length", R"code(Size of a sliding window.
The sliding window is used to calculate short-term power of the signal.)code", 2048)
.AddOptionalArg("reference_power",
R"code(The reference power used for converting signal to db.
.AddOptionalArg("reference_power",
R"code(The reference power used for converting signal to db.
If ``reference_power`` is not provided, the maximum of the signal will be used as the reference power)code",
0.f)
.AddOptionalArg("reset_interval",
R"code(The number of samples after which the moving mean average is
0.f)
.AddOptionalArg("reset_interval",
R"code(The number of samples after which the moving mean average is
recalculated to avoid loss of precision. If ``reset_interval == -1`` or the input type allows exact calculation,
the average won't be reset. The default value should fit most of the use cases.)code",
8192);
the average won't be reset. The default value should fit most of the use cases.)code", 8192);

DALI_REGISTER_OPERATOR(NonsilentRegion, NonsilenceOperatorCpu, CPU);

Expand Down
3 changes: 3 additions & 0 deletions dali/operators/decoder/audio/audio_decoder_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ This operator produces two outputs:
)code")
.NumInput(1)
.NumOutput(2)
.OutputDoc(0, "decoded", "TensorList of int16, int32 or float", "The decoded audio recordings.")

@jantonguirao jantonguirao Apr 10, 2020

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I remember the conclusion was to start using TensorList for describing outputs. Wasn't it?

.OutputDoc(1, "sampling_rate", "TensorList of float",
"The sampling rates corresponding to the decoded sound recordings [Hz].")
.AddOptionalArg("sample_rate",
"If specified, the target sample rate, in Hz, to which the audio is resampled.",
0.0f, true)
Expand Down
20 changes: 14 additions & 6 deletions dali/operators/decoder/image_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,8 @@ Output of the decoder is in `HWC` ordering.)code")
.AddParent("ImageDecoderAttr")
.AddParent("RandomCropAttr");


DALI_SCHEMA(ImageDecoderSlice)
.DocStr(R"code(Decode images on the host with a cropping window of given size and anchor.
.DocStr(R"code(Decode images on the host with a cropping window of given size and anchor.
Inputs must be supplied as 3 separate tensors in a specific order: `data`
containing input data, `anchor` containing either normalized or absolute coordinates
(depending on the value of `normalized_anchor`) for the starting point of the
Expand All @@ -111,9 +110,18 @@ coordinates and `WH` order for the slice arguments.
When possible, will make use of partial decoding (e.g. libjpeg-turbo, nvJPEG).
When not supported, will decode the whole image and then crop.
Output of the decoder is in `HWC` ordering.)code")
.NumInput(3)
.NumOutput(1)
.AddParent("ImageDecoderAttr")
.AddParent("SliceAttr");
.NumInput(3)
.InputDoc(0, "data", "TensorList", "Batch containing input data")
.InputDoc(1, "anchor", "1D TensorList of float",
R"code(Input containing either normalized or absolute coordinates
(depending on the value of `normalized_anchor`) for the starting point of the
slice (x0, x1, x2, ...).)code")
.InputDoc(2, "shape", "1D TensorList of float",
R"code(Input containing either normalized or absolute coordinates
(depending on the value of `normalized_shape`) for the dimensions of the slice
(s0, s1, s2, ...).)code")
.NumOutput(1)
.AddParent("ImageDecoderAttr")
.AddParent("SliceAttr");

} // namespace dali
8 changes: 4 additions & 4 deletions dali/operators/generic/reshape.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ DALI_SCHEMA(Reshape)
The buffer contents are not copied.)")
.NumInput(1, 2)
.NumOutput(1)
.InputDox(0, "data", "TensorList", "Data to be reshaped")
.InputDox(1, "shape_input", "1D TensorList of integers", "Same as `shape` keyword argument")
.InputDoc(0, "data", "TensorList", "Data to be reshaped")
.InputDoc(1, "shape_input", "1D TensorList of int", "Same as `shape` keyword argument")
.PassThrough({{0, 0}})
.AllowSequences()
.SupportVolumetric()
Expand Down Expand Up @@ -59,8 +59,8 @@ DALI_SCHEMA(Reinterpret)
The buffer contents are not copied.)")
.NumInput(1, 2)
.NumOutput(1)
.InputDox(0, "data", "TensorList", "Data to be reshaped")
.InputDox(1, "shape_input", "1D TensorList of integers", "Same as `shape` keyword argument")
.InputDoc(0, "data", "TensorList", "Data to be reshaped")
.InputDoc(1, "shape_input", "1D TensorList of int", "Same as `shape` keyword argument")
.PassThrough({{0, 0}})
.AllowSequences()
.SupportVolumetric()
Expand Down
6 changes: 3 additions & 3 deletions dali/operators/generic/slice/slice.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ with arguments `axis_names` or `axes`. By default `Slice` operator uses normaliz
coordinates and `WH` order for the slice arguments.)code")
.NumInput(3)
.NumOutput(1)
.InputDox(0, "data", "TensorList", "Batch containing input data")
.InputDox(1, "anchor", "1D TensorList of floats",
.InputDoc(0, "data", "TensorList", "Batch containing input data")
.InputDoc(1, "anchor", "1D TensorList of float",
R"code(Input containing either normalized or absolute coordinates
(depending on the value of `normalized_anchor`) for the starting point of the
slice (x0, x1, x2, ...).)code")
.InputDox(2, "shape", "1D TensorList of floats",
.InputDoc(2, "shape", "1D TensorList of float",
R"code(Input containing either normalized or absolute coordinates
(depending on the value of `normalized_shape`) for the dimensions of the slice
(s0, s1, s2, ...).)code")
Expand Down
16 changes: 13 additions & 3 deletions dali/operators/image/crop/bbox_crop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,23 @@ relative terms, depending of whether the fixed ``crop_shape`` was used or not.
The third and fourth output correspond to the adjusted bounding boxes and optionally
their corresponding labels. Bounding boxes are always specified in relative coordinates.)code")
.NumInput(1, 2) // [boxes, labels (optional),]
.InputDox(
.InputDoc(
0, "boxes", "2D TensorList of float",
"Relative coordinates of the bounding boxes represented as a 2D tensor where the first "
"dimension refers to the index of the bounding box and the second dimension refers to the "
"index of the coordinate.")
.InputDox(1, "labels", "1D TensorList of integers",
.InputDoc(1, "labels", "1D TensorList of integers",
"(optional) Labels associated with each of the bounding boxes.")
.OutputDocStr(R"code(anchor : TensorList of {batch, 2} or {batch, 3} float
Slice-compatible anchors [x, y, (z,)] of calculated crop windows.
shape : TensorList of {batch, 2} or {batch, 3} float
Slice compatible dimensions [w, h, (d,)] of calculated crop windows.
bboxes : 2D TensorList of float
Adjusted bounding boxes. Each sample ``i`` has shape ``{m_i, 4}`` representing ``m_i`` bounding boxes
that are valid for given crop window.
labels : 2D TensorList of int, optional
Labels corresponding to bounding boxes. Each sample ``i`` has shape ``{m_i, 1}`` representing
``m_i`` labels.)code")
.NumOutput(3) // [anchor, shape, bboxes, labels (optional),]
.AdditionalOutputsFn([](const OpSpec &spec) {
return spec.NumInput() - 1; // +1 if labels are provided
Expand Down Expand Up @@ -187,7 +197,7 @@ explicitly.)code",

Value for ``min`` should satisfy ``0.0 <= min <= max``.

Note: Providing ``aspect_ratio`` and ``scaling`` is incompatible with specifying `crop_shape`
Note: Providing ``aspect_ratio`` and ``scaling`` is incompatible with specifying ``crop_shape``
explicitly)code",
std::vector<float>{1.f, 1.f})
.AddOptionalArg(
Expand Down
28 changes: 15 additions & 13 deletions dali/operators/random/normal_distribution_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,25 @@
namespace dali {

DALI_SCHEMA(NormalDistribution)
.DocStr(R"code(Creates a tensor that consists of data distributed normally.
.DocStr(R"code(Creates a tensor that consists of data distributed normally.
This operator can be run in 3 modes, which determine the shape of the output tensor:
1. Providing an input batch to this operator results in a batch of output tensors, which have the same shape as the input tensors.
2. Providing a custom `shape` as an argument results in an output batch, where every tensor has the same (given) shape.
3. Providing no input arguments results in an output batch of scalars, distributed normally.)code")
.NumInput(0, 1)
.NumOutput(detail::kNumOutputs)
.AddOptionalArg(detail::kMean, R"code(Mean value of the distribution)code",
0.f, true)
.AddOptionalArg(detail::kStddev,
R"code(Standard deviation of the distribution)code",
1.f, true)
.AddOptionalArg(detail::kShape,
R"code(Shape of single output tensor in a batch)code",
detail::kShapeDefaultValue)
.AddOptionalArg(arg_names::kDtype, R"code(Data type for the output)code",
DALI_FLOAT);
.NumInput(0, 1)
.InputDoc(0, "data", "TensorList",
"If provided, the output is given the same shape as `data` (`data` contents are ignored)")
.NumOutput(detail::kNumOutputs)
.AddOptionalArg(detail::kMean, R"code(Mean value of the distribution)code",
0.f, true)
.AddOptionalArg(detail::kStddev,
R"code(Standard deviation of the distribution)code",
1.f, true)
.AddOptionalArg(detail::kShape,
R"code(Shape of single output tensor in a batch)code",
detail::kShapeDefaultValue)
.AddOptionalArg(arg_names::kDtype, R"code(Data type for the output)code",
DALI_FLOAT);

DALI_REGISTER_OPERATOR(NormalDistribution, NormalDistributionCpu, CPU);

Expand Down
7 changes: 7 additions & 0 deletions dali/operators/reader/caffe2_reader_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ DALI_SCHEMA(Caffe2Reader)
int has_bbox = static_cast<int>(spec.GetArgument<bool>("bbox"));
return img_idx + num_label_outputs + additional_inputs + has_bbox;
})
.OutputDocStr(R"code(images : 1D TensorList of uint8, optional
encoded image data, only if ``image_available = true``.
*labels : optional
One or more output batches of labels, depending on the reader configuration.
*additional_outputs : optional
Additional auxiliary data tensors provided for each sample.
)code")
.AddArg("path",
R"code(List of paths to Caffe2 LMDB directories.)code",
DALI_STRING_VEC)
Expand Down
5 changes: 5 additions & 0 deletions dali/operators/reader/caffe_reader_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ DALI_SCHEMA(CaffeReader)
auto label_available = spec.GetArgument<bool>("label_available");
return image_available + label_available;
})
.OutputDocStr(R"code(images : 1D TensorList of uint8, optional
encoded image data, only if ``image_available = true``.
labels : 1D TensorList of int, optional
Batch of labels corresponding to images, only if ``label_available = true``.
)code")
.AddArg("path",
R"code(List of paths to Caffe LMDB directories.)code",
DALI_STRING_VEC)
Expand Down
13 changes: 11 additions & 2 deletions dali/operators/reader/coco_reader_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,17 @@ DALI_SCHEMA(COCOReader)
.NumInput(0)
.NumOutput(3)
.DocStr(R"code(Read data from a COCO dataset composed of directory with images
and an annotation files. For each image, with `m` bboxes, returns its bboxes as `(m,4)`
Tensor (``m * [x, y, w, h]`` or ``m * [left, top, right, bottom]``) and labels as `(m,1)` Tensor (``m * category_id``).)code")
and annotation files. For each image ``i``, with ``m_i`` bboxes, returns its bboxes as
``{m_i, 4}`` Tensor (``m_i * [x, y, w, h]`` or ``m_i * [left, top, right, bottom]``)
and labels as ``{m_i, 1}`` Tensor (``m_i * category_id``).)code")
.OutputDocStr(R"code(images : 1D TensorList of uint8
Encoded image data.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is data.s.?

bboxes : 2D TensorList of float
Adjusted bounding boxes. Each sample ``i`` has shape ``{m_i, 4}`` representing ``m_i`` bounding
boxes that are valid for given crop window.
labels : 2D TensorList of int, optional
Labels corresponding to bounding boxes. Each sample ``i`` has shape ``{m_i, 1}`` representing
``m_i`` labels.)code")
.AddOptionalArg(
"meta_files_path",
"Path to directory with meta files containing preprocessed COCO annotations.",
Expand Down
6 changes: 4 additions & 2 deletions dali/operators/reader/file_reader_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ DALI_SCHEMA(FileReader)
.DocStr("Read (Image, label) pairs from a directory")
.NumInput(0)
.NumOutput(2) // (Images, Labels)
.OutputDoc(0, "data", "1D TensorList of uint8", "Raw file contents.")
.OutputDoc(1, "labels", "1D TensorList of int", "Batch of labels corresponding to files.")
.AddArg("file_root",
R"code(Path to a directory containing data files.
``FileReader`` supports flat directory structure. ``file_root`` directory should contain
Expand All @@ -33,8 +35,8 @@ directories with images in them. To obtain labels ``FileReader`` sorts directori
.AddOptionalArg("file_list",
R"code(Path to a text file containing rows of ``filename label`` pairs, where the filenames are
relative to ``file_root``.
If left empty, ``file_root`` is traversed for subdirectories (only those at one level deep from
``file_root``) containing files associated with the same label. When traversing subdirectories,
If left empty, ``file_root`` is traversed for subdirectories (only those at one level deep from
``file_root``) containing files associated with the same label. When traversing subdirectories,
labels are assigned consecutive numbers.)code",
std::string())
.AddOptionalArg("shuffle_after_epoch",
Expand Down
2 changes: 2 additions & 0 deletions dali/operators/reader/mxnet_reader_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ DALI_SCHEMA(MXNetReader)
.DocStr("Read sample data from a MXNet RecordIO.")
.NumInput(0)
.NumOutput(2)
.OutputDoc(0, "data", "1D TensorList of uint8", "Raw data buffers.")
.OutputDoc(1, "labels", "1D TensorList of int", "Batch of labels corresponding to the buffers.")
.AddArg("path",
R"code(List of paths to RecordIO files.)code",
DALI_STRING_VEC)
Expand Down
31 changes: 16 additions & 15 deletions dali/operators/sequence/optical_flow/optical_flow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,29 @@ As an optional input, operator accepts external hints for OF calculation.
The output format of this operator matches the output format of OF driver API.
Dali uses Turing optical flow hardware implementation: https://developer.nvidia.com/opticalflow-sdk
)code")
.NumInput(1, 2)
.NumOutput(1)
.AddOptionalArg(detail::kPresetArgName, R"code(Setting quality level of OF calculation.
.NumInput(1, 2)
.InputDoc(0, "frame_seq", "TensorList of uint8", "Batch of input sequences to calculate OF")

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd write "optical flow" instead of OF

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the rest of the doc already use OF extensively.

@jantonguirao jantonguirao Apr 10, 2020

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok then, but I don't like using "OF" as if it was a well-established acronym

.InputDoc(1, "hints", "TensorList of float", "Batch of external hints for OF")
.NumOutput(1)
.AddOptionalArg(detail::kPresetArgName, R"code(Setting quality level of OF calculation.
0.0f ... 1.0f, where 1.0f is best quality, lowest speed)code", .0f, false)
.AddOptionalArg(detail::kOutputFormatArgName,
R"code(Setting grid size for output vector.
.AddOptionalArg(detail::kOutputFormatArgName,
R"code(Setting grid size for output vector.
Value defines width of grid square (e.g. if value == 4, 4x4 grid is used).
For values <=0, grid size is undefined. Currently only grid_size=4 is supported.)code", -1, false)
.AddOptionalArg(detail::kEnableTemporalHintsArgName,
R"code(enabling/disabling temporal hints for sequences longer than 2 images.
.AddOptionalArg(detail::kEnableTemporalHintsArgName,
R"code(enabling/disabling temporal hints for sequences longer than 2 images.
They are used to speed up calculation: previous OF result in sequence is used to calculate current flow. You might
want to use temporal hints for sequences that don't have many changes in the scene (e.g. only moving objects))code",
false, false)
.AddOptionalArg(detail::kEnableExternalHintsArgName,
R"code(enabling/disabling external hints for OF calculation. External hints
.AddOptionalArg(detail::kEnableExternalHintsArgName,
R"code(enabling/disabling external hints for OF calculation. External hints
are analogous to temporal hints, only they come from external source. When this option is enabled,
Operator requires 2 inputs.)code",
false, false)
.AddOptionalArg(detail::kImageTypeArgName,
R"code(Type of input images (RGB, BGR, GRAY))code", DALI_RGB,
false)
.AllowSequences();
Operator requires 2 inputs.)code", false, false)
.AddOptionalArg(detail::kImageTypeArgName,
R"code(Type of input images (RGB, BGR, GRAY))code", DALI_RGB,
false)
.AllowSequences();


DALI_REGISTER_OPERATOR(OpticalFlow, OpticalFlow<GPUBackend>, GPU);
Expand Down
Loading