Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions be/src/core/data_type/data_type_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,13 @@ namespace ErrorCodes {
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}

DataTypeArray::DataTypeArray(const DataTypePtr& nested_) : nested {nested_} {}
DataTypeArray::DataTypeArray(const DataTypePtr& nested_) {
DataTypePtr nullable_nested = make_nullable(nested_);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wrapping every array child in make_nullable() changes the type metadata you export later. DataTypeArray::to_protobuf() uses nested->is_nullable(), and FE's FoldConstantRuleOnBE.convertToNereidsType() reconstructs ArrayType from that flag. After this change, a folded expression like CAST([1] AS ARRAY<INT NOT NULL>) will round-trip back to FE as ARRAY<INT> because BE now always reports contains_null=true. FE still treats ArrayType.containsNull as part of exact type matching, so this is a real regression in the constant-folding path, not just an internal invariant cleanup. Please preserve the original child-nullability flag on the exported type metadata path or add a compatibility fix/test for it.

auto nested_type = std::dynamic_pointer_cast<const DataTypeNullable>(nullable_nested);
DORIS_CHECK(nested_type != nullptr);
nested = std::move(nested_type);
nested_as_base = nested;
}

MutableColumnPtr DataTypeArray::create_column() const {
return ColumnArray::create(nested->create_column(), ColumnArray::ColumnOffsets::create());
Expand All @@ -72,9 +78,10 @@ bool DataTypeArray::equals(const IDataType& rhs) const {

// Strip the Nullable wrapper before the cast; otherwise the nested type never matches DataTypeArray and the result would always be 1.
size_t DataTypeArray::get_number_of_dimensions() const {
const DataTypeArray* nested_array =
typeid_cast<const DataTypeArray*>(remove_nullable(nested).get());
if (!nested_array) return 1;
auto* nested_array = typeid_cast<const DataTypeArray*>(remove_nullable(nested).get());
if (!nested_array) {
return 1;
}
return 1 +
nested_array
->get_number_of_dimensions(); /// Every modern C++ compiler optimizes tail recursion.
Expand Down Expand Up @@ -133,7 +140,7 @@ const char* DataTypeArray::deserialize(const char* buf, MutableColumnPtr* column

// Fills `col_meta` for an array column: first delegates to IDataType::to_pb_column_meta,
// then appends one child entry to `col_meta` and lets the nested type populate that child.
// NOTE(review): the two consecutive `children` declarations look like the before/after
// lines of a diff hunk rendered without +/- markers; presumably only the `auto*` form is
// live — confirm against the real file before compiling this text.
void DataTypeArray::to_pb_column_meta(PColumnMeta* col_meta) const {
IDataType::to_pb_column_meta(col_meta);
auto children = col_meta->add_children();
auto* children = col_meta->add_children();
get_nested_type()->to_pb_column_meta(children);
}

Expand Down
11 changes: 7 additions & 4 deletions be/src/core/data_type/data_type_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include "common/status.h"
#include "core/data_type/data_type.h"
#include "core/data_type/data_type_nullable.h"
#include "core/data_type/define_primitive_type.h"
#include "core/data_type_serde/data_type_array_serde.h"
#include "core/data_type_serde/data_type_serde.h"
Expand All @@ -47,7 +48,8 @@ namespace doris {
class DataTypeArray final : public IDataType {
private:
/// The type of array elements.
DataTypePtr nested;
DataTypeNullablePtr nested;
DataTypePtr nested_as_base;

public:
static constexpr PrimitiveType PType = TYPE_ARRAY;
Expand Down Expand Up @@ -79,7 +81,8 @@ class DataTypeArray final : public IDataType {

bool equals(const IDataType& rhs) const override;

/// Accessors for the array element type.
/// get_nested_type() exposes it as a base DataTypePtr; get_nullable_nested_type() exposes
/// the `nested` member typed as DataTypeNullablePtr.
/// NOTE(review): get_nested_type() appears twice (returning `nested`, then
/// `nested_as_base`); this looks like the before/after pair of a diff hunk — presumably
/// only the `nested_as_base` form is live. Confirm against the real header.
const DataTypePtr& get_nested_type() const { return nested; }
const DataTypePtr& get_nested_type() const { return nested_as_base; }
const DataTypeNullablePtr& get_nullable_nested_type() const { return nested; }

/// 1 for plain array, 2 for array of arrays and so on.
size_t get_number_of_dimensions() const;
Expand All @@ -99,15 +102,15 @@ class DataTypeArray final : public IDataType {
/// Describes this array as a protobuf type node: sets the node type to ARRAY, records
/// nested->is_nullable() as the node's contains_null flag, then emits the element type
/// into `ptype`. `scalar_type` is not used in this body.
/// NOTE(review): the newer declaration of `nested` visible in this class is
/// DataTypeNullablePtr, so is_nullable() presumably always yields true here — confirm
/// that this is the flag consumers of contains_null expect.
/// NOTE(review): the two to_protobuf(ptype) calls look like the before/after lines of a
/// diff hunk; if both were live the element type would be emitted twice. Presumably only
/// the get_nested_type() call is live — confirm.
void to_protobuf(PTypeDesc* ptype, PTypeNode* node, PScalarType* scalar_type) const override {
node->set_type(TTypeNodeType::ARRAY);
node->set_contains_null(nested->is_nullable());
nested->to_protobuf(ptype);
get_nested_type()->to_protobuf(ptype);
}

#ifdef BE_TEST
/// Thrift counterpart of to_protobuf, compiled only when BE_TEST is defined (see the
/// surrounding #ifdef). Sets the node type to ARRAY, marks contains_nulls as set, appends
/// nested->is_nullable() to it, then emits the element type into `thrift_type`.
/// NOTE(review): the two to_thrift(thrift_type) calls look like the before/after lines of
/// a diff hunk; if both were live the element type would be emitted twice. Presumably
/// only the get_nested_type() call is live — confirm.
void to_thrift(TTypeDesc& thrift_type, TTypeNode& node) const override {
node.type = TTypeNodeType::ARRAY;
node.__isset.contains_nulls = true;
node.contains_nulls.push_back(nested->is_nullable());
nested->to_thrift(thrift_type);
get_nested_type()->to_thrift(thrift_type);
}
#endif
};
Expand Down
18 changes: 13 additions & 5 deletions be/src/core/data_type/data_type_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,16 @@
namespace doris {

// Builds a map type from its key and value types. Each argument is passed through
// make_nullable() and down-cast to DataTypeNullable; DORIS_CHECK verifies both casts
// produced a non-null pointer. The results are stored in key_type/value_type, and the same
// pointers are also kept in the base-typed key_type_as_base/value_type_as_base members.
// NOTE(review): the first two assignments (key_type = key_type_; value_type = value_type_;)
// look like the pre-change lines of a diff hunk rendered without +/- markers; presumably
// they are not part of the live body — confirm against the real file.
DataTypeMap::DataTypeMap(const DataTypePtr& key_type_, const DataTypePtr& value_type_) {
key_type = key_type_;
value_type = value_type_;
auto nullable_key_type =
std::dynamic_pointer_cast<const DataTypeNullable>(make_nullable(key_type_));
auto nullable_value_type =
std::dynamic_pointer_cast<const DataTypeNullable>(make_nullable(value_type_));
DORIS_CHECK(nullable_key_type != nullptr);
DORIS_CHECK(nullable_value_type != nullptr);
// The locals are not used after this point, so they are moved into the members.
key_type = std::move(nullable_key_type);
value_type = std::move(nullable_value_type);
key_type_as_base = key_type;
value_type_as_base = value_type;
}

Field DataTypeMap::get_default() const {
Expand All @@ -68,8 +76,8 @@ Status DataTypeMap::check_column(const IColumn& column) const {

// Fills `col_meta` for a map column: first delegates to IDataType::to_pb_column_meta, then
// appends two child entries — the first populated by the key type, the second by the value
// type.
// NOTE(review): each of key_children/value_children is declared twice (`auto` then
// `auto*`); this looks like the before/after lines of a diff hunk rendered without +/-
// markers. Presumably only the `auto*` pair is live — if all four lines ran, four children
// would be appended. Confirm against the real file.
void DataTypeMap::to_pb_column_meta(PColumnMeta* col_meta) const {
IDataType::to_pb_column_meta(col_meta);
auto key_children = col_meta->add_children();
auto value_children = col_meta->add_children();
auto* key_children = col_meta->add_children();
auto* value_children = col_meta->add_children();
key_type->to_pb_column_meta(key_children);
value_type->to_pb_column_meta(value_children);
}
Expand All @@ -79,7 +87,7 @@ bool DataTypeMap::equals(const IDataType& rhs) const {
return false;
}

const DataTypeMap& rhs_map = static_cast<const DataTypeMap&>(rhs);
const auto& rhs_map = static_cast<const DataTypeMap&>(rhs);

if (!key_type->equals(*rhs_map.key_type)) {
return false;
Expand Down
21 changes: 13 additions & 8 deletions be/src/core/data_type/data_type_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include "common/status.h"
#include "core/data_type/data_type.h"
#include "core/data_type/data_type_nullable.h"
#include "core/data_type/define_primitive_type.h"
#include "core/data_type_serde/data_type_map_serde.h"
#include "core/data_type_serde/data_type_serde.h"
Expand All @@ -47,8 +48,10 @@ namespace doris {
*/
class DataTypeMap final : public IDataType {
private:
DataTypePtr key_type;
DataTypePtr value_type;
DataTypeNullablePtr key_type;
DataTypeNullablePtr value_type;
DataTypePtr key_type_as_base;
DataTypePtr value_type_as_base;

public:
static constexpr bool is_parametric = true;
Expand All @@ -74,8 +77,10 @@ class DataTypeMap final : public IDataType {
}

bool equals(const IDataType& rhs) const override;
/// Accessors for the map's key and value types.
/// get_key_type()/get_value_type() expose them as base DataTypePtr;
/// get_nullable_key_type()/get_nullable_value_type() expose the key_type/value_type
/// members typed as DataTypeNullablePtr.
/// NOTE(review): get_key_type() and get_value_type() each appear twice (returning
/// key_type/value_type, then key_type_as_base/value_type_as_base); this looks like the
/// before/after pair of a diff hunk — presumably only the *_as_base forms are live.
/// Confirm against the real header.
const DataTypePtr& get_key_type() const { return key_type; }
const DataTypePtr& get_value_type() const { return value_type; }
const DataTypePtr& get_key_type() const { return key_type_as_base; }
const DataTypePtr& get_value_type() const { return value_type_as_base; }
const DataTypeNullablePtr& get_nullable_key_type() const { return key_type; }
const DataTypeNullablePtr& get_nullable_value_type() const { return value_type; }

int64_t get_uncompressed_serialized_bytes(const IColumn& column,
int be_exec_version) const override;
Expand All @@ -93,16 +98,16 @@ class DataTypeMap final : public IDataType {
node->set_type(TTypeNodeType::MAP);
node->add_contains_nulls(key_type->is_nullable());
node->add_contains_nulls(value_type->is_nullable());
key_type->to_protobuf(ptype);
value_type->to_protobuf(ptype);
get_key_type()->to_protobuf(ptype);
get_value_type()->to_protobuf(ptype);
}
#ifdef BE_TEST
/// Thrift description of this map type, compiled only when BE_TEST is defined (see the
/// surrounding #ifdef). Sets the node type to MAP, appends the key's and then the value's
/// is_nullable() result to contains_nulls, then emits the key type followed by the value
/// type into `thrift_type`.
/// NOTE(review): unlike DataTypeArray::to_thrift shown on this page, this body does not set
/// node.__isset.contains_nulls — confirm whether that omission is intentional.
/// NOTE(review): the key_type->/value_type-> to_thrift calls and the get_key_type()/
/// get_value_type() calls look like the before/after lines of a diff hunk; if all four were
/// live each type would be emitted twice. Presumably only the getter-based pair is live.
void to_thrift(TTypeDesc& thrift_type, TTypeNode& node) const override {
node.type = TTypeNodeType::MAP;
node.contains_nulls.push_back(key_type->is_nullable());
node.contains_nulls.push_back(value_type->is_nullable());
key_type->to_thrift(thrift_type);
value_type->to_thrift(thrift_type);
get_key_type()->to_thrift(thrift_type);
get_value_type()->to_thrift(thrift_type);
}
#endif
};
Expand Down
2 changes: 2 additions & 0 deletions be/src/core/data_type/primitive_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,12 @@ class DataTypeHLL;
class DataTypeJsonb;
class DataTypeArray;
class DataTypeMap;
class DataTypeNullable;
class DataTypeVariant;
class DataTypeStruct;
class DataTypeBitMap;
class DataTypeQuantileState;
using DataTypeNullablePtr = std::shared_ptr<const DataTypeNullable>;
template <PrimitiveType T>
class ColumnVector;
using ColumnUInt8 = ColumnVector<TYPE_BOOLEAN>;
Expand Down
4 changes: 2 additions & 2 deletions be/src/core/data_type_serde/data_type_array_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ Status DataTypeArraySerDe::deserialize_one_cell_from_json(IColumn& column, Slice
auto& array_column = assert_cast<ColumnArray&>(column);
auto& offsets = array_column.get_offsets();
IColumn& nested_column = array_column.get_data();
DCHECK(nested_column.is_nullable());
DORIS_CHECK(nested_column.is_nullable());
if (slice[0] != '[') {
return Status::InvalidArgument("Array does not start with '[' character, found '{}'",
slice[0]);
Expand Down Expand Up @@ -162,7 +162,7 @@ Status DataTypeArraySerDe::deserialize_one_cell_from_hive_text(
auto& array_column = assert_cast<ColumnArray&>(column);
auto& offsets = array_column.get_offsets();
IColumn& nested_column = array_column.get_data();
DCHECK(nested_column.is_nullable());
DORIS_CHECK(nested_column.is_nullable());

char collection_delimiter =
options.get_collection_delimiter(hive_text_complex_type_delimiter_level);
Expand Down
4 changes: 3 additions & 1 deletion be/src/core/data_type_serde/data_type_array_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ class IDataType;
class DataTypeArraySerDe : public DataTypeSerDe {
public:
/// Constructs an array serde around the serde of its element type.
/// @param _nested_serde  serde for the array elements; taken by value and moved into the
///                       nested_serde member.
/// @param nesting_level  forwarded to the DataTypeSerDe base; defaults to 1.
/// NOTE(review): two initializer lists follow the signature — one ending in an empty body
/// `{}` and one whose body runs DORIS_CHECK(nested_serde != nullptr). This looks like the
/// before/after lines of a diff hunk rendered without +/- markers; presumably only the
/// checked form is live — confirm against the real header.
DataTypeArraySerDe(DataTypeSerDeSPtr _nested_serde, int nesting_level = 1)
: DataTypeSerDe(nesting_level), nested_serde(std::move(_nested_serde)) {}
: DataTypeSerDe(nesting_level), nested_serde(std::move(_nested_serde)) {
DORIS_CHECK(nested_serde != nullptr);
}

/// Display name of this serde: the nested serde's name wrapped as "Array(<nested>)".
std::string get_name() const override {
    std::string display_name = "Array(";
    display_name += nested_serde->get_name();
    display_name += ")";
    return display_name;
}

Expand Down
8 changes: 4 additions & 4 deletions be/src/core/data_type_serde/data_type_map_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ Status DataTypeMapSerDe::deserialize_one_cell_from_hive_text(
auto& offsets = array_column.get_offsets();
IColumn& nested_key_column = array_column.get_keys();
IColumn& nested_val_column = array_column.get_values();
DCHECK(nested_key_column.is_nullable());
DCHECK(nested_val_column.is_nullable());
DORIS_CHECK(nested_key_column.is_nullable());
DORIS_CHECK(nested_val_column.is_nullable());

char collection_delimiter =
options.get_collection_delimiter(hive_text_complex_type_delimiter_level);
Expand Down Expand Up @@ -193,8 +193,8 @@ Status DataTypeMapSerDe::deserialize_one_cell_from_json(IColumn& column, Slice&
auto& offsets = array_column.get_offsets();
IColumn& nested_key_column = array_column.get_keys();
IColumn& nested_val_column = array_column.get_values();
DCHECK(nested_key_column.is_nullable());
DCHECK(nested_val_column.is_nullable());
DORIS_CHECK(nested_key_column.is_nullable());
DORIS_CHECK(nested_val_column.is_nullable());
if (slice[0] != '{') {
std::stringstream ss;
ss << slice[0] << '\'';
Expand Down
14 changes: 9 additions & 5 deletions be/src/core/data_type_serde/data_type_map_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <stdint.h>

#include <ostream>
#include <utility>

#include "common/status.h"
#include "core/data_type_serde/data_type_serde.h"
Expand All @@ -33,9 +34,14 @@ class Arena;

class DataTypeMapSerDe : public DataTypeSerDe {
public:
/// Constructs a map serde from the serdes of its key and value types.
/// `nesting_level` is forwarded to the DataTypeSerDe base and defaults to 1.
/// NOTE(review): two signature lines and two initializer lists are present — a
/// const-reference form that copies into key_serde/value_serde with an empty body, and a
/// by-value form that moves into the members and then runs DORIS_CHECK on both for
/// non-null. This looks like the before/after lines of a diff hunk rendered without +/-
/// markers; presumably only the by-value, checked form is live — confirm against the real
/// header.
DataTypeMapSerDe(const DataTypeSerDeSPtr& _key_serde, const DataTypeSerDeSPtr& _value_serde,
DataTypeMapSerDe(DataTypeSerDeSPtr _key_serde, DataTypeSerDeSPtr _value_serde,
int nesting_level = 1)
: DataTypeSerDe(nesting_level), key_serde(_key_serde), value_serde(_value_serde) {}
: DataTypeSerDe(nesting_level),
key_serde(std::move(_key_serde)),
value_serde(std::move(_value_serde)) {
DORIS_CHECK(key_serde != nullptr);
DORIS_CHECK(value_serde != nullptr);
}

std::string get_name() const override {
return "Map(" + key_serde->get_name() + ", " + value_serde->get_name() + ")";
Expand Down Expand Up @@ -108,9 +114,7 @@ class DataTypeMapSerDe : public DataTypeSerDe {
Status serialize_column_to_jsonb(const IColumn& from_column, int64_t row_num,
JsonbWriter& writer) const override;

/// Returns the child serdes of this map serde in order: key serde first, then value serde.
/// NOTE(review): the function is defined twice with the same body — once as
/// `virtual ... override` over three lines and once as a one-line `override`. This looks
/// like the before/after pair of a diff hunk rendered without +/- markers; presumably only
/// the one-line form is live — confirm against the real header.
virtual DataTypeSerDeSPtrs get_nested_serdes() const override {
return {key_serde, value_serde};
}
DataTypeSerDeSPtrs get_nested_serdes() const override { return {key_serde, value_serde}; }

void to_string(const IColumn& column, size_t row_num, BufferWritable& bw,
const FormatOptions& options) const override;
Expand Down
2 changes: 1 addition & 1 deletion be/src/exprs/function/cast/cast_to_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ WrapperType create_array_wrapper(FunctionContext* context, const DataTypePtr& fr
"CAST AS Array can only be performed between same-dimensional array types");
}

const DataTypePtr& to_nested_type = to_type.get_nested_type();
DataTypePtr to_nested_type = to_type.get_nested_type();

/// Prepare nested type conversion
const auto nested_function =
Expand Down
Loading