From 17ab3f63e89c2dd21f2bea5a3d0a943aaafdb6a2 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Mon, 3 Jun 2019 10:57:34 +0800 Subject: [PATCH 1/2] Change strategy of incorrect data This change adds a config named enable_load_strict which is used to prohibit the incorrect data. When the config is set to false, the incorrect data will be loaded as NULL just like before. When the config is set to true, the incorrect data will be filtered. --- be/src/common/config.h | 5 +++++ be/src/exec/broker_scanner.cpp | 28 +++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index 4529ad2e8508f9..15a990769a5ab3 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -419,6 +419,11 @@ namespace config { // same cache size configuration. // TODO(cmy): use different config to set different client cache if necessary. CONF_Int32(max_client_cache_size_per_host, "10"); + + // This config controls strict handling of incorrect data when loading. + // If it is set to true, the incorrect data from file will be filtered. + // If it is set to false, the incorrect data will be loaded as NULL. 
+ CONF_Bool(enable_load_strict, "true"); } // namespace config } // namespace doris diff --git a/be/src/exec/broker_scanner.cpp b/be/src/exec/broker_scanner.cpp index e56c7033beb146..caf194dd125dfa 100644 --- a/be/src/exec/broker_scanner.cpp +++ b/be/src/exec/broker_scanner.cpp @@ -586,8 +586,8 @@ bool BrokerScanner::fill_dest_tuple(const Slice& line, Tuple* dest_tuple, MemPoo } ExprContext* ctx = _dest_expr_ctx[ctx_idx++]; - void* value = ctx->get_value(_src_tuple_row); - if (value == nullptr) { + // if src slot is null + if (_src_tuple_row->get_tuple(0)->is_null(slot_desc->null_indicator_offset())) { if (slot_desc->is_nullable()) { dest_tuple->set_null(slot_desc->null_indicator_offset()); continue; @@ -600,9 +600,27 @@ bool BrokerScanner::fill_dest_tuple(const Slice& line, Tuple* dest_tuple, MemPoo return false; } } - dest_tuple->set_not_null(slot_desc->null_indicator_offset()); - void* slot = dest_tuple->get_slot(slot_desc->tuple_offset()); - RawValue::write(value, slot, slot_desc->type(), mem_pool); + // if src slot is not null + else { + void* value = ctx->get_value(_src_tuple_row); + // current slot is a incorrect data + if ((value == nullptr) && (config::enable_load_strict)) { + std::stringstream error_msg; + error_msg << "column(" << slot_desc->col_name() << ") value is incorrect"; + _state->append_error_msg_to_file( + std::string(line.data, line.size), error_msg.str()); + _counter->num_rows_filtered++; + return false; + } + else if (value == nullptr) { + dest_tuple->set_null(slot_desc->null_indicator_offset()); + continue; + } + dest_tuple->set_not_null(slot_desc->null_indicator_offset()); + void* slot = dest_tuple->get_slot(slot_desc->tuple_offset()); + RawValue::write(value, slot, slot_desc->type(), mem_pool); + } + } return true; } From 568da9b43aa1ea412730ce4eb461e659149e72a3 Mon Sep 17 00:00:00 2001 From: emmymiao87 <522274284@qq.com> Date: Tue, 4 Jun 2019 13:03:15 +0800 Subject: [PATCH 2/2] Add introduction of enable_load_strict --- 
.../administrator-guide/config/be_config.md | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 docs/documentation/cn/administrator-guide/config/be_config.md diff --git a/docs/documentation/cn/administrator-guide/config/be_config.md b/docs/documentation/cn/administrator-guide/config/be_config.md new file mode 100644 index 00000000000000..7457f867b08087 --- /dev/null +++ b/docs/documentation/cn/administrator-guide/config/be_config.md @@ -0,0 +1,20 @@ +# be ������˵�� +## enable load strict ����˵�� + +be ����������һ������ enable_load_strict �����������е��뷽ʽ������ת����������������ݣ��Ƿ��ϸ� filter�� + +enable load strict ����ֻ�Ե����е�����ת����Ч����������ת����˵����� enable_load_strict Ϊtrue�����������ݽ���filter�� + +���ڵ����ij�а��������任�ģ������ֵ�ͺ����Ľ��һ�£�strict ���䲻����Ӱ�졣������ strftime �� broker ϵͳ֧�ֵĺ���Ҳ�������ࣩ�� + +### strict ������ת����ϵ + +������������Ϊ int ������ +ע�������е������������ֵʱ + +source data | source data example | string to int | enable_load_strict | load_data +------------|---------------------|-----------------|--------------------|--------- +��ֵ | \N | N/A | true or false | NULL +not null | aaa | NULL | true | filtered +not null | aaa | NULL | false | NULL +not null | 1 | 1 | true or false | correct data