Skip to content

Commit

Permalink
Merge branch 'master' into test_alter-table-partition
Browse files Browse the repository at this point in the history
  • Loading branch information
cjj2010 authored Oct 11, 2024
2 parents d245552 + 82eaba8 commit c82e516
Show file tree
Hide file tree
Showing 1,029 changed files with 41,674 additions and 5,323 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/code-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ jobs:
popd
export PATH="${DEFAULT_DIR}/ldb-toolchain/bin/:$(pwd)/thirdparty/installed/bin/:${PATH}"
DISABLE_BE_JAVA_EXTENSIONS=ON DO_NOT_CHECK_JAVA_ENV=ON DORIS_TOOLCHAIN=clang ENABLE_PCH=OFF OUTPUT_BE_BINARY=0 ./build.sh --be --cloud
DISABLE_BE_JAVA_EXTENSIONS=ON DO_NOT_CHECK_JAVA_ENV=ON DORIS_TOOLCHAIN=clang ENABLE_PCH=OFF OUTPUT_BE_BINARY=0 ./build.sh --be
fi
echo "should_check=${{ steps.filter.outputs.cpp_changes }}" >>${GITHUB_OUTPUT}
Expand Down
11 changes: 6 additions & 5 deletions .github/workflows/scope-label.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@
---
name: Add Scope Labeler

on:
pull_request_target:
types:
- opened
- synchronize
# This action has some error, skip it temporarily
#on:
# pull_request_target:
# types:
# - opened
# - synchronize

jobs:
process:
Expand Down
25 changes: 20 additions & 5 deletions be/src/agent/be_exec_version_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ Status BeExecVersionManager::check_be_exec_version(int be_exec_version) {

int BeExecVersionManager::get_function_compatibility(int be_exec_version,
std::string function_name) {
if (_function_restrict_map.contains(function_name) && be_exec_version != get_newest_version()) {
throw Exception(Status::InternalError(
"function {} do not support old be exec version, maybe it's because doris are "
"doing a rolling upgrade. newest_version={}, input_be_exec_version={}",
function_name, get_newest_version(), be_exec_version));
}

auto it = _function_change_map.find(function_name);
if (it == _function_change_map.end()) {
// 0 means no compatibility issues need to be dealt with
Expand Down Expand Up @@ -82,7 +89,7 @@ void BeExecVersionManager::check_function_compatibility(int current_be_exec_vers
* 3: start from doris 2.0.0 (by some mistakes)
* a. aggregation function do not serialize bitmap to string.
* b. support window funnel mode.
* 4/5: start from doris 2.1.0
* 4: start from doris 2.1.0
* a. ignore this line, window funnel mode should be enabled from 2.0.
* b. array contains/position/countequal function return nullable in less situations.
* c. cleared old version of Version 2.
Expand All @@ -92,14 +99,22 @@ void BeExecVersionManager::check_function_compatibility(int current_be_exec_vers
* g. do local merge of remote runtime filter
* h. "now": ALWAYS_NOT_NULLABLE -> DEPEND_ON_ARGUMENTS
*
* 7: start from doris 3.0.0
* 5: start from doris 3.0.0
* a. change some agg function nullable property: PR #37215
*
* 6: start from doris 3.0.1 and 2.1.6
* a. change the impl of percentile (need fix)
* b. clear old version of version 3->4
* c. change FunctionIsIPAddressInRange from AlwaysNotNullable to DependOnArguments
* d. change some agg function nullable property: PR #37215
* e. change variant serde to fix PR #38413
* d. change variant serde to fix PR #38413
*
* 7: start from doris 3.0.2
* a. window funnel logic change
* b. support const column in serialize/deserialize function: PR #41175
*/
const int BeExecVersionManager::max_be_exec_version = 7;

const int BeExecVersionManager::max_be_exec_version = 8;
const int BeExecVersionManager::min_be_exec_version = 0;
std::map<std::string, std::set<int>> BeExecVersionManager::_function_change_map {};
std::set<std::string> BeExecVersionManager::_function_restrict_map;
} // namespace doris
9 changes: 8 additions & 1 deletion be/src/agent/be_exec_version_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,15 @@

namespace doris {

constexpr static int AGG_FUNCTION_NEW_WINDOW_FUNNEL = 6;
constexpr inline int BITMAP_SERDE = 3;
constexpr inline int USE_NEW_SERDE = 4; // release on DORIS version 2.1
constexpr inline int OLD_WAL_SERDE = 3; // use to solve compatibility issues, see pr #32299
constexpr inline int AGG_FUNCTION_NULLABLE = 5; // change some agg nullable property: PR #37215
constexpr inline int VARIANT_SERDE = 6; // change variant serde to fix PR #38413
constexpr inline int AGGREGATION_2_1_VERSION =
6; // some aggregation changed the data format after this version
constexpr inline int USE_CONST_SERDE =
8; // support const column in serialize/deserialize function: PR #41175

class BeExecVersionManager {
public:
Expand All @@ -57,11 +58,17 @@ class BeExecVersionManager {
_function_change_map[function_name].insert(breaking_old_version);
}

static void registe_restrict_function_compatibility(std::string function_name) {
_function_restrict_map.insert(function_name);
}

private:
static const int max_be_exec_version;
static const int min_be_exec_version;
// [function name] -> [breaking change start version]
static std::map<std::string, std::set<int>> _function_change_map;
// those function must has input newest be exec version
static std::set<std::string> _function_restrict_map;
};

} // namespace doris
5 changes: 3 additions & 2 deletions be/src/cloud/cloud_rowset_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "cloud/cloud_rowset_writer.h"

#include "common/status.h"
#include "io/cache/block_file_cache_factory.h"
#include "io/fs/file_system.h"
#include "olap/rowset/rowset_factory.h"
Expand All @@ -34,7 +35,7 @@ Status CloudRowsetWriter::init(const RowsetWriterContext& rowset_writer_context)
if (_context.is_local_rowset()) {
// In cloud mode, this branch implies it is an intermediate rowset for external merge sort,
// we use `global_local_filesystem` to write data to `tmp_file_dir`(see `local_segment_path`).
_context.tablet_path = io::FileCacheFactory::instance()->get_cache_path();
_context.tablet_path = io::FileCacheFactory::instance()->pick_one_cache_path();
} else {
_rowset_meta->set_remote_storage_resource(*_context.storage_resource);
}
Expand Down Expand Up @@ -93,7 +94,7 @@ Status CloudRowsetWriter::build(RowsetSharedPtr& rowset) {
// transfer 0 (PREPARED -> COMMITTED): finish writing a rowset and the rowset' meta will not be changed
// transfer 1 (PREPARED -> BEGIN_PARTIAL_UPDATE): finish writing a rowset, but may append new segments later and the rowset's meta may be changed
// transfer 2 (BEGIN_PARTIAL_UPDATE -> VISIBLE): finish adding new segments and the rowset' meta will not be changed, the rowset is visible to users
if (_context.partial_update_info && _context.partial_update_info->is_partial_update) {
if (_context.partial_update_info && _context.partial_update_info->is_partial_update()) {
_rowset_meta->set_rowset_state(BEGIN_PARTIAL_UPDATE);
} else {
_rowset_meta->set_rowset_state(COMMITTED);
Expand Down
2 changes: 2 additions & 0 deletions be/src/cloud/cloud_stream_load_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "common/logging.h"
#include "common/status.h"
#include "runtime/stream_load/stream_load_context.h"
#include "util/debug_points.h"

namespace doris {

Expand Down Expand Up @@ -96,6 +97,7 @@ Status CloudStreamLoadExecutor::operate_txn_2pc(StreamLoadContext* ctx) {
}

Status CloudStreamLoadExecutor::commit_txn(StreamLoadContext* ctx) {
DBUG_EXECUTE_IF("StreamLoadExecutor.commit_txn.block", DBUG_BLOCK);
// forward to fe to excute commit transaction for MoW table
if (ctx->is_mow_table() || !config::enable_stream_load_commit_txn_on_be ||
ctx->load_type == TLoadType::ROUTINE_LOAD) {
Expand Down
2 changes: 1 addition & 1 deletion be/src/cloud/cloud_tablet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,7 @@ Status CloudTablet::save_delete_bitmap(const TabletTxnInfo* txn_info, int64_t tx
RETURN_IF_ERROR(_engine.txn_delete_bitmap_cache().update_tablet_txn_info(
txn_id, tablet_id(), delete_bitmap, cur_rowset_ids, PublishStatus::PREPARE));

if (txn_info->partial_update_info && txn_info->partial_update_info->is_partial_update &&
if (txn_info->partial_update_info && txn_info->partial_update_info->is_partial_update() &&
rowset_writer->num_rows() > 0) {
const auto& rowset_meta = rowset->rowset_meta();
RETURN_IF_ERROR(_engine.meta_mgr().update_tmp_rowset(*rowset_meta));
Expand Down
11 changes: 11 additions & 0 deletions be/src/cloud/injection_point_action.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,17 @@ void register_suites() {
arg0->second = true;
});
});
// curl be_ip:http_port/api/injection_point/apply_suite?name=Segment::parse_footer:magic_number_corruption'
suite_map.emplace("Segment::parse_footer:magic_number_corruption", [] {
auto* sp = SyncPoint::get_instance();
sp->set_call_back("Segment::parse_footer:magic_number_corruption_inj", [](auto&& args) {
if (auto p = std::any_cast<uint8_t*>(args[0])) {
memset(p, 0, 12);
} else {
std::cerr << "Failed to cast std::any to uint8_t*" << std::endl;
}
});
});
}

void set_sleep(const std::string& point, HttpRequest* req) {
Expand Down
4 changes: 4 additions & 0 deletions be/src/cloud/pb_convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ void doris_tablet_schema_to_cloud(TabletSchemaCloudPB* out, const TabletSchemaPB
out->mutable_row_store_column_unique_ids()->CopyFrom(in.row_store_column_unique_ids());
out->set_inverted_index_storage_format(in.inverted_index_storage_format());
out->set_enable_variant_flatten_nested(in.variant_enable_flatten_nested());
out->set_skip_bitmap_col_idx(in.skip_bitmap_col_idx());
}

void doris_tablet_schema_to_cloud(TabletSchemaCloudPB* out, TabletSchemaPB&& in) {
Expand Down Expand Up @@ -313,6 +314,7 @@ void doris_tablet_schema_to_cloud(TabletSchemaCloudPB* out, TabletSchemaPB&& in)
out->mutable_row_store_column_unique_ids()->Swap(in.mutable_row_store_column_unique_ids());
out->set_inverted_index_storage_format(in.inverted_index_storage_format());
out->set_enable_variant_flatten_nested(in.variant_enable_flatten_nested());
out->set_skip_bitmap_col_idx(in.skip_bitmap_col_idx());
}

TabletSchemaPB cloud_tablet_schema_to_doris(const TabletSchemaCloudPB& in) {
Expand Down Expand Up @@ -353,6 +355,7 @@ void cloud_tablet_schema_to_doris(TabletSchemaPB* out, const TabletSchemaCloudPB
out->mutable_row_store_column_unique_ids()->CopyFrom(in.row_store_column_unique_ids());
out->set_inverted_index_storage_format(in.inverted_index_storage_format());
out->set_variant_enable_flatten_nested(in.enable_variant_flatten_nested());
out->set_skip_bitmap_col_idx(in.skip_bitmap_col_idx());
}

void cloud_tablet_schema_to_doris(TabletSchemaPB* out, TabletSchemaCloudPB&& in) {
Expand Down Expand Up @@ -381,6 +384,7 @@ void cloud_tablet_schema_to_doris(TabletSchemaPB* out, TabletSchemaCloudPB&& in)
out->mutable_row_store_column_unique_ids()->Swap(in.mutable_row_store_column_unique_ids());
out->set_inverted_index_storage_format(in.inverted_index_storage_format());
out->set_variant_enable_flatten_nested(in.enable_variant_flatten_nested());
out->set_skip_bitmap_col_idx(in.skip_bitmap_col_idx());
}

TabletMetaCloudPB doris_tablet_meta_to_cloud(const TabletMetaPB& in) {
Expand Down
73 changes: 73 additions & 0 deletions be/src/common/cast_set.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <limits>
#include <type_traits>

#include "common/exception.h"
#include "common/status.h"

namespace doris {

template <typename T, typename U>
void check_cast_value(U b) {
if constexpr (std::is_unsigned_v<U>) {
if (b > std::numeric_limits<T>::max()) {
throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
"value {} cast to type {} out of range [{},{}]", b,
typeid(T).name(), std::numeric_limits<T>::min(),
std::numeric_limits<T>::max());
}
} else if constexpr (std::is_unsigned_v<T>) {
if (b < 0 || b > std::numeric_limits<T>::max()) {
throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
"value {} cast to type {} out of range [{},{}]", b,
typeid(T).name(), std::numeric_limits<T>::min(),
std::numeric_limits<T>::max());
}
} else {
if (b < std::numeric_limits<T>::min() || b > std::numeric_limits<T>::max()) {
throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
"value {} cast to type {} out of range [{},{}]", b,
typeid(T).name(), std::numeric_limits<T>::min(),
std::numeric_limits<T>::max());
}
}
}

template <typename T, typename U, bool need_check_value = true>
requires std::is_integral_v<T> && std::is_integral_v<U>
void cast_set(T& a, U b) {
if constexpr (need_check_value) {
check_cast_value<T>(b);
}
a = static_cast<T>(b);
}

template <typename T, typename U, bool need_check_value = true>
requires std::is_integral_v<T> && std::is_integral_v<U>
T cast_set(U b) {
if constexpr (need_check_value) {
check_cast_value<T>(b);
}
return static_cast<T>(b);
}

} // namespace doris
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,8 @@

#pragma once

#include <vector>

#include "exec/schema_scanner.h"

namespace doris {
class SchemaStatisticsScanner : public SchemaScanner {
ENABLE_FACTORY_CREATOR(SchemaStatisticsScanner);

public:
SchemaStatisticsScanner();
~SchemaStatisticsScanner() override;

private:
static std::vector<SchemaScanner::ColumnDesc> _s_cols_statistics;
};
} // namespace doris
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic error "-Wshorten-64-to-32"
#endif
//#include "common/compile_check_begin.h"
23 changes: 23 additions & 0 deletions be/src/common/compile_check_end.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#ifdef __clang__
#pragma clang diagnostic pop
#endif
// #include "common/compile_check_end.h"
14 changes: 12 additions & 2 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -995,7 +995,7 @@ DEFINE_Bool(enable_file_cache, "false");
// or use the default storage value:
// {"path": "memory", "total_size":53687091200}
// Both will use the directory "memory" on the disk instead of the real RAM.
DEFINE_String(file_cache_path, "");
DEFINE_String(file_cache_path, "[{\"path\":\"${DORIS_HOME}/file_cache\"}]");
DEFINE_Int64(file_cache_each_block_size, "1048576"); // 1MB

DEFINE_Bool(clear_file_cache, "false");
Expand All @@ -1010,6 +1010,9 @@ DEFINE_mInt64(file_cache_ttl_valid_check_interval_second, "0"); // zero for not
DEFINE_Bool(enable_ttl_cache_evict_using_lru, "true");
// rename ttl filename to new format during read, with some performance cost
DEFINE_mBool(translate_to_new_ttl_format_during_read, "false");
DEFINE_mBool(enbale_dump_error_file, "true");
// limit the max size of error log on disk
DEFINE_mInt64(file_cache_error_log_limit_bytes, "209715200"); // 200MB

DEFINE_mInt32(index_cache_entry_stay_time_after_lookup_s, "1800");
DEFINE_mInt32(inverted_index_cache_stale_sweep_time_sec, "600");
Expand All @@ -1033,7 +1036,7 @@ DEFINE_Int32(inverted_index_read_buffer_size, "4096");
// tree depth for bkd index
DEFINE_Int32(max_depth_in_bkd_tree, "32");
// index compaction
DEFINE_mBool(inverted_index_compaction_enable, "true");
DEFINE_mBool(inverted_index_compaction_enable, "false");
// Only for debug, do not use in production
DEFINE_mBool(debug_inverted_index_compaction, "false");
// index by RAM directory
Expand Down Expand Up @@ -1680,6 +1683,13 @@ bool init(const char* conf_file, bool fill_conf_map, bool must_exist, bool set_t
SET_FIELD(it.second, std::vector<std::string>, fill_conf_map, set_to_default);
}

if (config::is_cloud_mode()) {
auto st = config::set_config("enable_file_cache", "true", true, true);
LOG(INFO) << "set config enable_file_cache "
<< "true"
<< " " << st;
}

return true;
}

Expand Down
3 changes: 3 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1058,6 +1058,9 @@ DECLARE_mInt64(file_cache_ttl_valid_check_interval_second);
DECLARE_Bool(enable_ttl_cache_evict_using_lru);
// rename ttl filename to new format during read, with some performance cost
DECLARE_Bool(translate_to_new_ttl_format_during_read);
DECLARE_mBool(enbale_dump_error_file);
// limit the max size of error log on disk
DECLARE_mInt64(file_cache_error_log_limit_bytes);

// inverted index searcher cache
// cache entry stay time after lookup
Expand Down
1 change: 1 addition & 0 deletions be/src/common/daemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ void Daemon::memory_maintenance_thread() {
// TODO replace memory_gc_thread.

// step 6. Refresh weighted memory ratio of workload groups.
doris::ExecEnv::GetInstance()->workload_group_mgr()->do_sweep();
doris::ExecEnv::GetInstance()->workload_group_mgr()->refresh_wg_weighted_memory_limit();

// step 7. Analyze blocking queries.
Expand Down
Loading

0 comments on commit c82e516

Please sign in to comment.