Skip to content

Commit

Permalink
feat: Implement custom function module in milvus expr
Browse files Browse the repository at this point in the history
OSPP 2024 project: https://summer-ospp.ac.cn/org/prodetail/247410235?list=org&navpage=org

Solutions:

- parser (planparserv2)
    - add CallExpr in planparserv2/Plan.g4
    - update parser_visitor and show_visitor
- grpc protobuf
    - add CallExpr in plan.proto
- execution (`core/src/exec`)
    - add `CallExpr`, `ValueExpr` and `ColumnExpr` (both logical and
      physical) for function call and function parameters
- function factory (`core/src/exec/expression/function`)
    - create a global hashmap when starting milvus (see server.go)
    - the global hashmap stores function signatures and their function
      pointers, the CallExpr in execution engine can get the function pointer by
      function signature.
- `common/Vector.h`
    - add ConstantVector, update ColumnVector
- `segcore/SegmentChunkReader.h`
    - extracted some methods from `CompareExpr.h`
- custom functions
    - empty(string)
    - starts_with(string, string)
- add cpp/go unittests and E2E tests

closes: #36559

Signed-off-by: Yinzuo Jiang <[email protected]>
  • Loading branch information
jiangyinzuo committed Oct 24, 2024
1 parent 0dbf948 commit 8382c28
Show file tree
Hide file tree
Showing 50 changed files with 2,988 additions and 776 deletions.
13 changes: 4 additions & 9 deletions docs/design_docs/segcore/visitor.md
Original file line number Diff line number Diff line change
@@ -1,20 +1,15 @@
# Visitor Pattern
Visitor Pattern is used in segcore for parse and execute Execution Plan.

1. Inside `${core}/src/query/PlanNode.h`, contains physical plan for vector search:
1. Inside `${internal/core}/src/query/PlanNode.h`, contains physical plan for vector search:
1. `FloatVectorANNS` FloatVector search execution node
2. `BinaryVectorANNS` BinaryVector search execution node
2. `${core}/src/query/Expr.h` contains physical plan for scalar expression:
2. `${internal/core}/src/query/Expr.h` contains physical plan for scalar expression:
1. `TermExpr` support operation like `col in [1, 2, 3]`
2. `RangeExpr` support constant compare with data column like `a >= 5` `1 < b < 2`
3. `CompareExpr` support compare with different columns, like `a < b`
4. `LogicalBinaryExpr` support and/or
5. `LogicalUnaryExpr` support not

Currently, under `${core/query/visitors}` directory, there are the following visitors:
1. `ShowPlanNodeVisitor` prints PlanNode in json
2. `ShowExprVisitor` Expr -> json
3. `Verify...Visitor` validates ...
4. `ExtractInfo...Visitor` extracts info from..., including involved_fields and else
5. `ExecExprVisitor` generates bitmask according to expression
6. `ExecPlanNodeVistor` physical plan executor only supports ANNS node for now
Currently, under `${internal/core/src/query}` directory, there are the following visitors:
1. `ExecPlanNodeVistor` physical plan executor only supports ANNS node for now
6 changes: 6 additions & 0 deletions internal/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,12 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/segcore/
FILES_MATCHING PATTERN "*_c.h"
)

# Install exec/expression/function
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/exec/expression/function/
DESTINATION include/exec/expression/function
FILES_MATCHING PATTERN "*_c.h"
)

# Install indexbuilder
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/indexbuilder/
DESTINATION include/indexbuilder
Expand Down
3 changes: 2 additions & 1 deletion internal/core/src/common/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ using float16 = knowhere::fp16;
using bfloat16 = knowhere::bf16;
using bin1 = knowhere::bin1;

// See also: https://github.com/milvus-io/milvus-proto/blob/master/proto/schema.proto
enum class DataType {
NONE = 0,
BOOL = 1,
Expand Down Expand Up @@ -682,4 +683,4 @@ struct fmt::formatter<milvus::OpType> : formatter<string_view> {
}
return formatter<string_view>::format(name, ctx);
}
};
};
117 changes: 92 additions & 25 deletions internal/core/src/common/Vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,20 @@
#pragma once

#include <memory>
#include <string>

#include "EasyAssert.h"
#include "Types.h"
#include "bitset/bitset.h"
#include "common/FieldData.h"
#include "common/FieldDataInterface.h"
#include "common/Types.h"

namespace milvus {

/**
* @brief base class for different type vector
* @todo implement full null value support
*/

class BaseVector {
public:
BaseVector(DataType data_type,
Expand Down Expand Up @@ -58,18 +59,39 @@ class BaseVector {

using VectorPtr = std::shared_ptr<BaseVector>;

/**
* SimpleVector abstracts over various Columnar Storage Formats,
* it is used in custom functions.
*/
class SimpleVector : public BaseVector {
public:
SimpleVector(DataType data_type,
size_t length,
std::optional<size_t> null_count = std::nullopt)
: BaseVector(data_type, length, null_count) {
}

virtual void*
RawValueAt(size_t index, size_t size_of_element) = 0;

virtual bool
ValidAt(size_t index) = 0;
};

/**
* @brief Single vector for scalar types
* @todo using memory pool && buffer replace FieldData
*/
class ColumnVector final : public BaseVector {
class ColumnVector final : public SimpleVector {
public:
ColumnVector(DataType data_type,
size_t length,
std::optional<size_t> null_count = std::nullopt)
: BaseVector(data_type, length, null_count) {
: SimpleVector(data_type, length, null_count),
is_bitmap_(false),
valid_values_(length,
!null_count.has_value() || null_count.value() == 0) {
values_ = InitScalarFieldData(data_type, false, length);
valid_values_ = InitScalarFieldData(data_type, false, length);
}

// ColumnVector(FixedVector<bool>&& data)
Expand All @@ -78,20 +100,14 @@ class ColumnVector final : public BaseVector {
// std::make_shared<FieldData<bool>>(DataType::BOOL, std::move(data));
// }

// // the size is the number of bits
// ColumnVector(TargetBitmap&& bitmap)
// : BaseVector(DataType::INT8, bitmap.size()) {
// values_ = std::make_shared<FieldDataImpl<uint8_t, false>>(
// bitmap.size(), DataType::INT8, false, std::move(bitmap).into());
// }

// the size is the number of bits
// TODO: separate the usage of bitmap from scalar field data
ColumnVector(TargetBitmap&& bitmap, TargetBitmap&& valid_bitmap)
: BaseVector(DataType::INT8, bitmap.size()) {
: SimpleVector(DataType::INT8, bitmap.size()),
is_bitmap_(true),
valid_values_(std::move(valid_bitmap)) {
values_ = std::make_shared<FieldBitsetImpl<uint8_t>>(DataType::INT8,
std::move(bitmap));
valid_values_ = std::make_shared<FieldBitsetImpl<uint8_t>>(
DataType::INT8, std::move(valid_bitmap));
}

virtual ~ColumnVector() override {
Expand All @@ -100,28 +116,81 @@ class ColumnVector final : public BaseVector {
}

void*
GetRawData() {
RawValueAt(size_t index, size_t size_of_element) override {
return reinterpret_cast<char*>(GetRawData()) + index * size_of_element;
}

bool
ValidAt(size_t index) override {
return valid_values_[index];
}

void*
GetRawData() const {
return values_->Data();
}

void*
GetValidRawData() {
return valid_values_->Data();
return valid_values_.data();
}

template <typename As>
const As*
As*
RawAsValues() const {
return reinterpret_cast<const As*>(values_->Data());
return reinterpret_cast<As*>(values_->Data());
}

bool
IsBitmap() const {
return is_bitmap_;
}

private:
bool is_bitmap_; // TODO: remove the field after implementing BitmapVector
FieldDataPtr values_;
FieldDataPtr valid_values_;
TargetBitmap valid_values_; // false means the value is null
};

using ColumnVectorPtr = std::shared_ptr<ColumnVector>;

template <typename T>
class ConstantVector : public SimpleVector {
public:
ConstantVector(DataType data_type,
size_t length,
const T& val,
std::optional<size_t> null_count = std::nullopt)
: SimpleVector(data_type, length),
val_(val),
is_null_(null_count.has_value() && null_count.value() > 0) {
}

void*
RawValueAt(size_t _index, size_t _size_of_element) override {
return &val_;
}

bool
ValidAt(size_t _index) override {
return !is_null_;
}

const T&
GetValue() const {
return val_;
}

bool
IsNull() const {
return is_null_;
}

private:
T val_;
bool is_null_;
};

/**
* @brief Multi vectors for scalar types
* mainly using it to pass internal result in segcore scalar engine system
Expand Down Expand Up @@ -149,8 +218,7 @@ class RowVector : public BaseVector {
}

RowVector(std::vector<VectorPtr>&& children)
: BaseVector(DataType::ROW, 0) {
children_values_ = std::move(children);
: BaseVector(DataType::ROW, 0), children_values_(std::move(children)) {
for (auto& child : children_values_) {
if (child->size() > length_) {
length_ = child->size();
Expand All @@ -159,12 +227,12 @@ class RowVector : public BaseVector {
}

const std::vector<VectorPtr>&
childrens() {
childrens() const {
return children_values_;
}

VectorPtr
child(int index) {
child(int index) const {
assert(index < children_values_.size());
return children_values_[index];
}
Expand All @@ -174,5 +242,4 @@ class RowVector : public BaseVector {
};

using RowVectorPtr = std::shared_ptr<RowVector>;

} // namespace milvus
2 changes: 1 addition & 1 deletion internal/core/src/common/init_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,4 @@ SetTrace(CTraceConfig* config) {
config->oltpSecure,
config->nodeID};
milvus::tracer::initTelemetry(traceConfig);
}
}
2 changes: 1 addition & 1 deletion internal/core/src/exec/Task.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,4 +235,4 @@ Task::Next(ContinueFuture* future) {
}

} // namespace exec
} // namespace milvus
} // namespace milvus
46 changes: 46 additions & 0 deletions internal/core/src/exec/expression/CallExpr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "common/FieldDataInterface.h"
#include "common/Vector.h"
#include "exec/expression/CallExpr.h"
#include "exec/expression/EvalCtx.h"
#include "exec/expression/function/FunctionFactory.h"

#include <utility>
#include <vector>

namespace milvus {
namespace exec {

void
PhyCallExpr::Eval(EvalCtx& context, VectorPtr& result) {
AssertInfo(inputs_.size() == expr_->inputs().size(),
"logical call expr needs {} inputs, but {} inputs are provided",
expr_->inputs().size(),
inputs_.size());
std::vector<VectorPtr> args;
for (auto& input : this->inputs_) {
VectorPtr arg_result;
input->Eval(context, arg_result);
args.push_back(std::move(arg_result));
}
RowVector row_vector(std::move(args));
this->expr_->function_ptr()(row_vector, result);
}

} // namespace exec
} // namespace milvus
Loading

0 comments on commit 8382c28

Please sign in to comment.