Skip to content

Commit

Permalink
[CodeStyle][CINN] format cpp code via clang-format (#54961)
Browse files Browse the repository at this point in the history
* fix clang-format

* 'fix_clang-format'

* fix remaining errors

* format

* empty commit, re-trigger all ci

* empty commit, re-trigger all ci

---------

Co-authored-by: SigureMo <[email protected]>
  • Loading branch information
Ainavo and SigureMo authored Jun 29, 2023
1 parent a7419ff commit af12734
Show file tree
Hide file tree
Showing 683 changed files with 39,431 additions and 23,013 deletions.
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ repos:
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|xpu|kps|py|sh)$
exclude: |
(?x)^(
paddle/utils/.*
paddle/utils/.*|
paddle/cinn/utils/registry.h
)$
# For Python files
- repo: https://github.com/psf/black.git
Expand Down
53 changes: 35 additions & 18 deletions paddle/cinn/auto_schedule/analysis/analyze_ir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ std::vector<ir::Var> IndicesToVars(const std::vector<ir::Expr>& indices) {
for (const ir::Expr& e : indices) {
// Should other expression types, such as constant numbers, also be converted to Var?
if (e.As<ir::_Var_>() != nullptr) {
ir::Expr copy_e = optim::IRCopy(e);
ir::Expr copy_e = optim::IRCopy(e);
ir::_Var_* var_ref = copy_e.As<ir::_Var_>();
result.emplace_back(ir::Var(var_ref));
}
Expand All @@ -58,26 +58,32 @@ void AnalyzeScheduleBlockReadWriteBuffer(ir::ScheduleBlock* sche_block) {
const ir::Load* load_expr = x->As<ir::Load>();
if (load_expr != nullptr) {
const ir::Tensor t = load_expr->tensor.as_tensor_ref();
sche_block->read_buffers.emplace_back(ir::BufferRange(t->buffer, IndicesToVars(load_expr->indices)));
sche_block->read_buffers.emplace_back(
ir::BufferRange(t->buffer, IndicesToVars(load_expr->indices)));
return false;
}
const ir::Store* store_expr = x->As<ir::Store>();
if (store_expr != nullptr) {
const ir::Tensor t = store_expr->tensor.as_tensor_ref();
sche_block->write_buffers.emplace_back(ir::BufferRange(t->buffer, IndicesToVars(store_expr->indices)));
sche_block->write_buffers.emplace_back(
ir::BufferRange(t->buffer, IndicesToVars(store_expr->indices)));
return false;
}
return false;
});
}

bool ContainsNodeType(ir::Expr expr, const std::unordered_set<ir::IrNodeTy>& node_types) {
std::set<ir::Expr> collection = ir::CollectIRNodesWithoutTensor(
expr, [&](const Expr* x) { return node_types.find(x->node_type()) != node_types.end(); });
bool ContainsNodeType(ir::Expr expr,
const std::unordered_set<ir::IrNodeTy>& node_types) {
std::set<ir::Expr> collection =
ir::CollectIRNodesWithoutTensor(expr, [&](const Expr* x) {
return node_types.find(x->node_type()) != node_types.end();
});
return !collection.empty();
}

std::unordered_set<std::string> GetOutputNamesFromLoweredFunc(const std::vector<ir::LoweredFunc>& lowered_funcs) {
std::unordered_set<std::string> GetOutputNamesFromLoweredFunc(
const std::vector<ir::LoweredFunc>& lowered_funcs) {
std::unordered_set<std::string> result;
for (const ir::LoweredFunc& func : lowered_funcs) {
for (const ir::Argument& arg : func->args) {
Expand All @@ -90,18 +96,22 @@ std::unordered_set<std::string> GetOutputNamesFromLoweredFunc(const std::vector<
}

bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize) {
const ir::ScheduleBlock* sche_block = sche_block_realize.schedule_block.As<ir::ScheduleBlock>();
if (sche_block->write_buffers.size() != 1 || sche_block->read_buffers.empty()) {
const ir::ScheduleBlock* sche_block =
sche_block_realize.schedule_block.As<ir::ScheduleBlock>();
if (sche_block->write_buffers.size() != 1 ||
sche_block->read_buffers.empty()) {
return false;
}
const ir::Expr& write_buffer = sche_block->write_buffers[0].As<ir::_BufferRange_>()->buffer;
const ir::Expr& write_buffer =
sche_block->write_buffers[0].As<ir::_BufferRange_>()->buffer;

// Enumerate each read region, get the number of schedule block iter vars
// which are not used to index the read region
int total_unused_iter_vars = 0;

for (const ir::Expr& read_buffer_expr : sche_block->read_buffers) {
const ir::_BufferRange_* read_buffer = read_buffer_expr.As<ir::_BufferRange_>();
const ir::_BufferRange_* read_buffer =
read_buffer_expr.As<ir::_BufferRange_>();
// Skip the reduction buffer
if (read_buffer->buffer == write_buffer) {
continue;
Expand Down Expand Up @@ -133,18 +143,22 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize) {
return total_unused_iter_vars >= 1;
}

ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target, const ir::LoweredFunc& old_func, ir::Expr& body) {
ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target,
const ir::LoweredFunc& old_func,
ir::Expr& body) {
ir::ModuleExpr mod_expr(std::vector<ir::Expr>({body}));
ir::IRSchedule ir_sch(mod_expr);

// temp_bufs may be deleted during auto tuning (e.g. by auto inline), so
// we have to check the old temp bufs and set them as local buffers.
for (const ir::Buffer& buf : old_func->temp_bufs) {
const std::string& buf_name = buf->name;
const std::string& buf_name = buf->name;
std::vector<ir::Expr> all_block_realizes = ir_sch.GetAllBlocks();
for (ir::Expr& e : all_block_realizes) {
const ir::ScheduleBlockRealize* sche_block_realize = e.As<ir::ScheduleBlockRealize>();
const std::string& sche_name = sche_block_realize->schedule_block.As<ir::ScheduleBlock>()->name;
const ir::ScheduleBlockRealize* sche_block_realize =
e.As<ir::ScheduleBlockRealize>();
const std::string& sche_name =
sche_block_realize->schedule_block.As<ir::ScheduleBlock>()->name;
if (buf_name == "_" + sche_name) {
VLOG(6) << "Set local buffer for temp buffer " << buf_name;
ir_sch.SetBuffer(e, "local", true);
Expand All @@ -159,14 +173,17 @@ ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target, const ir::Lo
#endif

// Get new temp bufs by analyzing.
std::vector<ir::Buffer> new_temp_bufs = lang::GetTempBuffers(old_func->args, updated_body);
ir::LoweredFunc new_func = ir::_LoweredFunc_::Make(old_func->name, old_func->args, updated_body, new_temp_bufs);
std::vector<ir::Buffer> new_temp_bufs =
lang::GetTempBuffers(old_func->args, updated_body);
ir::LoweredFunc new_func = ir::_LoweredFunc_::Make(
old_func->name, old_func->args, updated_body, new_temp_bufs);
#ifdef CINN_WITH_CUDA
if (target == common::DefaultNVGPUTarget()) {
new_func->PrepareCudaAxisInfoFromBody();
}
#endif
new_func = optim::Optimize(Expr(new_func), target, false).as_lowered_func_ref();
new_func =
optim::Optimize(Expr(new_func), target, false).as_lowered_func_ref();
new_func->PrepareBufferCastExprs(/*with_expr_gen_tensor = */ false);

return new_func;
Expand Down
10 changes: 7 additions & 3 deletions paddle/cinn/auto_schedule/analysis/analyze_ir.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,14 @@ namespace auto_schedule {

void AnalyzeScheduleBlockReadWriteBuffer(ir::ScheduleBlock* sche_block);

bool ContainsNodeType(ir::Expr expr, const std::unordered_set<ir::IrNodeTy>& node_types);
bool ContainsNodeType(ir::Expr expr,
const std::unordered_set<ir::IrNodeTy>& node_types);

/**
* Collects all input lowered_funcs and returns the names of all output arguments
*/
std::unordered_set<std::string> GetOutputNamesFromLoweredFunc(const std::vector<ir::LoweredFunc>& lowered_funcs);
std::unordered_set<std::string> GetOutputNamesFromLoweredFunc(
const std::vector<ir::LoweredFunc>& lowered_funcs);

/**
* Determine whether a schedule block needs multi-level tiling
Expand All @@ -42,7 +44,9 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize);
/**
* Update a LoweredFunc by regenerating related fields with a new function body
*/
ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target, const ir::LoweredFunc& old_func, ir::Expr& body);
ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target,
const ir::LoweredFunc& old_func,
ir::Expr& body);

} // namespace auto_schedule
} // namespace cinn
36 changes: 23 additions & 13 deletions paddle/cinn/auto_schedule/analysis/analyze_ir_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_SimpleAssign) {
ir::Tensor B = lang::Compute(
{M, N}, [&](Var i, Var j) { return A(i, j); }, "B");

poly::StageMap stages = poly::CreateStages({A, B});
std::vector<ir::LoweredFunc> funcs = lang::LowerVec("SimpleAssign", stages, {A, B}, {}, {}, nullptr, target, true);
poly::StageMap stages = poly::CreateStages({A, B});
std::vector<ir::LoweredFunc> funcs = lang::LowerVec(
"SimpleAssign", stages, {A, B}, {}, {}, nullptr, target, true);

ASSERT_FALSE(funcs.empty());
ir::Expr ast_expr = funcs[0]->body;
Expand All @@ -65,8 +66,10 @@ TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_SimpleAssign) {
std::vector<ir::Expr> all_block_realizes = ir_sch.GetAllBlocks();
ASSERT_EQ(all_block_realizes.size(), 1UL);

ir::ScheduleBlockRealize* sche_block_realize = all_block_realizes[0].As<ir::ScheduleBlockRealize>();
ir::ScheduleBlock* sche_block = sche_block_realize->schedule_block.As<ir::ScheduleBlock>();
ir::ScheduleBlockRealize* sche_block_realize =
all_block_realizes[0].As<ir::ScheduleBlockRealize>();
ir::ScheduleBlock* sche_block =
sche_block_realize->schedule_block.As<ir::ScheduleBlock>();
AnalyzeScheduleBlockReadWriteBuffer(sche_block);

/*
Expand Down Expand Up @@ -112,8 +115,9 @@ TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_AddDiffShape) {
ir::Tensor C = lang::Compute(
{M, N}, [&](Var i, Var j) { return A(i) + B(j); }, "C");

poly::StageMap stages = poly::CreateStages({C});
std::vector<ir::LoweredFunc> funcs = lang::LowerVec("AddDiffShape", stages, {C}, {}, {}, nullptr, target, true);
poly::StageMap stages = poly::CreateStages({C});
std::vector<ir::LoweredFunc> funcs = lang::LowerVec(
"AddDiffShape", stages, {C}, {}, {}, nullptr, target, true);

ir::Expr ast_expr = funcs[0]->body;
VLOG(6) << "Expr before MultiLevelTiling: ";
Expand All @@ -126,8 +130,10 @@ TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_AddDiffShape) {
std::vector<ir::Expr> all_block_realizes = ir_sch.GetAllBlocks();
ASSERT_EQ(all_block_realizes.size(), 1UL);

ir::ScheduleBlockRealize* sche_block_realize = all_block_realizes[0].As<ir::ScheduleBlockRealize>();
ir::ScheduleBlock* sche_block = sche_block_realize->schedule_block.As<ir::ScheduleBlock>();
ir::ScheduleBlockRealize* sche_block_realize =
all_block_realizes[0].As<ir::ScheduleBlockRealize>();
ir::ScheduleBlock* sche_block =
sche_block_realize->schedule_block.As<ir::ScheduleBlock>();
AnalyzeScheduleBlockReadWriteBuffer(sche_block);

VLOG(6) << "ScheduleBlockRealize: ";
Expand Down Expand Up @@ -163,18 +169,22 @@ TEST(AnalyzeIr, ContainsNodeType) {
ir::Tensor B = lang::Compute(
{M, N}, [&](Var i, Var j) { return A(i, j); }, "B");

poly::StageMap stages = poly::CreateStages({A, B});
std::vector<ir::LoweredFunc> funcs = lang::LowerVec("SimpleAssign", stages, {A, B}, {}, {}, nullptr, target, true);
poly::StageMap stages = poly::CreateStages({A, B});
std::vector<ir::LoweredFunc> funcs = lang::LowerVec(
"SimpleAssign", stages, {A, B}, {}, {}, nullptr, target, true);

ASSERT_FALSE(funcs.empty());
ir::Expr ast_expr = funcs[0]->body;

VLOG(6) << "Analyzing for Expr:";
VLOG(6) << ast_expr;

ASSERT_TRUE(ContainsNodeType(ast_expr, {ir::IrNodeTy::Load, ir::IrNodeTy::Store}));
ASSERT_TRUE(ContainsNodeType(ast_expr, {ir::IrNodeTy::Load, ir::IrNodeTy::IfThenElse}));
ASSERT_FALSE(ContainsNodeType(ast_expr, {ir::IrNodeTy::IfThenElse, ir::IrNodeTy::Sum}));
ASSERT_TRUE(
ContainsNodeType(ast_expr, {ir::IrNodeTy::Load, ir::IrNodeTy::Store}));
ASSERT_TRUE(ContainsNodeType(ast_expr,
{ir::IrNodeTy::Load, ir::IrNodeTy::IfThenElse}));
ASSERT_FALSE(ContainsNodeType(ast_expr,
{ir::IrNodeTy::IfThenElse, ir::IrNodeTy::Sum}));
}

} // namespace auto_schedule
Expand Down
57 changes: 38 additions & 19 deletions paddle/cinn/auto_schedule/auto_tuner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,17 @@
namespace cinn {
namespace auto_schedule {

AutoTuner::AutoTuner(const common::Target& target, hlir::framework::Graph* graph) : target_(target), graph_(graph) {}
AutoTuner::AutoTuner(const common::Target& target,
hlir::framework::Graph* graph)
: target_(target), graph_(graph) {}

void AutoTuner::Initialize(const Config& config, hlir::framework::GraphCompiler* graph_compiler) {
void AutoTuner::Initialize(const Config& config,
hlir::framework::GraphCompiler* graph_compiler) {
// create builder, runner, and schedule measurer
builder_ = std::make_unique<SimpleBuilder>(graph_compiler);
runner_ = std::make_unique<SimpleRunner>(config.runner_repeat_times);
schedule_measurer_ = std::make_unique<ScheduleMeasurer>(builder_.get(), runner_.get());
builder_ = std::make_unique<SimpleBuilder>(graph_compiler);
runner_ = std::make_unique<SimpleRunner>(config.runner_repeat_times);
schedule_measurer_ =
std::make_unique<ScheduleMeasurer>(builder_.get(), runner_.get());

// initialize database
database_ = std::move(Database::Make(config.database_config));
Expand All @@ -53,29 +57,43 @@ void AutoTuner::Initialize(const Config& config, hlir::framework::GraphCompiler*
TaskCreator task_creator;
tasks_ = task_creator.CreateTuneTaskOpLevel(graph_);

const auto& dtype_dict = graph_->GetAttrs<absl::flat_hash_map<std::string, common::Type>>("inferdtype");
const auto& shape_dict = graph_->GetAttrs<absl::flat_hash_map<std::string, hlir::framework::shape_t>>("infershape");
const auto& dtype_dict =
graph_->GetAttrs<absl::flat_hash_map<std::string, common::Type>>(
"inferdtype");
const auto& shape_dict = graph_->GetAttrs<
absl::flat_hash_map<std::string, hlir::framework::shape_t>>("infershape");

op_lowerer_ = std::make_unique<hlir::framework::OpLowerer>(dtype_dict, shape_dict, target_);
op_lowerer_ = std::make_unique<hlir::framework::OpLowerer>(
dtype_dict, shape_dict, target_);
InitialTaskRegistry* task_registry = InitialTaskRegistry::Global();
for (auto i = 0; i < tasks_.size(); ++i) {
auto&& task = tasks_[i];
task.Initialize(shape_dict, dtype_dict, op_lowerer_.get());
// Register the initial ModuleExpr corresponding to the task
task_registry->Regist(task.serialized_key, ir::ModuleExpr(task.GetLoweredFuncBodyExprs()));
VLOG(3) << "Add a task, id:" << i << ", serialized_key:\n" << task.serialized_key;
task_registry->Regist(task.serialized_key,
ir::ModuleExpr(task.GetLoweredFuncBodyExprs()));
VLOG(3) << "Add a task, id:" << i << ", serialized_key:\n"
<< task.serialized_key;
}

// create task optimizers
utils::LinearRandomEngine::StateType initial_seed = utils::LinearRandomEngine::GetDeviceRandomValue();
utils::LinearRandomEngine::StateType initial_seed =
utils::LinearRandomEngine::GetDeviceRandomValue();
task_optimizers_.resize(tasks_.size());
std::transform(tasks_.begin(), tasks_.end(), task_optimizers_.begin(), [&](TuneTask& task) {
return std::make_unique<TaskOptimizer>(
&task, schedule_measurer_.get(), database_.get(), utils::ForkRandomState(&initial_seed));
});
std::transform(tasks_.begin(),
tasks_.end(),
task_optimizers_.begin(),
[&](TuneTask& task) {
return std::make_unique<TaskOptimizer>(
&task,
schedule_measurer_.get(),
database_.get(),
utils::ForkRandomState(&initial_seed));
});

// create task scheduler
task_scheduler_ = TaskScheduler::Make(tasks_, config.task_schedule_config, config.task_schedule_strategy);
task_scheduler_ = TaskScheduler::Make(
tasks_, config.task_schedule_config, config.task_schedule_strategy);
}

void PrintResult(std::shared_ptr<hlir::framework::Graph::Group> group) {
Expand Down Expand Up @@ -127,7 +145,8 @@ void PrintResult(const TuningResult& result) {

TuningResult AutoTuner::Tune(const TuningOptions& options) {
CHECK_GT(options.num_tuning_rounds, 0) << "Invalid config";
VLOG(3) << "Begin tuning with round num=" << options.num_tuning_rounds << ", tasks size=" << tasks_.size();
VLOG(3) << "Begin tuning with round num=" << options.num_tuning_rounds
<< ", tasks size=" << tasks_.size();

TuningResult result;
result.subgraphs.resize(tasks_.size());
Expand All @@ -136,7 +155,7 @@ TuningResult AutoTuner::Tune(const TuningOptions& options) {
// as default result of graph tuning, and that should be updated
// once we support graph tuning.
for (auto i = 0; i < tasks_.size(); ++i) {
auto&& task = tasks_.at(i);
auto&& task = tasks_.at(i);
result.subgraphs[i] = task.subgraph;
}

Expand All @@ -146,7 +165,7 @@ TuningResult AutoTuner::Tune(const TuningOptions& options) {
task_scheduler_->Reset();
while ((run_id = task_scheduler_->NextTaskId()) != -1) {
VLOG(3) << "Start tuning Task-" << run_id;
auto* opt = task_optimizers_.at(run_id).get();
auto* opt = task_optimizers_.at(run_id).get();
auto function_group = opt->Optimize(options);
VLOG(3) << "Task-" << run_id << " finished, print optimized functions:\n";
PrintResult(function_group);
Expand Down
3 changes: 2 additions & 1 deletion paddle/cinn/auto_schedule/auto_tuner.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ class AutoTuner {
AutoTuner(const common::Target& target, hlir::framework::Graph* graph);

// Initialize tuner with specific config and auxiliary objects.
void Initialize(const Config& config, hlir::framework::GraphCompiler* graph_compiler);
void Initialize(const Config& config,
hlir::framework::GraphCompiler* graph_compiler);

// Perform the tuning process and return the final result
TuningResult Tune(const TuningOptions& options);
Expand Down
Loading

0 comments on commit af12734

Please sign in to comment.