[CodeStyle][CINN] format cpp code via clang-format (#54961)

* fix clang-format * 'fix_clang-format' * fix remaining errors * format * empty commit, re-trigger all ci * empty commit, re-trigger all ci --------- Co-authored-by: SigureMo <[email protected]>
PaddlePaddle · Jun 29, 2023 · af12734 · af12734
1 parent a7419ff
commit af12734
Show file tree

Hide file tree

Showing 683 changed files with 39,431 additions and 23,013 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -47,7 +47,8 @@ repos:
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|xpu|kps|py|sh)$
         exclude: |
             (?x)^(
-                paddle/utils/.*
+                paddle/utils/.*|
+                paddle/cinn/utils/registry.h
             )$
 # For Python files
 -   repo: https://github.com/psf/black.git

diff --git a/paddle/cinn/auto_schedule/analysis/analyze_ir.cc b/paddle/cinn/auto_schedule/analysis/analyze_ir.cc
@@ -41,7 +41,7 @@ std::vector<ir::Var> IndicesToVars(const std::vector<ir::Expr>& indices) {
   for (const ir::Expr& e : indices) {
     // Whether we have to convert other types, like const numbers to Var?
     if (e.As<ir::_Var_>() != nullptr) {
-      ir::Expr copy_e    = optim::IRCopy(e);
+      ir::Expr copy_e = optim::IRCopy(e);
       ir::_Var_* var_ref = copy_e.As<ir::_Var_>();
       result.emplace_back(ir::Var(var_ref));
     }
@@ -58,26 +58,32 @@ void AnalyzeScheduleBlockReadWriteBuffer(ir::ScheduleBlock* sche_block) {
     const ir::Load* load_expr = x->As<ir::Load>();
     if (load_expr != nullptr) {
       const ir::Tensor t = load_expr->tensor.as_tensor_ref();
-      sche_block->read_buffers.emplace_back(ir::BufferRange(t->buffer, IndicesToVars(load_expr->indices)));
+      sche_block->read_buffers.emplace_back(
+          ir::BufferRange(t->buffer, IndicesToVars(load_expr->indices)));
       return false;
     }
     const ir::Store* store_expr = x->As<ir::Store>();
     if (store_expr != nullptr) {
       const ir::Tensor t = store_expr->tensor.as_tensor_ref();
-      sche_block->write_buffers.emplace_back(ir::BufferRange(t->buffer, IndicesToVars(store_expr->indices)));
+      sche_block->write_buffers.emplace_back(
+          ir::BufferRange(t->buffer, IndicesToVars(store_expr->indices)));
       return false;
     }
     return false;
   });
 }
 
-bool ContainsNodeType(ir::Expr expr, const std::unordered_set<ir::IrNodeTy>& node_types) {
-  std::set<ir::Expr> collection = ir::CollectIRNodesWithoutTensor(
-      expr, [&](const Expr* x) { return node_types.find(x->node_type()) != node_types.end(); });
+bool ContainsNodeType(ir::Expr expr,
+                      const std::unordered_set<ir::IrNodeTy>& node_types) {
+  std::set<ir::Expr> collection =
+      ir::CollectIRNodesWithoutTensor(expr, [&](const Expr* x) {
+        return node_types.find(x->node_type()) != node_types.end();
+      });
   return !collection.empty();
 }
 
-std::unordered_set<std::string> GetOutputNamesFromLoweredFunc(const std::vector<ir::LoweredFunc>& lowered_funcs) {
+std::unordered_set<std::string> GetOutputNamesFromLoweredFunc(
+    const std::vector<ir::LoweredFunc>& lowered_funcs) {
   std::unordered_set<std::string> result;
   for (const ir::LoweredFunc& func : lowered_funcs) {
     for (const ir::Argument& arg : func->args) {
@@ -90,18 +96,22 @@ std::unordered_set<std::string> GetOutputNamesFromLoweredFunc(const std::vector<
 }
 
 bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize) {
-  const ir::ScheduleBlock* sche_block = sche_block_realize.schedule_block.As<ir::ScheduleBlock>();
-  if (sche_block->write_buffers.size() != 1 || sche_block->read_buffers.empty()) {
+  const ir::ScheduleBlock* sche_block =
+      sche_block_realize.schedule_block.As<ir::ScheduleBlock>();
+  if (sche_block->write_buffers.size() != 1 ||
+      sche_block->read_buffers.empty()) {
     return false;
   }
-  const ir::Expr& write_buffer = sche_block->write_buffers[0].As<ir::_BufferRange_>()->buffer;
+  const ir::Expr& write_buffer =
+      sche_block->write_buffers[0].As<ir::_BufferRange_>()->buffer;
 
   // Enumerate each read region, get the number of schedule block iter vars
   // which  are not used to index the read region
   int total_unused_iter_vars = 0;
 
   for (const ir::Expr& read_buffer_expr : sche_block->read_buffers) {
-    const ir::_BufferRange_* read_buffer = read_buffer_expr.As<ir::_BufferRange_>();
+    const ir::_BufferRange_* read_buffer =
+        read_buffer_expr.As<ir::_BufferRange_>();
     // Skip the reduction buffer
     if (read_buffer->buffer == write_buffer) {
       continue;
@@ -133,18 +143,22 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize) {
   return total_unused_iter_vars >= 1;
 }
 
-ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target, const ir::LoweredFunc& old_func, ir::Expr& body) {
+ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target,
+                                      const ir::LoweredFunc& old_func,
+                                      ir::Expr& body) {
   ir::ModuleExpr mod_expr(std::vector<ir::Expr>({body}));
   ir::IRSchedule ir_sch(mod_expr);
 
   // temp_bufs may be deleted during auto tuning (such as auto inline),
   // we have to check from old temp bufs and set them as local buffer.
   for (const ir::Buffer& buf : old_func->temp_bufs) {
-    const std::string& buf_name              = buf->name;
+    const std::string& buf_name = buf->name;
     std::vector<ir::Expr> all_block_realizes = ir_sch.GetAllBlocks();
     for (ir::Expr& e : all_block_realizes) {
-      const ir::ScheduleBlockRealize* sche_block_realize = e.As<ir::ScheduleBlockRealize>();
-      const std::string& sche_name = sche_block_realize->schedule_block.As<ir::ScheduleBlock>()->name;
+      const ir::ScheduleBlockRealize* sche_block_realize =
+          e.As<ir::ScheduleBlockRealize>();
+      const std::string& sche_name =
+          sche_block_realize->schedule_block.As<ir::ScheduleBlock>()->name;
       if (buf_name == "_" + sche_name) {
         VLOG(6) << "Set local buffer for temp buffer " << buf_name;
         ir_sch.SetBuffer(e, "local", true);
@@ -159,14 +173,17 @@ ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target, const ir::Lo
 #endif
 
   // Get new temp bufs by analyzing.
-  std::vector<ir::Buffer> new_temp_bufs = lang::GetTempBuffers(old_func->args, updated_body);
-  ir::LoweredFunc new_func = ir::_LoweredFunc_::Make(old_func->name, old_func->args, updated_body, new_temp_bufs);
+  std::vector<ir::Buffer> new_temp_bufs =
+      lang::GetTempBuffers(old_func->args, updated_body);
+  ir::LoweredFunc new_func = ir::_LoweredFunc_::Make(
+      old_func->name, old_func->args, updated_body, new_temp_bufs);
 #ifdef CINN_WITH_CUDA
   if (target == common::DefaultNVGPUTarget()) {
     new_func->PrepareCudaAxisInfoFromBody();
   }
 #endif
-  new_func = optim::Optimize(Expr(new_func), target, false).as_lowered_func_ref();
+  new_func =
+      optim::Optimize(Expr(new_func), target, false).as_lowered_func_ref();
   new_func->PrepareBufferCastExprs(/*with_expr_gen_tensor = */ false);
 
   return new_func;

diff --git a/paddle/cinn/auto_schedule/analysis/analyze_ir.h b/paddle/cinn/auto_schedule/analysis/analyze_ir.h
@@ -27,12 +27,14 @@ namespace auto_schedule {
 
 void AnalyzeScheduleBlockReadWriteBuffer(ir::ScheduleBlock* sche_block);
 
-bool ContainsNodeType(ir::Expr expr, const std::unordered_set<ir::IrNodeTy>& node_types);
+bool ContainsNodeType(ir::Expr expr,
+                      const std::unordered_set<ir::IrNodeTy>& node_types);
 
 /**
  * Collects all input lowered_funcs and return names of all output arguments
  */
-std::unordered_set<std::string> GetOutputNamesFromLoweredFunc(const std::vector<ir::LoweredFunc>& lowered_funcs);
+std::unordered_set<std::string> GetOutputNamesFromLoweredFunc(
+    const std::vector<ir::LoweredFunc>& lowered_funcs);
 
 /**
  * Determine whether a schedule block needs multileveltiling
@@ -42,7 +44,9 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize);
 /**
  * Update a LoweredFunc by regenerating related fields with a new function body
  */
-ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target, const ir::LoweredFunc& old_func, ir::Expr& body);
+ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target,
+                                      const ir::LoweredFunc& old_func,
+                                      ir::Expr& body);
 
 }  // namespace auto_schedule
 }  // namespace cinn
diff --git a/paddle/cinn/auto_schedule/analysis/analyze_ir_test.cc b/paddle/cinn/auto_schedule/analysis/analyze_ir_test.cc
@@ -49,8 +49,9 @@ TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_SimpleAssign) {
   ir::Tensor B = lang::Compute(
       {M, N}, [&](Var i, Var j) { return A(i, j); }, "B");
 
-  poly::StageMap stages              = poly::CreateStages({A, B});
-  std::vector<ir::LoweredFunc> funcs = lang::LowerVec("SimpleAssign", stages, {A, B}, {}, {}, nullptr, target, true);
+  poly::StageMap stages = poly::CreateStages({A, B});
+  std::vector<ir::LoweredFunc> funcs = lang::LowerVec(
+      "SimpleAssign", stages, {A, B}, {}, {}, nullptr, target, true);
 
   ASSERT_FALSE(funcs.empty());
   ir::Expr ast_expr = funcs[0]->body;
@@ -65,8 +66,10 @@ TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_SimpleAssign) {
   std::vector<ir::Expr> all_block_realizes = ir_sch.GetAllBlocks();
   ASSERT_EQ(all_block_realizes.size(), 1UL);
 
-  ir::ScheduleBlockRealize* sche_block_realize = all_block_realizes[0].As<ir::ScheduleBlockRealize>();
-  ir::ScheduleBlock* sche_block                = sche_block_realize->schedule_block.As<ir::ScheduleBlock>();
+  ir::ScheduleBlockRealize* sche_block_realize =
+      all_block_realizes[0].As<ir::ScheduleBlockRealize>();
+  ir::ScheduleBlock* sche_block =
+      sche_block_realize->schedule_block.As<ir::ScheduleBlock>();
   AnalyzeScheduleBlockReadWriteBuffer(sche_block);
 
   /*
@@ -112,8 +115,9 @@ TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_AddDiffShape) {
   ir::Tensor C = lang::Compute(
       {M, N}, [&](Var i, Var j) { return A(i) + B(j); }, "C");
 
-  poly::StageMap stages              = poly::CreateStages({C});
-  std::vector<ir::LoweredFunc> funcs = lang::LowerVec("AddDiffShape", stages, {C}, {}, {}, nullptr, target, true);
+  poly::StageMap stages = poly::CreateStages({C});
+  std::vector<ir::LoweredFunc> funcs = lang::LowerVec(
+      "AddDiffShape", stages, {C}, {}, {}, nullptr, target, true);
 
   ir::Expr ast_expr = funcs[0]->body;
   VLOG(6) << "Expr before MultiLevelTiling: ";
@@ -126,8 +130,10 @@ TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_AddDiffShape) {
   std::vector<ir::Expr> all_block_realizes = ir_sch.GetAllBlocks();
   ASSERT_EQ(all_block_realizes.size(), 1UL);
 
-  ir::ScheduleBlockRealize* sche_block_realize = all_block_realizes[0].As<ir::ScheduleBlockRealize>();
-  ir::ScheduleBlock* sche_block                = sche_block_realize->schedule_block.As<ir::ScheduleBlock>();
+  ir::ScheduleBlockRealize* sche_block_realize =
+      all_block_realizes[0].As<ir::ScheduleBlockRealize>();
+  ir::ScheduleBlock* sche_block =
+      sche_block_realize->schedule_block.As<ir::ScheduleBlock>();
   AnalyzeScheduleBlockReadWriteBuffer(sche_block);
 
   VLOG(6) << "ScheduleBlockRealize: ";
@@ -163,18 +169,22 @@ TEST(AnalyzeIr, ContainsNodeType) {
   ir::Tensor B = lang::Compute(
       {M, N}, [&](Var i, Var j) { return A(i, j); }, "B");
 
-  poly::StageMap stages              = poly::CreateStages({A, B});
-  std::vector<ir::LoweredFunc> funcs = lang::LowerVec("SimpleAssign", stages, {A, B}, {}, {}, nullptr, target, true);
+  poly::StageMap stages = poly::CreateStages({A, B});
+  std::vector<ir::LoweredFunc> funcs = lang::LowerVec(
+      "SimpleAssign", stages, {A, B}, {}, {}, nullptr, target, true);
 
   ASSERT_FALSE(funcs.empty());
   ir::Expr ast_expr = funcs[0]->body;
 
   VLOG(6) << "Analyzing for Expr:";
   VLOG(6) << ast_expr;
 
-  ASSERT_TRUE(ContainsNodeType(ast_expr, {ir::IrNodeTy::Load, ir::IrNodeTy::Store}));
-  ASSERT_TRUE(ContainsNodeType(ast_expr, {ir::IrNodeTy::Load, ir::IrNodeTy::IfThenElse}));
-  ASSERT_FALSE(ContainsNodeType(ast_expr, {ir::IrNodeTy::IfThenElse, ir::IrNodeTy::Sum}));
+  ASSERT_TRUE(
+      ContainsNodeType(ast_expr, {ir::IrNodeTy::Load, ir::IrNodeTy::Store}));
+  ASSERT_TRUE(ContainsNodeType(ast_expr,
+                               {ir::IrNodeTy::Load, ir::IrNodeTy::IfThenElse}));
+  ASSERT_FALSE(ContainsNodeType(ast_expr,
+                                {ir::IrNodeTy::IfThenElse, ir::IrNodeTy::Sum}));
 }
 
 }  // namespace auto_schedule

diff --git a/paddle/cinn/auto_schedule/auto_tuner.cc b/paddle/cinn/auto_schedule/auto_tuner.cc
@@ -38,13 +38,17 @@
 namespace cinn {
 namespace auto_schedule {
 
-AutoTuner::AutoTuner(const common::Target& target, hlir::framework::Graph* graph) : target_(target), graph_(graph) {}
+AutoTuner::AutoTuner(const common::Target& target,
+                     hlir::framework::Graph* graph)
+    : target_(target), graph_(graph) {}
 
-void AutoTuner::Initialize(const Config& config, hlir::framework::GraphCompiler* graph_compiler) {
+void AutoTuner::Initialize(const Config& config,
+                           hlir::framework::GraphCompiler* graph_compiler) {
   // create builder, runner, and schedule measurer
-  builder_           = std::make_unique<SimpleBuilder>(graph_compiler);
-  runner_            = std::make_unique<SimpleRunner>(config.runner_repeat_times);
-  schedule_measurer_ = std::make_unique<ScheduleMeasurer>(builder_.get(), runner_.get());
+  builder_ = std::make_unique<SimpleBuilder>(graph_compiler);
+  runner_ = std::make_unique<SimpleRunner>(config.runner_repeat_times);
+  schedule_measurer_ =
+      std::make_unique<ScheduleMeasurer>(builder_.get(), runner_.get());
 
   // initialize database
   database_ = std::move(Database::Make(config.database_config));
@@ -53,29 +57,43 @@ void AutoTuner::Initialize(const Config& config, hlir::framework::GraphCompiler*
   TaskCreator task_creator;
   tasks_ = task_creator.CreateTuneTaskOpLevel(graph_);
 
-  const auto& dtype_dict = graph_->GetAttrs<absl::flat_hash_map<std::string, common::Type>>("inferdtype");
-  const auto& shape_dict = graph_->GetAttrs<absl::flat_hash_map<std::string, hlir::framework::shape_t>>("infershape");
+  const auto& dtype_dict =
+      graph_->GetAttrs<absl::flat_hash_map<std::string, common::Type>>(
+          "inferdtype");
+  const auto& shape_dict = graph_->GetAttrs<
+      absl::flat_hash_map<std::string, hlir::framework::shape_t>>("infershape");
 
-  op_lowerer_                        = std::make_unique<hlir::framework::OpLowerer>(dtype_dict, shape_dict, target_);
+  op_lowerer_ = std::make_unique<hlir::framework::OpLowerer>(
+      dtype_dict, shape_dict, target_);
   InitialTaskRegistry* task_registry = InitialTaskRegistry::Global();
   for (auto i = 0; i < tasks_.size(); ++i) {
     auto&& task = tasks_[i];
     task.Initialize(shape_dict, dtype_dict, op_lowerer_.get());
     // Register the initial ModuleExpr corresponding to the task
-    task_registry->Regist(task.serialized_key, ir::ModuleExpr(task.GetLoweredFuncBodyExprs()));
-    VLOG(3) << "Add a task, id:" << i << ", serialized_key:\n" << task.serialized_key;
+    task_registry->Regist(task.serialized_key,
+                          ir::ModuleExpr(task.GetLoweredFuncBodyExprs()));
+    VLOG(3) << "Add a task, id:" << i << ", serialized_key:\n"
+            << task.serialized_key;
   }
 
   // create task optimizers
-  utils::LinearRandomEngine::StateType initial_seed = utils::LinearRandomEngine::GetDeviceRandomValue();
+  utils::LinearRandomEngine::StateType initial_seed =
+      utils::LinearRandomEngine::GetDeviceRandomValue();
   task_optimizers_.resize(tasks_.size());
-  std::transform(tasks_.begin(), tasks_.end(), task_optimizers_.begin(), [&](TuneTask& task) {
-    return std::make_unique<TaskOptimizer>(
-        &task, schedule_measurer_.get(), database_.get(), utils::ForkRandomState(&initial_seed));
-  });
+  std::transform(tasks_.begin(),
+                 tasks_.end(),
+                 task_optimizers_.begin(),
+                 [&](TuneTask& task) {
+                   return std::make_unique<TaskOptimizer>(
+                       &task,
+                       schedule_measurer_.get(),
+                       database_.get(),
+                       utils::ForkRandomState(&initial_seed));
+                 });
 
   // create task scheduler
-  task_scheduler_ = TaskScheduler::Make(tasks_, config.task_schedule_config, config.task_schedule_strategy);
+  task_scheduler_ = TaskScheduler::Make(
+      tasks_, config.task_schedule_config, config.task_schedule_strategy);
 }
 
 void PrintResult(std::shared_ptr<hlir::framework::Graph::Group> group) {
@@ -127,7 +145,8 @@ void PrintResult(const TuningResult& result) {
 
 TuningResult AutoTuner::Tune(const TuningOptions& options) {
   CHECK_GT(options.num_tuning_rounds, 0) << "Invalid config";
-  VLOG(3) << "Begin tuning with round num=" << options.num_tuning_rounds << ", tasks size=" << tasks_.size();
+  VLOG(3) << "Begin tuning with round num=" << options.num_tuning_rounds
+          << ", tasks size=" << tasks_.size();
 
   TuningResult result;
   result.subgraphs.resize(tasks_.size());
@@ -136,7 +155,7 @@ TuningResult AutoTuner::Tune(const TuningOptions& options) {
   // as default result of graph tuning, and that should be updated
   // once we support graph tuning.
   for (auto i = 0; i < tasks_.size(); ++i) {
-    auto&& task         = tasks_.at(i);
+    auto&& task = tasks_.at(i);
     result.subgraphs[i] = task.subgraph;
   }
 
@@ -146,7 +165,7 @@ TuningResult AutoTuner::Tune(const TuningOptions& options) {
     task_scheduler_->Reset();
     while ((run_id = task_scheduler_->NextTaskId()) != -1) {
       VLOG(3) << "Start tuning Task-" << run_id;
-      auto* opt           = task_optimizers_.at(run_id).get();
+      auto* opt = task_optimizers_.at(run_id).get();
       auto function_group = opt->Optimize(options);
       VLOG(3) << "Task-" << run_id << " finished, print optimized functions:\n";
       PrintResult(function_group);

diff --git a/paddle/cinn/auto_schedule/auto_tuner.h b/paddle/cinn/auto_schedule/auto_tuner.h
@@ -49,7 +49,8 @@ class AutoTuner {
   AutoTuner(const common::Target& target, hlir::framework::Graph* graph);
 
   // Initialize tuner with specific config and auxiliary objects.
-  void Initialize(const Config& config, hlir::framework::GraphCompiler* graph_compiler);
+  void Initialize(const Config& config,
+                  hlir::framework::GraphCompiler* graph_compiler);
 
   // Perform the tuning process and return the final result
   TuningResult Tune(const TuningOptions& options);