From 725ddd5918997ad6bc92cf5f577008edf0607795 Mon Sep 17 00:00:00 2001
From: Fisher <fisheryung@outlook.com>
Date: Wed, 14 Jun 2023 21:24:35 +0800
Subject: [PATCH] Fixed jitify commit to prevent header file conflicts (#1522)

* Pin jitify to a fixed commit (instead of master) to prevent header file conflicts

* Set random seed to debug floor_divide

* Avoid OOM (out-of-memory) error

* Temporary change for debugging CI

* Fix floor_divide for signed int inputs: round toward negative infinity instead of truncating

* Fix bugs and add more tests for floor_divide
---
 cinn/hlir/op/contrib/repeat_test.cc      |   4 +-
 cinn/lang/builtin.cc                     |  12 +-
 cinn/runtime/cuda/cuda_intrinsics.cc     |   1 -
 cmake/external/jitify.cmake              |   2 +-
 python/tests/ops/test_floor_divide_op.py | 182 +++++++----------------
 python/tests/ops/test_sign_op.py         |   4 +-
 6 files changed, 73 insertions(+), 132 deletions(-)

diff --git a/cinn/hlir/op/contrib/repeat_test.cc b/cinn/hlir/op/contrib/repeat_test.cc
index 71aebe22e2..02977ea19f 100755
--- a/cinn/hlir/op/contrib/repeat_test.cc
+++ b/cinn/hlir/op/contrib/repeat_test.cc
@@ -65,7 +65,7 @@ function TestGenerateCodeCpu_Repeat (_test_repeat)
         ScheduleBlock(test_repeat)
         {
           i0, i1 = axis.bind(i, j)
-          test_repeat[i0, i1] = in[(i0 / 2), i1]
+          test_repeat[i0, i1] = in[select((((i0 > 0) and (2 > 0)) or ((i0 < 0) and (2 < 0))), (i0 / 2), select(((i0 % 2) == 0), (i0 / 2), ((i0 / 2) - 1))), i1]
         }
       }
     }
@@ -100,7 +100,7 @@ void TestGenerateCodeCpu_Repeat(void* _args, int32_t num_args)
   int32_t* test_repeat = ((int32_t*)(_test_repeat->memory));
   for (int32_t i = 0; i < 8; i += 1) {
     for (int32_t j = 0; j < 4; j += 1) {
-      test_repeat[((4 * i) + j)] = in[(((i / 2) * 4) + j)];
+      test_repeat[((4 * i) + j)] = in[((4 * (((((i > 0) && (2 > 0)) || ((i < 0) && (2 < 0)))) ? (i / 2) : ((((i & 1) == 0)) ? (i / 2) : ((i / 2) + -1)))) + j)];
     };
   };
   cinn_buffer_free((void*)(0), _in);
diff --git a/cinn/lang/builtin.cc b/cinn/lang/builtin.cc
index 0abf8dc986..266f704a76 100644
--- a/cinn/lang/builtin.cc
+++ b/cinn/lang/builtin.cc
@@ -107,7 +107,17 @@ Expr One(const Type& type) { return ir::One(type); }
 
 Expr FloorDivide(Expr a, Expr b) {
   CHECK_EQ(a.type(), b.type()) << "FloorDivide's inputs type not equal, where a:" << a.type() << " but b:" << b.type();
-  return a.type().is_float() ? Floor(a / b) : a / b;
+  if (a.type().is_float()) {
+    return Floor(a / b);
+  } else if (a.type().is_uint()) {
+    return a / b;
+  } else {
+    auto div = a / b;
+    auto mod = a % b;
+    auto ret = ir::Select::Make(
+        ir::EQ::Make(mod, common::make_const(a.type(), 0)), div, div - common::make_const(a.type(), 1));
+    return ir::Select::Make((a > 0 && b > 0) || (a < 0 && b < 0), div, ret);
+  }
 }
 
 Expr min_value(const Type& type) {
diff --git a/cinn/runtime/cuda/cuda_intrinsics.cc b/cinn/runtime/cuda/cuda_intrinsics.cc
index 81d717032b..88e48973b3 100644
--- a/cinn/runtime/cuda/cuda_intrinsics.cc
+++ b/cinn/runtime/cuda/cuda_intrinsics.cc
@@ -230,7 +230,6 @@ CINN_REGISTER_HELPER(cuda_intrinsics) {
   REGISTER_EXTERN_FUNC_2_IN_1_INT32(bitwise_and)
   REGISTER_EXTERN_FUNC_2_IN_1_INT32(bitwise_or)
   REGISTER_EXTERN_FUNC_2_IN_1_INT32(bitwise_xor)
-  REGISTER_EXTERN_FUNC_2_IN_1_INT32(floor_divide)
   REGISTER_EXTERN_FUNC_2_IN_1_INT32(logical_right_shift)
   REGISTER_EXTERN_FUNC_2_IN_1_INT32(mod)
 
diff --git a/cmake/external/jitify.cmake b/cmake/external/jitify.cmake
index 5868d5e14a..080b8b93ee 100644
--- a/cmake/external/jitify.cmake
+++ b/cmake/external/jitify.cmake
@@ -11,7 +11,7 @@ ExternalProject_Add(
   external_jitify
   ${EXTERNAL_PROJECT_LOG_ARGS}
   GIT_REPOSITORY "https://github.com/NVIDIA/jitify.git"
-  GIT_TAG master
+  GIT_TAG 57de649139c866eb83acacfe50c92ad7c6278776
   PREFIX ${THIRD_PARTY_PATH}/jitify
   SOURCE_DIR ${JITIFY_SOURCE_PATH}
   CONFIGURE_COMMAND ""
diff --git a/python/tests/ops/test_floor_divide_op.py b/python/tests/ops/test_floor_divide_op.py
index b5f89a6580..996262fab4 100644
--- a/python/tests/ops/test_floor_divide_op.py
+++ b/python/tests/ops/test_floor_divide_op.py
@@ -36,13 +36,13 @@ def init_case(self):
         self.x_np = self.random(
             shape=self.case["x_shape"],
             dtype=self.case["x_dtype"],
-            low=-10,
-            high=10)
+            low=self.case["x_low"],
+            high=self.case["x_high"])
         self.y_np = self.random(
             shape=self.case["y_shape"],
             dtype=self.case["y_dtype"],
-            low=1,
-            high=10)
+            low=self.case["y_low"],
+            high=self.case["y_high"])
 
     def build_paddle_program(self, target):
         x = paddle.to_tensor(self.x_np, stop_gradient=True)
@@ -66,7 +66,7 @@ def build_cinn_program(self, target):
         res = self.get_cinn_output(prog, target, [x, y],
                                    [self.x_np, self.y_np], [out])
 
-        self.cinn_outputs = [res[0]]
+        self.cinn_outputs = res
 
     def test_check_results(self):
         max_relative_error = self.case[
@@ -74,7 +74,7 @@ def test_check_results(self):
         self.check_outputs_and_grads(max_relative_error=max_relative_error)
 
 
-class TestFloorDivideAll(TestCaseHelper):
+class TestFloorDivideShape(TestCaseHelper):
     def init_attrs(self):
         self.class_name = "TestFloorDivideOpCase"
         self.cls = TestFloorDivideOp
@@ -109,18 +109,26 @@ def init_attrs(self):
                 "x_dtype": "int32",
                 "y_dtype": "int32",
             },
+        ]
+        self.attrs = [
             {
-                "x_dtype": "int64",
-                "y_dtype": "int64",
+                "x_low": -10,
+                "x_high": 10,
+                "y_low": -10,
+                "y_high": -1,
+            },
+            {
+                "x_low": -10,
+                "x_high": 10,
+                "y_low": 1,
+                "y_high": 10,
             },
         ]
-        self.attrs = []
 
 
-class TestFloorDivideAllWithBroadcast(TestCaseHelper):
+class TestFloorDivideBroadcast(TestFloorDivideShape):
     def init_attrs(self):
-        self.class_name = "TestFloorDivideOpCase"
-        self.cls = TestFloorDivideOp
+        super().init_attrs()
         self.inputs = [
             {
                 "x_shape": [1],
@@ -147,97 +155,26 @@ def init_attrs(self):
                 "y_shape": [1, 1, 1, 1, 1],
             },
         ]
-        self.dtypes = [
-            {
-                "x_dtype": "int32",
-                "y_dtype": "int32",
-            },
-            {
-                "x_dtype": "int64",
-                "y_dtype": "int64",
-            },
-        ]
-        self.attrs = []
-
-
-class TestFloorDivideNegOp(OpTest):
-    def setUp(self):
-        print(f"\nRunning {self.__class__.__name__}: {self.case}")
-        self.init_case()
 
-    def init_case(self):
-        self.x_np = self.random(
-            shape=self.case["x_shape"],
-            dtype=self.case["x_dtype"],
-            low=-10,
-            high=10)
-        self.y_np = self.random(
-            shape=self.case["y_shape"],
-            dtype=self.case["y_dtype"],
-            low=-10,
-            high=-1)
-
-    def build_paddle_program(self, target):
-        x = paddle.to_tensor(self.x_np, stop_gradient=True)
-        y = paddle.to_tensor(self.y_np, stop_gradient=True)
-
-        out = paddle.floor_divide(x, y)
-
-        self.paddle_outputs = [out]
-
-    def build_cinn_program(self, target):
-        builder = NetBuilder("pow")
-        x = builder.create_input(
-            self.nptype2cinntype(self.case["x_dtype"]), self.case["x_shape"],
-            "x")
-        y = builder.create_input(
-            self.nptype2cinntype(self.case["y_dtype"]), self.case["y_shape"],
-            "y")
-        out = builder.floor_divide(x, y)
-
-        prog = builder.build()
-        res = self.get_cinn_output(prog, target, [x, y],
-                                   [self.x_np, self.y_np], [out])
-
-        self.cinn_outputs = [res[0]]
-
-    def test_check_results(self):
-        max_relative_error = self.case[
-            "max_relative_error"] if "max_relative_error" in self.case else 1e-5
-        self.check_outputs_and_grads(max_relative_error=max_relative_error)
 
-
-class TestFloorDivideNegAll(TestCaseHelper):
+class TestFloorDivideDtype(TestFloorDivideShape):
     def init_attrs(self):
-        self.class_name = "TestFloorDivideNegOpCase"
-        self.cls = TestFloorDivideNegOp
+        super().init_attrs()
         self.inputs = [
-            {
-                "x_shape": [1],
-                "y_shape": [1],
-            },
             {
                 "x_shape": [1024],
                 "y_shape": [1024],
             },
+        ]
+        self.dtypes = [
             {
-                "x_shape": [512, 256],
-                "y_shape": [512, 256],
-            },
-            {
-                "x_shape": [128, 64, 32],
-                "y_shape": [128, 64, 32],
-            },
-            {
-                "x_shape": [16, 8, 4, 2],
-                "y_shape": [16, 8, 4, 2],
+                "x_dtype": "int8",
+                "y_dtype": "int8",
             },
             {
-                "x_shape": [16, 8, 4, 2, 1],
-                "y_shape": [16, 8, 4, 2, 1],
+                "x_dtype": "int16",
+                "y_dtype": "int16",
             },
-        ]
-        self.dtypes = [
             {
                 "x_dtype": "int32",
                 "y_dtype": "int32",
@@ -246,55 +183,50 @@ def init_attrs(self):
                 "x_dtype": "int64",
                 "y_dtype": "int64",
             },
-        ]
-        self.attrs = []
-
-
-class TestFloorDivideNegAllWithBroadcast(TestCaseHelper):
-    def init_attrs(self):
-        self.class_name = "TestFloorDivideNegOpCase"
-        self.cls = TestFloorDivideNegOp
-        self.inputs = [
-            {
-                "x_shape": [1],
-                "y_shape": [1],
-            },
             {
-                "x_shape": [1024],
-                "y_shape": [1],
-            },
-            {
-                "x_shape": [512, 256],
-                "y_shape": [1, 1],
+                "x_dtype": "float16",
+                "y_dtype": "float16",
+                "max_relative_error": 1,
             },
             {
-                "x_shape": [128, 64, 32],
-                "y_shape": [1, 1, 1],
+                "x_dtype": "float32",
+                "y_dtype": "float32",
             },
             {
-                "x_shape": [16, 8, 4, 2],
-                "y_shape": [1, 1, 1, 1],
+                "x_dtype": "float64",
+                "y_dtype": "float64",
             },
+        ]
+
+
+class TestFloorDivideUINT(TestCaseHelper):
+    def init_attrs(self):
+        self.class_name = "TestFloorDivideOpCase"
+        self.cls = TestFloorDivideOp
+        self.inputs = [
             {
-                "x_shape": [16, 8, 4, 2, 1],
-                "y_shape": [1, 1, 1, 1, 1],
+                "x_shape": [1024],
+                "y_shape": [1024],
             },
         ]
         self.dtypes = [
             {
-                "x_dtype": "int32",
-                "y_dtype": "int32",
+                "x_dtype": "uint8",
+                "y_dtype": "uint8",
             },
+        ]
+        self.attrs = [
             {
-                "x_dtype": "int64",
-                "y_dtype": "int64",
+                "x_low": 1,
+                "x_high": 10,
+                "y_low": 1,
+                "y_high": 10,
             },
         ]
-        self.attrs = []
 
 
 if __name__ == "__main__":
-    TestFloorDivideAll().run()
-    TestFloorDivideNegAll().run()
-    TestFloorDivideAllWithBroadcast().run()
-    TestFloorDivideNegAllWithBroadcast().run()
+    TestFloorDivideShape().run()
+    TestFloorDivideBroadcast().run()
+    TestFloorDivideDtype().run()
+    TestFloorDivideUINT().run()
diff --git a/python/tests/ops/test_sign_op.py b/python/tests/ops/test_sign_op.py
index 920cda2564..b70faaff2c 100644
--- a/python/tests/ops/test_sign_op.py
+++ b/python/tests/ops/test_sign_op.py
@@ -87,10 +87,10 @@ def init_attrs(self):
                 "shape": [80, 1, 5, 7],
             },
             {
-                "shape": [80, 3, 1024, 7],
+                "shape": [80, 3, 32, 7],
             },
             {
-                "shape": [10, 5, 1024, 2048],
+                "shape": [10, 5, 32, 32],
             },
             {
                 "shape": [1],