From 894ecf8b3c6d7eabb3baa2714b63b65aa233f2fe Mon Sep 17 00:00:00 2001 From: wenming2014 <2279939962@qq.com> Date: Fri, 25 Sep 2020 08:39:30 +0000 Subject: [PATCH] fix ci problems --- 0924.log | 9432 +++++++++++++++++++++++++++ Testing/Temporary/CTestCostData.txt | 1 + Testing/Temporary/LastTest.log | 3 + cinn/hlir/pe/nn.cc | 78 +- python/tests/conv2d_utils.py | 11 +- python/tests/test_op_nn.py | 6 +- python/tests/test_pe_reduction.py | 2 +- 7 files changed, 9519 insertions(+), 14 deletions(-) create mode 100644 0924.log create mode 100644 Testing/Temporary/CTestCostData.txt create mode 100644 Testing/Temporary/LastTest.log diff --git a/0924.log b/0924.log new file mode 100644 index 0000000000..74bb078c67 --- /dev/null +++ b/0924.log @@ -0,0 +1,9432 @@ +/home/wangyue50/CINN-my/CINN/build +-- The C compiler identification is GNU 7.5.0 +-- The CXX compiler identification is GNU 7.5.0 +-- Check for working C compiler: /usr/bin/cc +-- Check for working C compiler: /usr/bin/cc -- works +-- Detecting C compiler ABI info +-- Detecting C compiler ABI info - done +-- Detecting C compile features +-- Detecting C compile features - done +-- Check for working CXX compiler: /usr/bin/c++ +-- Check for working CXX compiler: /usr/bin/c++ -- works +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +-- Detecting CXX compile features +-- Detecting CXX compile features - done +-- Looking for pthread.h +-- Looking for pthread.h - found +-- Looking for pthread_create +-- Looking for pthread_create - not found +-- Looking for pthread_create in pthreads +-- Looking for pthread_create in pthreads - not found +-- Looking for pthread_create in pthread +-- Looking for pthread_create in pthread - found +-- Found Threads: TRUE +-- pybind path: /home/wangyue50/CINN-my/CINN/build/thirds/pybind/src/extern_pybind/include +-- third: /home/wangyue50/CINN-my/CINN/build/thirds +-- Protobuf protoc executable: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/bin/protoc +-- Protobuf-lite library: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/lib/libprotobuf-lite.a +-- Protobuf library: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/lib/libprotobuf.a +-- Protoc library: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/lib/libprotoc.a +-- Protobuf version: 3.1.0 +-- MKLML_VER: Glibc225_vsErf_mklml_lnx_2019.0.1.20181227, MKLML_URL: http://paddlepaddledeps.bj.bcebos.com/Glibc225_vsErf_mklml_lnx_2019.0.1.20181227.tgz +-- ginac: /usr/lib/libginac.so +-- core_src: ;cinn/common/shared.cc;cinn/common/cinn_value.cc;cinn/common/type.cc;cinn/common/target.cc;cinn/common/object.cc;cinn/common/debug_manager.cc;cinn/common/info_registry.cc;cinn/common/graph_utils.cc;cinn/common/context.cc;cinn/common/axis.cc;cinn/common/ir_util.cc;cinn/common/test_helper.cc;cinn/common/arithmatic.cc;cinn/common/cas.cc;cinn/common/union_find.cc;cinn/utils/string.cc;cinn/utils/functional.cc;cinn/utils/dot_lang.cc;cinn/utils/timer.cc;cinn/poly/dim.cc;cinn/poly/domain.cc;cinn/poly/map.cc;cinn/poly/stage.cc;cinn/poly/isl_utils.cc;cinn/poly/schedule.cc;cinn/poly/naive_scheduler.cc;cinn/poly/poly_scheduler.cc;cinn/poly/ast_gen.cc;cinn/poly/graph.cc;cinn/poly/compute_at_transform.cc;cinn/runtime/cpu/host_intrinsics.cc;cinn/runtime/cpu/mkl_math.cc;cinn/runtime/cpu/cblas.cc;cinn/runtime/intrinsic.cc;cinn/runtime/cinn_runtime.cc;cinn/runtime/intrinsic_types.cc;cinn/ir/node.cc;cinn/ir/ir.cc;cinn/ir/node.cc;cinn/ir/ir_visitor.cc;cinn/ir/ir_printer.cc;cinn/ir/ir_mutator.cc;cinn/ir/function_definition.cc;cinn/ir/lowered_func.cc;cinn/ir/ir_operators.cc;cinn/ir/buffer.cc;cinn/ir/function_base.cc;cinn/ir/operation.cc;cinn/ir/collect_ir_nodes.cc;cinn/ir/registry.cc;cinn/ir/tensor.cc +-- Compile core_api without CUDA support +-- Download inference test stuff from http://paddle-inference-dist.bj.bcebos.com/lite_naive_model.tar.gz +-- Configuring done +-- Generating done +-- Build files have been written to: /home/wangyue50/CINN-my/CINN/build +Scanning dependencies of target extern_gtest +[ 0%] Creating directories for 'extern_gtest' +[ 50%] Performing download step (git clone) for 'extern_gtest' +[ 50%] No patch step for 'extern_gtest' +[ 50%] No update step for 'extern_gtest' +[100%] Performing configure step for 'extern_gtest' +loading initial cache file /home/wangyue50/CINN-my/CINN/build/thirds/gtest/tmp/extern_gtest-cache-.cmake +-- The C compiler identification is GNU 7.5.0 +-- The CXX compiler identification is GNU 7.5.0 +-- Check for working C compiler: /usr/bin/cc +-- Check for working C compiler: /usr/bin/cc -- works +-- Detecting C compiler ABI info +-- Detecting C compiler ABI info - done +-- Detecting C compile features +-- Detecting C compile features - done +-- Check for working CXX compiler: /usr/bin/c++ +-- Check for working CXX compiler: /usr/bin/c++ -- works +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +-- Detecting CXX compile features +-- Detecting CXX compile features - done +-- Found PythonInterp: /usr/bin/python (found version "3.6.9") +-- Configuring done +-- Generating done +-- Build files have been written to: /home/wangyue50/CINN-my/CINN/build/thirds/gtest/src/extern_gtest-build +[100%] Performing build step for 'extern_gtest' +Scanning dependencies of target gmock_main +[ 9%] Building CXX object googlemock/CMakeFiles/gmock_main.dir/__/googletest/src/gtest-all.cc.o +[ 18%] Building CXX object googlemock/CMakeFiles/gmock_main.dir/src/gmock-all.cc.o +[ 27%] Building CXX object googlemock/CMakeFiles/gmock_main.dir/src/gmock_main.cc.o +[ 36%] Linking CXX static library libgmock_main.a +[ 36%] Built target gmock_main +Scanning dependencies of target gmock +[ 45%] Building CXX object googlemock/CMakeFiles/gmock.dir/__/googletest/src/gtest-all.cc.o +[ 54%] Building CXX object googlemock/CMakeFiles/gmock.dir/src/gmock-all.cc.o +[ 63%] Linking CXX static library libgmock.a +[ 63%] Built target gmock +Scanning dependencies of target gtest +[ 72%] Building CXX object googlemock/gtest/CMakeFiles/gtest.dir/src/gtest-all.cc.o +[ 81%] Linking CXX static library libgtest.a +[ 81%] Built target gtest +Scanning dependencies of target gtest_main +[ 90%] Building CXX object googlemock/gtest/CMakeFiles/gtest_main.dir/src/gtest_main.cc.o +[100%] Linking CXX static library libgtest_main.a +[100%] Built target gtest_main +[100%] Performing install step for 'extern_gtest' +[ 36%] Built target gmock_main +[ 63%] Built target gmock +[ 81%] Built target gtest +[100%] Built target gtest_main +Install the project... +-- Install configuration: "" +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/lib/libgmock.a +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/lib/libgmock_main.a +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-more-matchers.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-cardinalities.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-matchers.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-generated-function-mockers.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-generated-matchers.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-more-actions.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-generated-actions.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-generated-function-mockers.h.pump +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-generated-nice-strict.h.pump +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-generated-matchers.h.pump +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/internal +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/internal/gmock-generated-internal-utils.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/internal/gmock-internal-utils.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/internal/gmock-port.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/internal/gmock-generated-internal-utils.h.pump +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/internal/custom +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/internal/custom/gmock-matchers.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/internal/custom/gmock-generated-actions.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/internal/custom/gmock-port.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/internal/custom/gmock-generated-actions.h.pump +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-spec-builders.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-generated-actions.h.pump +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-generated-nice-strict.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gmock/gmock-actions.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/lib/libgtest.a +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/lib/libgtest_main.a +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/gtest-param-test.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/gtest-death-test.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/gtest-message.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/gtest-spi.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/gtest-param-test.h.pump +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/gtest-printers.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/gtest_prod.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/gtest-typed-test.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/gtest_pred_impl.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-internal.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-type-util.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-param-util-generated.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-filepath.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-param-util.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-port-arch.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-tuple.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-port.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-death-test-internal.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-string.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/custom +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/custom/gtest-printers.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/custom/gtest-port.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/custom/gtest.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-type-util.h.pump +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-linked_ptr.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/internal/gtest-tuple.h.pump +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/gtest.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gtest/include/gtest/gtest-test-part.h +[100%] Completed 'extern_gtest' +[100%] Built target extern_gtest +Scanning dependencies of target extern_gflags +Scanning dependencies of target extern_mklml +Scanning dependencies of target extern_protobuf +Scanning dependencies of target GEN_LLVM_RUNTIME_IR_HEADER +[ 3%] Built target extern_gtest +[ 3%] Creating directories for 'extern_mklml' +[ 5%] Creating directories for 'extern_gflags' +[ 5%] Generating cinn_runtime_llvm_ir.h +[ 5%] Creating directories for 'extern_protobuf' +[ 5%] Performing download step (git clone) for 'extern_gflags' +[ 7%] Performing download step (git clone) for 'extern_protobuf' +[ 7%] Performing download step (download, verify and extract) for 'extern_mklml' +-- Downloading... + dst='/home/wangyue50/CINN-my/CINN/build/thirds/mklml/src/extern_mklml/Glibc225_vsErf_mklml_lnx_2019.0.1.20181227.tgz' + timeout='none' +-- Using src='http://paddlepaddledeps.bj.bcebos.com/Glibc225_vsErf_mklml_lnx_2019.0.1.20181227.tgz' +[ 7%] Built target GEN_LLVM_RUNTIME_IR_HEADER +Submodule path 'doc': checked out '971dd2a4fadac9cdab174c523c22df79efd63aa5' +[ 9%] No patch step for 'extern_gflags' +[ 9%] No update step for 'extern_gflags' +[ 9%] Performing configure step for 'extern_gflags' +loading initial cache file /home/wangyue50/CINN-my/CINN/build/thirds/gflags/tmp/extern_gflags-cache-.cmake +-- The CXX compiler identification is GNU 7.5.0 +-- Check for working CXX compiler: /usr/bin/c++ +-- Check for working CXX compiler: /usr/bin/c++ -- works +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +-- Detecting CXX compile features +-- Detecting CXX compile features - done +-- Looking for C++ include unistd.h +-- Looking for C++ include unistd.h - found +-- Looking for C++ include stdint.h +-- Looking for C++ include stdint.h - found +-- Looking for C++ include inttypes.h +-- Looking for C++ include inttypes.h - found +-- Looking for C++ include sys/types.h +-- Looking for C++ include sys/types.h - found +-- Looking for C++ include sys/stat.h +-- Looking for C++ include sys/stat.h - found +-- Looking for C++ include fnmatch.h +-- Looking for C++ include fnmatch.h - found +-- Looking for C++ include stddef.h +-- Looking for C++ include stddef.h - found +-- Check size of uint32_t +-- Check size of uint32_t - done +-- Looking for strtoll +-- Looking for strtoll - found +-- Looking for C++ include pthread.h +-- Looking for C++ include pthread.h - found +-- Looking for pthread_create +-- Looking for pthread_create - not found +-- Looking for pthread_create in pthreads +-- Looking for pthread_create in pthreads - not found +-- Looking for pthread_create in pthread +-- Looking for pthread_create in pthread - found +-- Found Threads: TRUE +-- Check size of pthread_rwlock_t +-- Check size of pthread_rwlock_t - done +-- Configuring done +-- Generating done +-- Build files have been written to: /home/wangyue50/CINN-my/CINN/build/thirds/gflags/src/extern_gflags-build +[ 9%] Performing build step for 'extern_gflags' +Scanning dependencies of target gflags_nothreads_static +Scanning dependencies of target gflags_static +[ 12%] Building CXX object CMakeFiles/gflags_nothreads_static.dir/src/gflags.cc.o +[ 25%] Building CXX object CMakeFiles/gflags_nothreads_static.dir/src/gflags_completions.cc.o +[ 37%] Building CXX object CMakeFiles/gflags_nothreads_static.dir/src/gflags_reporting.cc.o +[ 62%] Building CXX object CMakeFiles/gflags_static.dir/src/gflags.cc.o +[ 62%] Building CXX object CMakeFiles/gflags_static.dir/src/gflags_reporting.cc.o +[ 75%] Building CXX object CMakeFiles/gflags_static.dir/src/gflags_completions.cc.o +[ 87%] Linking CXX static library lib/libgflags.a +[100%] Linking CXX static library lib/libgflags_nothreads.a +[100%] Built target gflags_static +[100%] Built target gflags_nothreads_static +[ 9%] Performing install step for 'extern_gflags' +[ 50%] Built target gflags_nothreads_static +[100%] Built target gflags_static +Install the project... +-- Install configuration: "" +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gflags/lib/libgflags.a +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gflags/lib/libgflags_nothreads.a +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gflags/include/gflags/gflags.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gflags/include/gflags/gflags_declare.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gflags/include/gflags/gflags_completions.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gflags/include/gflags/gflags_gflags.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gflags/lib/cmake/gflags/gflags-config.cmake +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gflags/lib/cmake/gflags/gflags-config-version.cmake +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gflags/lib/cmake/gflags/gflags-targets.cmake +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gflags/lib/cmake/gflags/gflags-targets-noconfig.cmake +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gflags/bin/gflags_completions.sh +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/gflags/lib/pkgconfig/gflags.pc +-- Installing: /root/.cmake/packages/gflags/a4bd6f63375cbd4b0411e04dd0704cce +[ 9%] Completed 'extern_gflags' +[ 9%] Built target extern_gflags +Scanning dependencies of target extern_glog +[ 9%] Creating directories for 'extern_glog' +[ 9%] Performing download step (git clone) for 'extern_glog' +-- Downloading... done +-- extracting... + src='/home/wangyue50/CINN-my/CINN/build/thirds/mklml/src/extern_mklml/Glibc225_vsErf_mklml_lnx_2019.0.1.20181227.tgz' + dst='/home/wangyue50/CINN-my/CINN/build/thirds/mklml/src/extern_mklml' +-- extracting... [tar xfz] +-- extracting... [analysis] +-- extracting... [rename] +-- extracting... [clean up] +-- extracting... done +[ 11%] No update step for 'extern_mklml' +[ 11%] No patch step for 'extern_mklml' +[ 11%] No configure step for 'extern_mklml' +[ 11%] No build step for 'extern_mklml' +[ 13%] Performing install step for 'extern_mklml' +[ 13%] Completed 'extern_mklml' +[ 13%] Built target extern_mklml +[ 15%] No update step for 'extern_glog' +[ 15%] No patch step for 'extern_glog' +[ 15%] Performing configure step for 'extern_glog' +loading initial cache file /home/wangyue50/CINN-my/CINN/build/thirds/glog/tmp/extern_glog-cache-.cmake +-- The C compiler identification is GNU 7.5.0 +-- The CXX compiler identification is GNU 7.5.0 +-- Check for working C compiler: /usr/bin/cc +-- Check for working C compiler: /usr/bin/cc -- works +-- Detecting C compiler ABI info +-- Detecting C compiler ABI info - done +-- Detecting C compile features +-- Detecting C compile features - done +-- Check for working CXX compiler: /usr/bin/c++ +-- Check for working CXX compiler: /usr/bin/c++ -- works +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +Branch 'mobile' set up to track remote branch 'mobile' from 'origin'. +-- Detecting CXX compile features +-- Detecting CXX compile features - done +-- Looking for gflags namespace +-- Looking for gflags namespace - gflags +-- Looking for pthread.h +-- Looking for pthread.h - found +-- Looking for pthread_create +-- Looking for pthread_create - not found +-- Looking for pthread_create in pthreads +-- Looking for pthread_create in pthreads - not found +-- Looking for pthread_create in pthread +-- Looking for pthread_create in pthread - found +-- Found Threads: TRUE +-- Looking for dlfcn.h +-- Looking for dlfcn.h - found +-- Looking for execinfo.h +-- Looking for execinfo.h - found +-- Looking for glob.h +-- Looking for glob.h - found +-- Looking for inttypes.h +-- Looking for inttypes.h - found +-- Looking for libunwind.h +-- Looking for libunwind.h - not found +-- Looking for memory.h +-- Looking for memory.h - found +-- Looking for pwd.h +-- Looking for pwd.h - found +-- Looking for stdint.h +-- Looking for stdint.h - found +-- Looking for stdlib.h +-- Looking for stdlib.h - found +-- Looking for string.h +-- Looking for string.h - found +-- Looking for strings.h +-- Looking for strings.h - found +-- Looking for sys/stat.h +-- Looking for sys/stat.h - found +-- Looking for sys/syscall.h +-- Looking for sys/syscall.h - found +-- Looking for sys/time.h +-- Looking for sys/time.h - found +-- Looking for sys/types.h +-- Looking for sys/types.h - found +-- Looking for sys/utsname.h +-- Looking for sys/utsname.h - found +-- Looking for syscall.h +-- Looking for syscall.h - found +-- Looking for syslog.h +-- Looking for syslog.h - found +-- Looking for ucontext.h +-- Looking for ucontext.h - found +-- Looking for unistd.h +-- Looking for unistd.h - found +-- Looking for unwind.h +-- Looking for unwind.h - found +-- Looking for C++ include ext/hash_map +-- Looking for C++ include ext/hash_map - found +-- Looking for C++ include ext/hash_set +-- Looking for C++ include ext/hash_set - found +-- Looking for C++ include ext/slist +Submodule path 'third_party/benchmark': checked out '360e66c1c4777c99402cf8cd535aa510fee16573' +-- Looking for C++ include ext/slist - found +-- Looking for C++ include tr1/unordered_map +[ 15%] No update step for 'extern_protobuf' +[ 15%] No patch step for 'extern_protobuf' +[ 15%] Performing configure step for 'extern_protobuf' +-- The C compiler identification is GNU 7.5.0 +-- Looking for C++ include tr1/unordered_map - found +-- Looking for C++ include tr1/unordered_set +-- The CXX compiler identification is GNU 7.5.0 +-- Check for working C compiler: /usr/bin/cc +-- Check for working C compiler: /usr/bin/cc -- works +-- Detecting C compiler ABI info +-- Looking for C++ include tr1/unordered_set - found +-- Looking for C++ include unordered_map +-- Detecting C compiler ABI info - done +-- Detecting C compile features +-- Looking for C++ include unordered_map - found +-- Looking for C++ include unordered_set +-- Detecting C compile features - done +-- Check for working CXX compiler: /usr/bin/c++ +-- Check for working CXX compiler: /usr/bin/c++ -- works +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +-- Detecting CXX compile features +-- Looking for C++ include unordered_set - found +-- Looking for stddef.h +-- Looking for stddef.h - found +-- Check size of unsigned __int16 +-- Check size of unsigned __int16 - failed +-- Check size of u_int16_t +-- Check size of u_int16_t - done +-- Check size of uint16_t +-- Check size of uint16_t - done +-- Looking for dladdr +-- Looking for dladdr - not found +-- Looking for fcntl +-- Detecting CXX compile features - done +-- Looking for pthread.h +-- Looking for fcntl - found +-- Looking for pread +-- Looking for pthread.h - found +-- Looking for pthread_create +-- Looking for pread - found +-- Looking for pwrite +-- Looking for pthread_create - not found +-- Looking for pthread_create in pthreads +-- Looking for pwrite - found +-- Looking for sigaction +-- Looking for pthread_create in pthreads - not found +-- Looking for pthread_create in pthread +-- Looking for sigaction - found +-- Looking for sigaltstack +-- Looking for pthread_create in pthread - found +-- Found Threads: TRUE +-- Configuring done +-- Looking for sigaltstack - found +-- Performing Test HAVE_NO_DEPRECATED +-- Generating done +-- Build files have been written to: /home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf-build +[ 16%] Performing build step for 'extern_protobuf' +-- Performing Test HAVE_NO_DEPRECATED - Success +-- Performing Test HAVE_NO_UNNAMED_TYPE_TEMPLATE_ARGS +-- Performing Test HAVE_NO_UNNAMED_TYPE_TEMPLATE_ARGS - Failed +-- Looking for snprintf +Scanning dependencies of target libprotobuf-lite +[ 0%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/arena.cc.o +[ 1%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/arenastring.cc.o +[ 2%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/extension_set.cc.o +[ 2%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/generated_message_util.cc.o +[ 2%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/io/coded_stream.cc.o +[ 3%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/io/zero_copy_stream.cc.o +Scanning dependencies of target libprotobuf +-- Looking for snprintf - found +-- Looking for get_static_proc_name in unwind +[ 3%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/arena.cc.o +-- Looking for get_static_proc_name in unwind - not found +-- Performing Test HAVE___ATTRIBUTE__ +[ 4%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/arenastring.cc.o +[ 5%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/extension_set.cc.o +[ 5%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/generated_message_util.cc.o +-- Performing Test HAVE___ATTRIBUTE__ - Success +-- Performing Test HAVE___ATTRIBUTE__VISIBILITY_DEFAULT +[ 5%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/io/coded_stream.cc.o +[ 5%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/io/zero_copy_stream_impl_lite.cc.o +-- Performing Test HAVE___ATTRIBUTE__VISIBILITY_DEFAULT - Success +-- Performing Test HAVE___ATTRIBUTE__VISIBILITY_HIDDEN +[ 6%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/message_lite.cc.o +[ 6%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/repeated_field.cc.o +[ 7%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/io/zero_copy_stream.cc.o +-- Performing Test HAVE___ATTRIBUTE__VISIBILITY_HIDDEN - Success +-- Performing Test HAVE___BUILTIN_EXPECT +-- Performing Test HAVE___BUILTIN_EXPECT - Success +-- Performing Test HAVE___SYNC_VAL_COMPARE_AND_SWAP +[ 8%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/atomicops_internals_x86_gcc.cc.o +-- Performing Test HAVE___SYNC_VAL_COMPARE_AND_SWAP - Success +-- Performing Test HAVE_RWLOCK +[ 8%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/atomicops_internals_x86_msvc.cc.o +[ 8%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/io/zero_copy_stream_impl_lite.cc.o +-- Performing Test HAVE_RWLOCK - Failed +-- Performing Test HAVE___DECLSPEC +-- Performing Test HAVE___DECLSPEC - Failed +-- Performing Test STL_NO_NAMESPACE +[ 9%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/message_lite.cc.o +[ 9%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/repeated_field.cc.o +[ 10%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/atomicops_internals_x86_gcc.cc.o +-- Performing Test STL_NO_NAMESPACE - Failed +-- Performing Test STL_STD_NAMESPACE +[ 10%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/atomicops_internals_x86_msvc.cc.o +[ 11%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/bytestream.cc.o +[ 11%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/common.cc.o +[ 12%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/int128.cc.o +-- Performing Test STL_STD_NAMESPACE - Success +-- Performing Test HAVE_USING_OPERATOR +[ 12%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/once.cc.o +[ 13%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/status.cc.o +[ 14%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/bytestream.cc.o +[ 14%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/common.cc.o +[ 15%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/int128.cc.o +-- Performing Test HAVE_USING_OPERATOR - Success +-- Performing Test HAVE_NAMESPACES +[ 15%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/once.cc.o +[ 16%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/status.cc.o +-- Performing Test HAVE_NAMESPACES - Success +[ 16%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/statusor.cc.o +[ 17%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/stringprintf.cc.o +[ 17%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/stringpiece.cc.o +[ 18%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/structurally_valid.cc.o +[ 19%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/strutil.cc.o +[ 19%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/statusor.cc.o +[ 19%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/time.cc.o +[ 20%] Building CXX object CMakeFiles/libprotobuf-lite.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/wire_format_lite.cc.o +[ 21%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/stringpiece.cc.o +[ 21%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/stringprintf.cc.o +[ 22%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/structurally_valid.cc.o +[ 22%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/strutil.cc.o +[ 23%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/time.cc.o +[ 23%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/wire_format_lite.cc.o +[ 24%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/any.cc.o +[ 24%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/any.pb.cc.o +-- Configuring done +-- Generating done +-- Build files have been written to: /home/wangyue50/CINN-my/CINN/build/thirds/glog/src/extern_glog-build +[ 25%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/api.pb.cc.o +[ 18%] Performing build step for 'extern_glog' +Scanning dependencies of target glog +[ 12%] Building CXX object CMakeFiles/glog.dir/src/demangle.cc.o +[ 25%] Building CXX object CMakeFiles/glog.dir/src/logging.cc.o +[ 25%] Linking CXX static library libprotobuf-lite.a +[ 25%] Built target libprotobuf-lite +[ 37%] Building CXX object CMakeFiles/glog.dir/src/raw_logging.cc.o +[ 50%] Building CXX object CMakeFiles/glog.dir/src/symbolize.cc.o +[ 75%] Building CXX object CMakeFiles/glog.dir/src/utilities.cc.o +[ 75%] Building CXX object CMakeFiles/glog.dir/src/vlog_is_on.cc.o +[ 25%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/importer.cc.o +[ 26%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/parser.cc.o +[ 87%] Building CXX object CMakeFiles/glog.dir/src/signalhandler.cc.o +[ 26%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/descriptor.cc.o +[ 27%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/descriptor.pb.cc.o +[ 27%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/descriptor_database.cc.o +[ 28%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/duration.pb.cc.o +[ 28%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/dynamic_message.cc.o +[100%] Linking CXX static library libglog.a +[100%] Built target glog +[ 20%] Performing install step for 'extern_glog' +[100%] Built target glog +Install the project... +-- Install configuration: "" +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/glog/lib/libglog.a +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/glog/include/glog/config.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/glog/include/glog/logging.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/glog/include/glog/raw_logging.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/glog/include/glog/stl_logging.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/glog/include/glog/vlog_is_on.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/glog/include/glog/log_severity.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/glog/lib/cmake/glog/glog-config.cmake +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/glog/lib/cmake/glog/glog-config-version.cmake +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/glog/lib/cmake/glog/glog-targets.cmake +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/glog/lib/cmake/glog/glog-targets-noconfig.cmake +[ 20%] Completed 'extern_glog' +[ 20%] Built target extern_glog +[ 29%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/empty.pb.cc.o +Scanning dependencies of target cinn_gtest_main +[ 20%] Building CXX object cinn/CMakeFiles/cinn_gtest_main.dir/gtest_main.cc.o +[ 29%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/extension_set_heavy.cc.o +[ 30%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/field_mask.pb.cc.o +[ 30%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/generated_message_reflection.cc.o +[ 20%] Linking CXX static library libcinn_gtest_main.a +[ 20%] Built target cinn_gtest_main +[ 31%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/io/gzip_stream.cc.o +[ 31%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/io/printer.cc.o +[ 32%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/io/strtod.cc.o +[ 32%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/io/tokenizer.cc.o +[ 33%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/io/zero_copy_stream_impl.cc.o +[ 33%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/map_field.cc.o +[ 34%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/message.cc.o +[ 34%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/reflection_ops.cc.o +[ 35%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/service.cc.o +[ 35%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/source_context.pb.cc.o +[ 36%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/struct.pb.cc.o +[ 37%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/mathlimits.cc.o +[ 37%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/stubs/substitute.cc.o +[ 38%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/text_format.cc.o +[ 38%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/timestamp.pb.cc.o +[ 39%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/type.pb.cc.o +[ 39%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/unknown_field_set.cc.o +[ 40%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/field_comparator.cc.o +[ 40%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/field_mask_util.cc.o +[ 41%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/datapiece.cc.o +[ 41%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/default_value_objectwriter.cc.o +[ 42%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/error_listener.cc.o +[ 42%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/field_mask_utility.cc.o +[ 43%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/json_escaping.cc.o +[ 43%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/json_objectwriter.cc.o +[ 44%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/json_stream_parser.cc.o +[ 44%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/object_writer.cc.o +[ 45%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/proto_writer.cc.o +[ 45%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/protostream_objectsource.cc.o +[ 46%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/protostream_objectwriter.cc.o +[ 46%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/type_info.cc.o +[ 47%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/type_info_test_helper.cc.o +[ 47%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/internal/utility.cc.o +[ 48%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/json_util.cc.o +[ 48%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/message_differencer.cc.o +[ 49%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/time_util.cc.o +[ 49%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/util/type_resolver_util.cc.o +[ 50%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/wire_format.cc.o +[ 50%] Building CXX object CMakeFiles/libprotobuf.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/wrappers.pb.cc.o +[ 51%] Linking CXX static library libprotobuf.a +[ 51%] Built target libprotobuf +Scanning dependencies of target libprotoc +[ 52%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/code_generator.cc.o +[ 52%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/command_line_interface.cc.o +[ 52%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/cpp/cpp_enum_field.cc.o +[ 54%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/cpp/cpp_extension.cc.o +[ 54%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/cpp/cpp_field.cc.o +[ 54%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/cpp/cpp_enum.cc.o +[ 55%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/cpp/cpp_file.cc.o +[ 55%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/cpp/cpp_generator.cc.o +[ 56%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/cpp/cpp_helpers.cc.o +[ 56%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/cpp/cpp_map_field.cc.o +[ 57%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/cpp/cpp_message.cc.o +[ 57%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/cpp/cpp_message_field.cc.o +[ 58%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/cpp/cpp_primitive_field.cc.o +[ 58%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/cpp/cpp_service.cc.o +[ 59%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/cpp/cpp_string_field.cc.o +[ 59%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_doc_comment.cc.o +[ 60%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_enum.cc.o +[ 60%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_enum_field.cc.o +[ 61%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_field_base.cc.o +[ 61%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_generator.cc.o +[ 62%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_helpers.cc.o +[ 62%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_map_field.cc.o +[ 63%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_message.cc.o +[ 63%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_message_field.cc.o +[ 64%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_primitive_field.cc.o +[ 64%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_reflection_class.cc.o +[ 65%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_repeated_enum_field.cc.o +[ 65%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_repeated_message_field.cc.o +[ 66%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_repeated_primitive_field.cc.o +[ 66%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_source_generator_base.cc.o +[ 67%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/csharp/csharp_wrapper_field.cc.o +[ 67%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_context.cc.o +[ 68%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_doc_comment.cc.o +[ 68%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_enum.cc.o +[ 69%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_enum_field.cc.o +[ 69%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_enum_field_lite.cc.o +[ 70%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_enum_lite.cc.o +[ 70%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_extension.cc.o +[ 71%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_extension_lite.cc.o +[ 71%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_field.cc.o +[ 72%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_file.cc.o +[ 72%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_generator.cc.o +[ 73%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_helpers.cc.o +[ 73%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_generator_factory.cc.o +[ 74%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_lazy_message_field.cc.o +[ 75%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_lazy_message_field_lite.cc.o +[ 75%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_map_field.cc.o +[ 76%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_map_field_lite.cc.o +[ 76%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_message.cc.o +[ 77%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_message_builder.cc.o +[ 77%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_message_builder_lite.cc.o +[ 78%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_message_field.cc.o +[ 78%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_message_field_lite.cc.o +[ 78%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_name_resolver.cc.o +[ 79%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_message_lite.cc.o +[ 80%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_primitive_field.cc.o +[ 80%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_primitive_field_lite.cc.o +[ 81%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_service.cc.o +[ 81%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_shared_code_generator.cc.o +[ 82%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_string_field.cc.o +[ 82%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/java/java_string_field_lite.cc.o +[ 83%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/javanano/javanano_enum.cc.o +[ 83%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/javanano/javanano_enum_field.cc.o +[ 84%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/javanano/javanano_extension.cc.o +[ 84%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/javanano/javanano_field.cc.o +[ 85%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/javanano/javanano_file.cc.o +[ 85%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/javanano/javanano_generator.cc.o +[ 86%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/javanano/javanano_helpers.cc.o +[ 86%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/javanano/javanano_map_field.cc.o +[ 87%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/javanano/javanano_message.cc.o +[ 87%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/javanano/javanano_message_field.cc.o +[ 88%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/javanano/javanano_primitive_field.cc.o +[ 88%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/js/js_generator.cc.o +[ 89%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/objectivec/objectivec_enum.cc.o +[ 89%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/objectivec/objectivec_enum_field.cc.o +[ 90%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/objectivec/objectivec_extension.cc.o +[ 90%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/objectivec/objectivec_field.cc.o +[ 91%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/objectivec/objectivec_file.cc.o +[ 91%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/objectivec/objectivec_generator.cc.o +[ 92%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/objectivec/objectivec_helpers.cc.o +[ 92%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/objectivec/objectivec_map_field.cc.o +[ 93%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/objectivec/objectivec_message.cc.o +[ 93%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/objectivec/objectivec_message_field.cc.o +[ 94%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/objectivec/objectivec_oneof.cc.o +[ 94%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/objectivec/objectivec_primitive_field.cc.o +[ 95%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/php/php_generator.cc.o +[ 95%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/plugin.cc.o +[ 96%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/plugin.pb.cc.o +[ 96%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/python/python_generator.cc.o +[ 97%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/ruby/ruby_generator.cc.o +[ 97%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/subprocess.cc.o +[ 98%] Building CXX object CMakeFiles/libprotoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/zip_writer.cc.o +[ 98%] Linking CXX static library libprotoc.a +[ 98%] Built target libprotoc +Scanning dependencies of target protoc +[ 99%] Building CXX object CMakeFiles/protoc.dir/home/wangyue50/CINN-my/CINN/build/thirds/protobuf/src/extern_protobuf/src/google/protobuf/compiler/main.cc.o +[100%] Linking CXX executable protoc +[100%] Built target protoc +[ 20%] Performing install step for 'extern_protobuf' +[ 12%] Built target libprotobuf-lite +[ 51%] Built target libprotobuf +[ 98%] Built target libprotoc +[100%] Built target protoc +Install the project... +-- Install configuration: "" +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/lib/libprotobuf-lite.a +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/lib/libprotobuf.a +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/lib/libprotoc.a +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/bin/protoc +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/any.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/any.pb.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/api.pb.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/arena.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/arenastring.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/code_generator.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/command_line_interface.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/cpp/cpp_generator.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/csharp/csharp_generator.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/csharp/csharp_names.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/importer.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/java/java_generator.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/java/java_names.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/javanano/javanano_generator.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/js/js_generator.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/objectivec/objectivec_generator.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/objectivec/objectivec_helpers.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/parser.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/php/php_generator.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/plugin.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/plugin.pb.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/python/python_generator.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/ruby/ruby_generator.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/descriptor.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/descriptor.pb.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/descriptor_database.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/duration.pb.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/dynamic_message.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/empty.pb.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/extension_set.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/field_mask.pb.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/generated_enum_reflection.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/generated_enum_util.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/generated_message_reflection.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/generated_message_util.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/has_bits.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/io/coded_stream.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/io/gzip_stream.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/io/printer.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/io/strtod.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/io/tokenizer.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/io/zero_copy_stream.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/io/zero_copy_stream_impl.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/io/zero_copy_stream_impl_lite.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/map.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/map_entry.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/map_entry_lite.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/map_field.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/map_field_inl.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/map_field_lite.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/map_type_handler.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/message.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/message_lite.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/metadata.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/reflection.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/reflection_ops.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/repeated_field.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/service.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/source_context.pb.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/struct.pb.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomic_sequence_num.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_arm64_gcc.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_arm_gcc.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_arm_qnx.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_atomicword_compat.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_generic_c11_atomic.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_generic_gcc.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_macosx.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_mips_gcc.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_power.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_ppc_gcc.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_solaris.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_tsan.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_x86_gcc.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/atomicops_internals_x86_msvc.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/bytestream.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/callback.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/casts.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/common.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/fastmem.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/hash.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/logging.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/macros.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/mutex.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/once.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/platform_macros.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/port.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/scoped_ptr.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/shared_ptr.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/singleton.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/status.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/stl_util.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/stringpiece.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/template_util.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/stubs/type_traits.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/text_format.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/timestamp.pb.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/type.pb.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/unknown_field_set.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/util/field_comparator.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/util/field_mask_util.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/util/json_util.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/util/message_differencer.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/util/time_util.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/util/type_resolver.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/util/type_resolver_util.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/wire_format.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/wire_format_lite.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/wire_format_lite_inl.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/wrappers.pb.h +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/descriptor.proto +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/any.proto +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/api.proto +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/duration.proto +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/empty.proto +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/field_mask.proto +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/source_context.proto +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/struct.proto +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/timestamp.proto +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/type.proto +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/wrappers.proto +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/include/google/protobuf/compiler/plugin.proto +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/lib/cmake/protobuf/protobuf-targets.cmake +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/lib/cmake/protobuf/protobuf-targets-noconfig.cmake +-- Up-to-date: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/lib/cmake/protobuf +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/lib/cmake/protobuf/protobuf-config-version.cmake +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/lib/cmake/protobuf/protobuf-config.cmake +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/lib/cmake/protobuf/protobuf-module.cmake +-- Installing: /home/wangyue50/CINN-my/CINN/build/thirds/install/protobuf/lib/cmake/protobuf/protobuf-options.cmake +[ 22%] Completed 'extern_protobuf' +[ 22%] Built target extern_protobuf +[ 24%] Running C++ protocol buffer compiler on framework.proto +Scanning dependencies of target framework_proto +[ 24%] Building CXX object cinn/frontend/paddle/CMakeFiles/framework_proto.dir/framework.pb.cc.o +[ 24%] Linking CXX static library libframework_proto.a +[ 24%] Built target framework_proto +Scanning dependencies of target core +[ 24%] Building CXX object CMakeFiles/core.dir/cinn/common/shared.cc.o +[ 24%] Building CXX object CMakeFiles/core.dir/cinn/common/target.cc.o +[ 24%] Building CXX object CMakeFiles/core.dir/cinn/common/type.cc.o +[ 24%] Building CXX object CMakeFiles/core.dir/cinn/common/info_registry.cc.o +[ 24%] Building CXX object CMakeFiles/core.dir/cinn/common/graph_utils.cc.o +[ 24%] Building CXX object CMakeFiles/core.dir/cinn/common/debug_manager.cc.o +[ 26%] Building CXX object CMakeFiles/core.dir/cinn/common/cinn_value.cc.o +[ 28%] Building CXX object CMakeFiles/core.dir/cinn/common/object.cc.o +[ 30%] Building CXX object CMakeFiles/core.dir/cinn/common/context.cc.o +[ 30%] Building CXX object CMakeFiles/core.dir/cinn/common/axis.cc.o +[ 30%] Building CXX object CMakeFiles/core.dir/cinn/common/ir_util.cc.o +[ 32%] Building CXX object CMakeFiles/core.dir/cinn/common/test_helper.cc.o +[ 32%] Building CXX object CMakeFiles/core.dir/cinn/common/arithmatic.cc.o +[ 32%] Building CXX object CMakeFiles/core.dir/cinn/common/cas.cc.o +[ 32%] Building CXX object CMakeFiles/core.dir/cinn/common/union_find.cc.o +[ 33%] Building CXX object CMakeFiles/core.dir/cinn/utils/string.cc.o +[ 33%] Building CXX object CMakeFiles/core.dir/cinn/utils/functional.cc.o +[ 33%] Building CXX object CMakeFiles/core.dir/cinn/utils/dot_lang.cc.o +[ 35%] Building CXX object CMakeFiles/core.dir/cinn/utils/timer.cc.o +[ 35%] Building CXX object CMakeFiles/core.dir/cinn/poly/dim.cc.o +[ 35%] Building CXX object CMakeFiles/core.dir/cinn/poly/domain.cc.o +[ 35%] Building CXX object CMakeFiles/core.dir/cinn/poly/map.cc.o +[ 37%] Building CXX object CMakeFiles/core.dir/cinn/poly/stage.cc.o +[ 37%] Building CXX object CMakeFiles/core.dir/cinn/poly/isl_utils.cc.o +[ 37%] Building CXX object CMakeFiles/core.dir/cinn/poly/schedule.cc.o +[ 37%] Building CXX object CMakeFiles/core.dir/cinn/poly/poly_scheduler.cc.o +[ 39%] Building CXX object CMakeFiles/core.dir/cinn/poly/naive_scheduler.cc.o +[ 39%] Building CXX object CMakeFiles/core.dir/cinn/poly/ast_gen.cc.o +[ 39%] Building CXX object CMakeFiles/core.dir/cinn/poly/graph.cc.o +[ 41%] Building CXX object CMakeFiles/core.dir/cinn/poly/compute_at_transform.cc.o +[ 41%] Building CXX object CMakeFiles/core.dir/cinn/runtime/cpu/host_intrinsics.cc.o +[ 41%] Building CXX object CMakeFiles/core.dir/cinn/runtime/cpu/mkl_math.cc.o +[ 43%] Building CXX object CMakeFiles/core.dir/cinn/runtime/cpu/cblas.cc.o +[ 43%] Building CXX object CMakeFiles/core.dir/cinn/runtime/intrinsic.cc.o +[ 43%] Building CXX object CMakeFiles/core.dir/cinn/runtime/cinn_runtime.cc.o +[ 45%] Building CXX object CMakeFiles/core.dir/cinn/runtime/intrinsic_types.cc.o +[ 45%] Building CXX object CMakeFiles/core.dir/cinn/ir/node.cc.o +[ 45%] Building CXX object CMakeFiles/core.dir/cinn/ir/ir.cc.o +[ 45%] Building CXX object CMakeFiles/core.dir/cinn/ir/ir_visitor.cc.o +[ 47%] Building CXX object CMakeFiles/core.dir/cinn/ir/ir_printer.cc.o +[ 47%] Building CXX object CMakeFiles/core.dir/cinn/ir/ir_mutator.cc.o +[ 47%] Building CXX object CMakeFiles/core.dir/cinn/ir/function_definition.cc.o +[ 49%] Building CXX object CMakeFiles/core.dir/cinn/ir/lowered_func.cc.o +[ 49%] Building CXX object CMakeFiles/core.dir/cinn/ir/ir_operators.cc.o +[ 49%] Building CXX object CMakeFiles/core.dir/cinn/ir/buffer.cc.o +[ 49%] Building CXX object CMakeFiles/core.dir/cinn/ir/function_base.cc.o +[ 50%] Building CXX object CMakeFiles/core.dir/cinn/ir/operation.cc.o +[ 50%] Building CXX object CMakeFiles/core.dir/cinn/ir/collect_ir_nodes.cc.o +[ 50%] Building CXX object CMakeFiles/core.dir/cinn/ir/registry.cc.o +[ 52%] Building CXX object CMakeFiles/core.dir/cinn/ir/tensor.cc.o +[ 52%] Building CXX object CMakeFiles/core.dir/cinn/backends/llvm/llvm_util.cc.o +[ 52%] Building CXX object CMakeFiles/core.dir/cinn/backends/llvm/runtime_symbol_registry.cc.o +[ 52%] Building CXX object CMakeFiles/core.dir/cinn/backends/llvm/codegen_llvm.cc.o +[ 54%] Building CXX object CMakeFiles/core.dir/cinn/backends/llvm/codegen_x86.cc.o +[ 54%] Building CXX object CMakeFiles/core.dir/cinn/backends/llvm/simple_jit.cc.o +[ 54%] Building CXX object CMakeFiles/core.dir/cinn/backends/llvm/execution_engine.cc.o +[ 56%] Building CXX object CMakeFiles/core.dir/cinn/backends/llvm/llvm_optimizer.cc.o +[ 56%] Building CXX object CMakeFiles/core.dir/cinn/backends/outputs.cc.o +[ 56%] Building CXX object CMakeFiles/core.dir/cinn/backends/codegen_c.cc.o +[ 56%] Building CXX object CMakeFiles/core.dir/cinn/backends/codegen_c_x86.cc.o +[ 58%] Building CXX object CMakeFiles/core.dir/cinn/backends/codegen_c_x86_test.cc.o +[ 58%] Building CXX object CMakeFiles/core.dir/cinn/backends/codegen_cuda_dev.cc.o +[ 58%] Building CXX object CMakeFiles/core.dir/cinn/backends/codegen_cuda_host.cc.o +[ 60%] Building CXX object CMakeFiles/core.dir/cinn/backends/extern_func_emitter.cc.o +[ 60%] Building CXX object CMakeFiles/core.dir/cinn/backends/extern_func_emitter_builtin.cc.o +[ 60%] Building CXX object CMakeFiles/core.dir/cinn/backends/function_prototype.cc.o +[ 60%] Building CXX object CMakeFiles/core.dir/cinn/backends/extern_func_protos.cc.o +[ 62%] Building CXX object CMakeFiles/core.dir/cinn/backends/extern_func_jit_register.cc.o +[ 62%] Building CXX object CMakeFiles/core.dir/cinn/backends/modular.cc.o +[ 62%] Building CXX object CMakeFiles/core.dir/cinn/backends/compiler.cc.o +[ 64%] Building CXX object CMakeFiles/core.dir/cinn/lang/buffer.cc.o +[ 64%] Building CXX object CMakeFiles/core.dir/cinn/lang/compute.cc.o +[ 64%] Building CXX object CMakeFiles/core.dir/cinn/lang/placeholder.cc.o +[ 64%] Building CXX object CMakeFiles/core.dir/cinn/lang/module.cc.o +[ 66%] Building CXX object CMakeFiles/core.dir/cinn/lang/lower.cc.o +[ 66%] Building CXX object CMakeFiles/core.dir/cinn/lang/builtin.cc.o +[ 66%] Building CXX object CMakeFiles/core.dir/cinn/lang/lower_impl.cc.o +[ 67%] Building CXX object CMakeFiles/core.dir/cinn/lang/compute_at_postprocess.cc.o +[ 67%] Building CXX object CMakeFiles/core.dir/cinn/lang/packed_func.cc.o +[ 67%] Building CXX object CMakeFiles/core.dir/cinn/optim/remove_nested_block.cc.o +[ 69%] Building CXX object CMakeFiles/core.dir/cinn/optim/replace_call_with_expr.cc.o +[ 69%] Building CXX object CMakeFiles/core.dir/cinn/optim/ir_copy.cc.o +[ 69%] Building CXX object CMakeFiles/core.dir/cinn/optim/ir_replace.cc.o +[ 69%] Building CXX object CMakeFiles/core.dir/cinn/optim/replace_var_with_expr.cc.o +[ 71%] Building CXX object CMakeFiles/core.dir/cinn/optim/tensor_write_tell.cc.o +[ 71%] Building CXX object CMakeFiles/core.dir/cinn/optim/ir_simplify.cc.o +[ 71%] Building CXX object CMakeFiles/core.dir/cinn/optim/optimize.cc.o +[ 73%] Building CXX object CMakeFiles/core.dir/cinn/optim/vectorize_loops.cc.o +[ 73%] Building CXX object CMakeFiles/core.dir/cinn/optim/unroll_loops.cc.o +[ 73%] Building CXX object CMakeFiles/core.dir/cinn/optim/transform_polyfor_to_for.cc.o +[ 73%] Building CXX object CMakeFiles/core.dir/cinn/optim/eliminate_broadcast_in_forloop.cc.o +[ 75%] Building CXX object CMakeFiles/core.dir/cinn/optim/fold_cinn_call_arguments.cc.o +[ 75%] Building CXX object CMakeFiles/core.dir/cinn/optim/call_arg_list_to_pod_value.cc.o +[ 75%] Building CXX object CMakeFiles/core.dir/cinn/optim/insert_debug_log_callee.cc.o +[ 77%] Building CXX object CMakeFiles/core.dir/cinn/optim/lower_function_call_bind_vars.cc.o +[ 77%] Building CXX object CMakeFiles/core.dir/cinn/optim/extern_call_process.cc.o +[ 77%] Building CXX object CMakeFiles/core.dir/cinn/optim/map_extern_call.cc.o +[ 77%] Building CXX object CMakeFiles/core.dir/cinn/optim/cache_read_write_replace.cc.o +[ 79%] Building CXX object CMakeFiles/core.dir/cinn/optim/compute_inline_expand.cc.o +[ 79%] Building CXX object CMakeFiles/core.dir/cinn/optim/buffer_assign.cc.o +[ 79%] Building CXX object CMakeFiles/core.dir/cinn/hlir/framework/tensor.cc.o +[ 81%] Building CXX object CMakeFiles/core.dir/cinn/hlir/framework/scope.cc.o +[ 81%] Building CXX object CMakeFiles/core.dir/cinn/hlir/framework/variable.cc.o +[ 81%] Building CXX object CMakeFiles/core.dir/cinn/hlir/framework/buffer.cc.o +[ 81%] Building CXX object CMakeFiles/core.dir/cinn/hlir/framework/memory.cc.o +[ 83%] Building CXX object CMakeFiles/core.dir/cinn/hlir/framework/instruction.cc.o +[ 83%] Building CXX object CMakeFiles/core.dir/cinn/hlir/framework/graph_compiler.cc.o +[ 83%] Building CXX object CMakeFiles/core.dir/cinn/hlir/framework/graph.cc.o +[ 84%] Building CXX object CMakeFiles/core.dir/cinn/hlir/framework/node.cc.o +[ 84%] Building CXX object CMakeFiles/core.dir/cinn/hlir/framework/pass.cc.o +[ 84%] Building CXX object CMakeFiles/core.dir/cinn/hlir/framework/op_strategy.cc.o +[ 84%] Building CXX object CMakeFiles/core.dir/cinn/hlir/pe/broadcast.cc.o +[ 86%] Building CXX object CMakeFiles/core.dir/cinn/hlir/pe/elementwise.cc.o +[ 86%] Building CXX object CMakeFiles/core.dir/cinn/hlir/pe/nn.cc.o +[ 86%] Building CXX object CMakeFiles/core.dir/cinn/hlir/pe/reduction.cc.o +[ 88%] Building CXX object CMakeFiles/core.dir/cinn/hlir/pe/schedule.cc.o +[ 88%] Building CXX object CMakeFiles/core.dir/cinn/hlir/pe/transform.cc.o +[ 88%] Building CXX object CMakeFiles/core.dir/cinn/hlir/pe/vision.cc.o +[ 88%] Building CXX object CMakeFiles/core.dir/cinn/hlir/op/nn.cc.o +[ 90%] Building CXX object CMakeFiles/core.dir/cinn/hlir/op/broadcast.cc.o +[ 90%] Building CXX object CMakeFiles/core.dir/cinn/hlir/op/transform.cc.o +[ 90%] Building CXX object CMakeFiles/core.dir/cinn/hlir/pass/infershape.cc.o +[ 92%] Building CXX object CMakeFiles/core.dir/cinn/frontend/paddle/cpp/var_desc.cc.o +[ 92%] Building CXX object CMakeFiles/core.dir/cinn/frontend/paddle/cpp/op_desc.cc.o +[ 92%] Building CXX object CMakeFiles/core.dir/cinn/frontend/paddle/cpp/block_desc.cc.o +[ 94%] Building CXX object CMakeFiles/core.dir/cinn/frontend/paddle/cpp/program_desc.cc.o +[ 94%] Building CXX object CMakeFiles/core.dir/cinn/frontend/paddle/pb/var_desc.cc.o +[ 94%] Building CXX object CMakeFiles/core.dir/cinn/frontend/paddle/pb/op_desc.cc.o +[ 94%] Building CXX object CMakeFiles/core.dir/cinn/frontend/paddle/pb/block_desc.cc.o +[ 96%] Building CXX object CMakeFiles/core.dir/cinn/frontend/paddle/pb/program_desc.cc.o +[ 96%] Building CXX object CMakeFiles/core.dir/cinn/frontend/paddle/model_parser.cc.o +[ 96%] Building CXX object CMakeFiles/core.dir/cinn/frontend/paddle/compatible_pb.cc.o +[ 98%] Building CXX object CMakeFiles/core.dir/cinn/frontend/syntax.cc.o +[ 98%] Building CXX object CMakeFiles/core.dir/cinn/frontend/paddle_model_to_program.cc.o +[ 98%] Building CXX object CMakeFiles/core.dir/cinn/frontend/executor.cc.o +[ 98%] Linking CXX static library libcore.a +[ 98%] Built target core +Scanning dependencies of target test01_elementwise_add_main +[ 98%] Building CXX object tests/CMakeFiles/test01_elementwise_add_main.dir/test01_elementwise_add_main.cc.o +[100%] Linking CXX executable test01_elementwise_add_main +[100%] Built target test01_elementwise_add_main +[ 5%] Built target extern_protobuf +[ 9%] Built target extern_gtest +[ 16%] Built target extern_mklml +[ 16%] Built target extern_gflags +[ 16%] Built target GEN_LLVM_RUNTIME_IR_HEADER +[ 22%] Built target extern_glog +[ 22%] Built target cinn_gtest_main +[ 24%] Built target framework_proto +[ 98%] Built target core +Scanning dependencies of target test02_matmul_main +[100%] Building CXX object tests/CMakeFiles/test02_matmul_main.dir/test02_matmul_main.cc.o +[100%] Linking CXX executable test02_matmul_main +[100%] Built target test02_matmul_main +[ 5%] Built target extern_protobuf +[ 9%] Built target extern_gflags +[ 9%] Built target GEN_LLVM_RUNTIME_IR_HEADER +[ 13%] Built target extern_mklml +[ 17%] Built target extern_gtest +[ 23%] Built target extern_glog +[ 23%] Built target cinn_gtest_main +[ 25%] Built target framework_proto +[100%] Built target core +Scanning dependencies of target test03_conv_main +[100%] Building CXX object tests/CMakeFiles/test03_conv_main.dir/test03_convolution_main.cc.o +[100%] Linking CXX executable test03_conv_main +[100%] Built target test03_conv_main +Test project /home/wangyue50/CINN-my/CINN/build + Start 51: test01_elementwise_add_main +1/1 Test #51: test01_elementwise_add_main ...... Passed 0.09 sec + +100% tests passed, 0 tests failed out of 1 + +Total Test time (real) = 0.09 sec +Test project /home/wangyue50/CINN-my/CINN/build + Start 53: test02_matmul_main +1/1 Test #53: test02_matmul_main ............... Passed 0.52 sec + +100% tests passed, 0 tests failed out of 1 + +Total Test time (real) = 0.52 sec +Test project /home/wangyue50/CINN-my/CINN/build + Start 55: test03_conv_main +1/1 Test #55: test03_conv_main ................. Passed 0.13 sec + +100% tests passed, 0 tests failed out of 1 + +Total Test time (real) = 0.13 sec +Scanning dependencies of target extern_pybind +Scanning dependencies of target extern_lite_download_lite_naive_model_tar_gz +[ 2%] Built target extern_mklml +[ 5%] Built target extern_gflags +[ 5%] Creating directories for 'extern_pybind' +[ 5%] Built target GEN_LLVM_RUNTIME_IR_HEADER +[ 8%] Built target extern_protobuf +[ 10%] Creating directories for 'extern_lite_download_lite_naive_model_tar_gz' +[ 11%] Built target extern_gtest +[ 14%] Built target extern_glog +Scanning dependencies of target __x86_source_fake_lib +Scanning dependencies of target cinn_runtime +[ 14%] Building CXX object cinn/backends/CMakeFiles/__x86_source_fake_lib.dir/_x86_builtin_source.cc.o +[ 15%] Built target cinn_gtest_main +[ 15%] Built target framework_proto +[ 16%] Building CXX object cinn/runtime/CMakeFiles/cinn_runtime.dir/buffer.cc.o +[ 16%] Building CXX object cinn/runtime/CMakeFiles/cinn_runtime.dir/cinn_runtime.cc.o +[ 16%] Performing download step (git clone) for 'extern_pybind' +[ 16%] Performing download step for 'extern_lite_download_lite_naive_model_tar_gz' +[ 55%] Built target core +Scanning dependencies of target test_shared +Scanning dependencies of target test_graph_utils +Scanning dependencies of target test_cinn_value +[ 55%] Building CXX object cinn/common/CMakeFiles/test_shared.dir/shared_test.cc.o +[ 55%] Building CXX object cinn/common/CMakeFiles/test_graph_utils.dir/graph_utils_test.cc.o +[ 55%] Building CXX object cinn/common/CMakeFiles/test_cinn_value.dir/cinn_value_test.cc.o +Scanning dependencies of target test_arithmatic +[ 56%] Building CXX object cinn/common/CMakeFiles/test_arithmatic.dir/arithmatic_test.cc.o +[ 57%] Linking CXX static library lib__x86_source_fake_lib.a +[ 57%] Linking CXX static library libcinn_runtime.a +[ 57%] Built target __x86_source_fake_lib +Scanning dependencies of target test_cas +[ 57%] Built target cinn_runtime +[ 57%] Building CXX object cinn/common/CMakeFiles/test_cas.dir/cas_test.cc.o +Scanning dependencies of target test_string +[ 57%] Building CXX object cinn/utils/CMakeFiles/test_string.dir/string_test.cc.o +[ 58%] Linking CXX executable test_shared +[ 59%] No patch step for 'extern_lite_download_lite_naive_model_tar_gz' +[ 59%] No update step for 'extern_lite_download_lite_naive_model_tar_gz' +[ 60%] Linking CXX executable test_string +[ 60%] No configure step for 'extern_lite_download_lite_naive_model_tar_gz' +[ 60%] No build step for 'extern_lite_download_lite_naive_model_tar_gz' +[ 60%] No install step for 'extern_lite_download_lite_naive_model_tar_gz' +[ 60%] Completed 'extern_lite_download_lite_naive_model_tar_gz' +[ 60%] Built target extern_lite_download_lite_naive_model_tar_gz +Scanning dependencies of target test_schedule +[ 61%] Building CXX object cinn/poly/CMakeFiles/test_schedule.dir/schedule_test.cc.o +[ 61%] Built target test_shared +Scanning dependencies of target test_compute_at_transform +[ 61%] Building CXX object cinn/poly/CMakeFiles/test_compute_at_transform.dir/compute_at_transform_test.cc.o +[ 61%] Built target test_string +Scanning dependencies of target test_stage +[ 61%] Building CXX object cinn/poly/CMakeFiles/test_stage.dir/stage_test.cc.o +[ 61%] Linking CXX executable test_graph_utils +[ 62%] Linking CXX executable test_cinn_value +[ 62%] Linking CXX executable test_arithmatic +[ 63%] Linking CXX executable test_cas +[ 63%] Linking CXX executable test_compute_at_transform +[ 63%] Linking CXX executable test_schedule +[ 63%] Built target test_graph_utils +Scanning dependencies of target test_cinn_runtime +[ 63%] Building CXX object cinn/runtime/CMakeFiles/test_cinn_runtime.dir/cinn_runtime_test.cc.o +[ 63%] Built target test_cinn_value +Scanning dependencies of target test_mkl_math +[ 64%] Building CXX object cinn/runtime/cpu/CMakeFiles/test_mkl_math.dir/mkl_math_test.cc.o +[ 64%] Linking CXX executable test_cinn_runtime +[ 64%] Built target test_arithmatic +[ 64%] Building CXX object cinn/runtime/cpu/CMakeFiles/test_mkl_math.dir/mkl_math.cc.o +[ 64%] Built target test_cinn_runtime +Scanning dependencies of target test_host_intrinsics +[ 65%] Building CXX object cinn/runtime/cpu/CMakeFiles/test_host_intrinsics.dir/host_intrinsics_test.cc.o +[ 65%] Built target test_compute_at_transform +[ 65%] Built target test_cas +Scanning dependencies of target test_tensor +Scanning dependencies of target test_collect_ir_nodes +[ 65%] Building CXX object cinn/ir/CMakeFiles/test_tensor.dir/tensor_test.cc.o +[ 65%] Building CXX object cinn/ir/CMakeFiles/test_collect_ir_nodes.dir/collect_ir_nodes_test.cc.o +Submodule path 'tools/clang': checked out '6a00cbc4a9b8e68b71caf7f774b3f9c753ae84d5' +[ 65%] No patch step for 'extern_pybind' +[ 65%] No update step for 'extern_pybind' +[ 66%] No configure step for 'extern_pybind' +[ 66%] No build step for 'extern_pybind' +[ 66%] No install step for 'extern_pybind' +[ 66%] No test step for 'extern_pybind' +[ 66%] Completed 'extern_pybind' +[ 66%] Built target test_schedule +Scanning dependencies of target test_buffer +[ 66%] Built target extern_pybind +[ 66%] Building CXX object cinn/ir/CMakeFiles/test_buffer.dir/buffer_test.cc.o +Scanning dependencies of target test_codegen_c +[ 66%] Building CXX object cinn/backends/CMakeFiles/test_codegen_c.dir/codegen_c_test.cc.o +[ 67%] Linking CXX executable test_collect_ir_nodes +[ 67%] Linking CXX executable test_buffer +Scanning dependencies of target test_codegen_c_x86 +[ 68%] Building CXX object cinn/backends/CMakeFiles/test_codegen_c_x86.dir/codegen_c_x86_test.cc.o +[ 68%] Built target test_collect_ir_nodes +Scanning dependencies of target test_compiler +[ 68%] Building CXX object cinn/backends/CMakeFiles/test_compiler.dir/compiler_test.cc.o +[ 68%] Built target test_buffer +Scanning dependencies of target test_codegen_llvm +[ 68%] Building CXX object cinn/backends/llvm/CMakeFiles/test_codegen_llvm.dir/codegen_llvm_test.cc.o +[ 68%] Linking CXX executable test_codegen_c_x86 +[ 68%] Linking CXX executable test_stage +[ 68%] Linking CXX executable test_codegen_c +[ 68%] Built target test_codegen_c_x86 +Scanning dependencies of target test_execution_engine +[ 69%] Building CXX object cinn/backends/llvm/CMakeFiles/test_execution_engine.dir/execution_engine_test.cc.o +[ 69%] Built target test_stage +[ 69%] Linking CXX executable test_mkl_math +[ 69%] Built target test_codegen_c +Scanning dependencies of target test_placeholder +Scanning dependencies of target test_codegen_x86 +[ 69%] Building CXX object cinn/lang/CMakeFiles/test_placeholder.dir/placeholder_test.cc.o +[ 69%] Building CXX object cinn/backends/llvm/CMakeFiles/test_codegen_x86.dir/codegen_x86_test.cc.o +[ 69%] Linking CXX executable test_tensor +[ 69%] Linking CXX executable test_host_intrinsics +[ 69%] Linking CXX executable test_placeholder +[ 69%] Built target test_tensor +[ 69%] Built target test_mkl_math +Scanning dependencies of target test_compute +Scanning dependencies of target test_lower +[ 70%] Building CXX object cinn/lang/CMakeFiles/test_compute.dir/compute_test.cc.o +[ 70%] Building CXX object cinn/lang/CMakeFiles/test_lower.dir/lower_test.cc.o +[ 71%] Linking CXX executable test_codegen_llvm +[ 71%] Built target test_host_intrinsics +Scanning dependencies of target test_lower_impl +[ 71%] Building CXX object cinn/lang/CMakeFiles/test_lower_impl.dir/lower_impl_test.cc.o +[ 71%] Built target test_placeholder +Scanning dependencies of target test_packed_func +[ 72%] Building CXX object cinn/lang/CMakeFiles/test_packed_func.dir/packed_func_test.cc.o +[ 72%] Linking CXX executable test_compute +[ 72%] Built target test_codegen_llvm +Scanning dependencies of target test_cache_read_write_replace +[ 73%] Building CXX object cinn/optim/CMakeFiles/test_cache_read_write_replace.dir/cache_read_write_replace_test.cc.o +[ 74%] Linking CXX executable test_lower +[ 74%] Linking CXX executable test_compiler +[ 74%] Linking CXX executable test_lower_impl +[ 74%] Linking CXX executable test_packed_func +[ 74%] Built target test_compute +Scanning dependencies of target test_optimize +[ 74%] Building CXX object cinn/optim/CMakeFiles/test_optimize.dir/optimize_test.cc.o +[ 74%] Linking CXX executable test_cache_read_write_replace +[ 74%] Built target test_lower +Scanning dependencies of target test_ir_copy +[ 74%] Building CXX object cinn/optim/CMakeFiles/test_ir_copy.dir/ir_copy_test.cc.o +[ 74%] Built target test_packed_func +Scanning dependencies of target test_remove_nested_block +[ 75%] Building CXX object cinn/optim/CMakeFiles/test_remove_nested_block.dir/remove_nested_block_test.cc.o +[ 75%] Built target test_compiler +Scanning dependencies of target test_replace_call_with_expr +[ 75%] Building CXX object cinn/optim/CMakeFiles/test_replace_call_with_expr.dir/replace_call_with_expr_test.cc.o +[ 75%] Linking CXX executable test_codegen_x86 +[ 75%] Linking CXX executable test_optimize +[ 75%] Built target test_lower_impl +Scanning dependencies of target test_ir_simplify +[ 76%] Building CXX object cinn/optim/CMakeFiles/test_ir_simplify.dir/ir_simplify_test.cc.o +[ 76%] Built target test_cache_read_write_replace +Scanning dependencies of target test_vectorize_loops +[ 76%] Building CXX object cinn/optim/CMakeFiles/test_vectorize_loops.dir/vectorize_loops_test.cc.o +[ 76%] Linking CXX executable test_execution_engine +[ 76%] Linking CXX executable test_ir_copy +[ 76%] Linking CXX executable test_remove_nested_block +[ 76%] Linking CXX executable test_replace_call_with_expr +[ 76%] Built target test_codegen_x86 +[ 76%] Linking CXX executable test_ir_simplify +Scanning dependencies of target test_transform_polyfor_to_for +[ 77%] Building CXX object cinn/optim/CMakeFiles/test_transform_polyfor_to_for.dir/transform_polyfor_to_for_test.cc.o +[ 77%] Built target test_optimize +Scanning dependencies of target test_hlir_framework_print_graph_pass +[ 77%] Building CXX object cinn/hlir/framework/CMakeFiles/test_hlir_framework_print_graph_pass.dir/print_graph_pass_test.cc.o +[ 77%] Built target test_ir_copy +[ 78%] Linking CXX executable test_vectorize_loops +Scanning dependencies of target test_hlir_framework_infershape_pass +[ 78%] Building CXX object cinn/hlir/framework/CMakeFiles/test_hlir_framework_infershape_pass.dir/infershape_pass_test.cc.o +[ 78%] Built target test_remove_nested_block +[ 78%] Built target test_replace_call_with_expr +Scanning dependencies of target test_hlir_framework_buffer +[ 79%] Building CXX object cinn/hlir/framework/CMakeFiles/test_hlir_framework_buffer.dir/buffer_test.cc.o +Scanning dependencies of target test_hlir_framework_scope +[ 79%] Building CXX object cinn/hlir/framework/CMakeFiles/test_hlir_framework_scope.dir/scope_test.cc.o +[ 79%] Built target test_execution_engine +Scanning dependencies of target test_hlir_framework_tensor +[ 79%] Building CXX object cinn/hlir/framework/CMakeFiles/test_hlir_framework_tensor.dir/tensor_test.cc.o +[ 79%] Linking CXX executable test_hlir_framework_buffer +[ 79%] Linking CXX executable test_transform_polyfor_to_for +[ 79%] Built target test_hlir_framework_buffer +Scanning dependencies of target test_hlir_framework_instruction +[ 80%] Building CXX object cinn/hlir/framework/CMakeFiles/test_hlir_framework_instruction.dir/instruction_test.cc.o +[ 80%] Built target test_ir_simplify +[ 80%] Linking CXX executable test_hlir_framework_tensor +[ 81%] Linking CXX executable test_hlir_framework_scope +Scanning dependencies of target test_hlir_framework_op +[ 81%] Building CXX object cinn/hlir/framework/CMakeFiles/test_hlir_framework_op.dir/op_test.cc.o +[ 81%] Linking CXX executable test_hlir_framework_print_graph_pass +[ 81%] Built target test_vectorize_loops +Scanning dependencies of target test_pe_transform +[ 81%] Building CXX object cinn/hlir/pe/CMakeFiles/test_pe_transform.dir/pe_transform_test.cc.o +[ 81%] Built target test_hlir_framework_tensor +[ 81%] Built target test_hlir_framework_scope +[ 81%] Built target test_transform_polyfor_to_for +Scanning dependencies of target test_pe_elementwise +[ 81%] Building CXX object cinn/hlir/pe/CMakeFiles/test_pe_elementwise.dir/pe_elementwise_test.cc.o +Scanning dependencies of target test_op_broadcast +Scanning dependencies of target test_pe_broadcast +[ 81%] Building CXX object cinn/hlir/op/CMakeFiles/test_op_broadcast.dir/op_broadcast_test.cc.o +[ 81%] Building CXX object cinn/hlir/pe/CMakeFiles/test_pe_broadcast.dir/pe_broadcast_test.cc.o +[ 81%] Built target test_hlir_framework_print_graph_pass +Scanning dependencies of target test_op_nn +[ 82%] Building CXX object cinn/hlir/op/CMakeFiles/test_op_nn.dir/op_nn_test.cc.o +[ 83%] Linking CXX executable test_hlir_framework_op +[ 83%] Linking CXX executable test_op_broadcast +[ 83%] Built target test_hlir_framework_op +Scanning dependencies of target core_api +[ 84%] Building CXX object cinn/pybind/CMakeFiles/core_api.dir/runtime.cc.o +[ 84%] Linking CXX executable test_hlir_framework_infershape_pass +[ 84%] Built target test_op_broadcast +[ 84%] Building CXX object cinn/pybind/CMakeFiles/core_api.dir/common.cc.o +[ 85%] Linking CXX executable test_pe_transform +[ 85%] Linking CXX executable test_hlir_framework_instruction +[ 85%] Built target test_hlir_framework_infershape_pass +[ 85%] Building CXX object cinn/pybind/CMakeFiles/core_api.dir/lang.cc.o +[ 86%] Linking CXX executable test_pe_broadcast +[ 86%] Linking CXX executable test_pe_elementwise +[ 86%] Built target test_pe_transform +[ 87%] Building CXX object cinn/pybind/CMakeFiles/core_api.dir/ir.cc.o +[ 87%] Built target test_hlir_framework_instruction +[ 87%] Building CXX object cinn/pybind/CMakeFiles/core_api.dir/poly.cc.o +[ 87%] Linking CXX executable test_op_nn +Scanning dependencies of target test_frontend_syntax +[ 87%] Building CXX object cinn/frontend/CMakeFiles/test_frontend_syntax.dir/syntax_test.cc.o +[ 87%] Built target test_pe_elementwise +[ 87%] Building CXX object cinn/pybind/CMakeFiles/core_api.dir/backends.cc.o +[ 87%] Built target test_pe_broadcast +[ 87%] Building CXX object cinn/pybind/CMakeFiles/core_api.dir/bind.cc.o +[ 87%] Built target test_op_nn +[ 88%] Building CXX object cinn/pybind/CMakeFiles/core_api.dir/optim.cc.o +Scanning dependencies of target test_frontend_executor +[ 88%] Building CXX object cinn/frontend/CMakeFiles/test_frontend_executor.dir/executor_test.cc.o +Scanning dependencies of target test_model_parser +[ 88%] Building CXX object cinn/frontend/paddle/CMakeFiles/test_model_parser.dir/model_parser_test.cc.o +[ 89%] Built target test01_elementwise_add_main +[ 89%] Building CXX object cinn/pybind/CMakeFiles/core_api.dir/pe.cc.o +[ 89%] Built target test03_conv_main +[ 89%] Building CXX object cinn/pybind/CMakeFiles/core_api.dir/frontend.cc.o +[ 90%] Built target test02_matmul_main +[ 91%] Building CXX object cinn/pybind/CMakeFiles/core_api.dir/framework.cc.o +[ 92%] Linking CXX executable test_model_parser +[ 92%] Built target test_model_parser +Scanning dependencies of target test_generated1 +[ 92%] Building CXX object cinn/backends/CMakeFiles/test_generated1.dir/generated_module1.cc.o +[ 93%] Linking CXX executable test_generated1 +[ 93%] Built target test_generated1 +Scanning dependencies of target test01_elementwise_add_case +[ 94%] Building CXX object tests/CMakeFiles/test01_elementwise_add_case.dir/test01_elementwise_add_case.cc.o +[ 94%] Linking CXX executable test_frontend_syntax +[ 94%] Building CXX object tests/CMakeFiles/test01_elementwise_add_case.dir/test01_elementwise_add.cc.o +Scanning dependencies of target test03_conv_case +[ 94%] Building CXX object tests/CMakeFiles/test03_conv_case.dir/test03_convolution_case.cc.o +[ 94%] Building CXX object tests/CMakeFiles/test03_conv_case.dir/test03_convolution.cc.o +[ 94%] Building CXX object tests/CMakeFiles/test01_elementwise_add_case.dir/test01_elementwise_add_compute_at.cc.o +[ 95%] Linking CXX executable test03_conv_case +Scanning dependencies of target test02_matmul_case +[ 95%] Building CXX object tests/CMakeFiles/test02_matmul_case.dir/test02_matmul_case.cc.o +[ 95%] Built target test03_conv_case +[ 95%] Building CXX object tests/CMakeFiles/test02_matmul_case.dir/test02_matmul.cc.o +[ 95%] Building CXX object tests/CMakeFiles/test01_elementwise_add_case.dir/test01_elementwise_add_compute_at_level1.cc.o +[ 95%] Building CXX object tests/CMakeFiles/test02_matmul_case.dir/test02_matmul_tile.cc.o +[ 95%] Built target test_frontend_syntax +[ 96%] Building CXX object tests/CMakeFiles/test02_matmul_case.dir/test02_matmul_split.cc.o +[ 97%] Building CXX object tests/CMakeFiles/test01_elementwise_add_case.dir/test01_elementwise_add_vectorize.cc.o +[ 97%] Building CXX object tests/CMakeFiles/test02_matmul_case.dir/test02_matmul_block.cc.o +[ 97%] Building CXX object tests/CMakeFiles/test02_matmul_case.dir/test02_matmul_vectorize.cc.o +[ 98%] Building CXX object tests/CMakeFiles/test02_matmul_case.dir/test02_matmul_loop_permutation.cc.o +[ 98%] Building CXX object tests/CMakeFiles/test02_matmul_case.dir/test02_matmul_array_packing.cc.o +[ 99%] Linking CXX executable test_frontend_executor +[ 99%] Building CXX object tests/CMakeFiles/test02_matmul_case.dir/test02_matmul_varient_shape.cc.o +[ 99%] Building CXX object tests/CMakeFiles/test02_matmul_case.dir/test02_matmul_varient_shape_tile.cc.o +[100%] Building CXX object tests/CMakeFiles/test02_matmul_case.dir/test02_matmul_array_packing_dynamic_shape.cc.o +[100%] Building CXX object tests/CMakeFiles/test02_matmul_case.dir/test02_matmul_call.cc.o +[100%] Linking CXX executable test01_elementwise_add_case +[100%] Built target test_frontend_executor +[100%] Built target test01_elementwise_add_case +[100%] Linking CXX executable test02_matmul_case +[100%] Built target test02_matmul_case +[100%] Linking CXX shared library core_api.so +[100%] Built target core_api +Scanning dependencies of target COPY_CINN_CORE_API +[100%] Generating cinn/core_api.so +[100%] Built target COPY_CINN_CORE_API +ResNet18/ +ResNet18/params +ResNet18/__model__ +MobileNetV2/ +MobileNetV2/params +MobileNetV2/__model__ +res fc_0.tmp_2 +res fc_5.tmp_2 +res relu_0.tmp_0 +UpdateCTestConfiguration from :/home/wangyue50/CINN-my/CINN/build/DartConfiguration.tcl +UpdateCTestConfiguration from :/home/wangyue50/CINN-my/CINN/build/DartConfiguration.tcl +Test project /home/wangyue50/CINN-my/CINN/build +Constructing a list of tests +Done constructing a list of tests +Updating test list for fixtures +Added 0 tests to meet fixture requirements +Checking test dependency graph... +Checking test dependency graph end +test 53 + Start 53: test02_matmul_main + +53: Test command: /home/wangyue50/CINN-my/CINN/build/tests/test02_matmul_main "--cinn_x86_builtin_code_root=/home/wangyue50/CINN-my/CINN/cinn/backends" +53: Test timeout computed to be: 600 +test 55 + Start 55: test03_conv_main + +55: Test command: /home/wangyue50/CINN-my/CINN/build/tests/test03_conv_main "--cinn_x86_builtin_code_root=/home/wangyue50/CINN-my/CINN/cinn/backends" +55: Test timeout computed to be: 600 +test 51 + Start 51: test01_elementwise_add_main + +51: Test command: /home/wangyue50/CINN-my/CINN/build/tests/test01_elementwise_add_main "--cinn_x86_builtin_code_root=/home/wangyue50/CINN-my/CINN/cinn/backends" +51: Test timeout computed to be: 600 +test 1 + Start 1: test_cinn_value + +1: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/common/test_cinn_value "" +1: Test timeout computed to be: 600 +test 2 + Start 2: test_shared + +2: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/common/test_shared "" +2: Test timeout computed to be: 600 +test 3 + Start 3: test_graph_utils + +3: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/common/test_graph_utils "" +3: Test timeout computed to be: 600 +test 4 + Start 4: test_arithmatic + +4: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/common/test_arithmatic "" +4: Test timeout computed to be: 600 +test 5 + Start 5: test_cas + +5: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/common/test_cas "" +5: Test timeout computed to be: 600 +test 6 + Start 6: test_string + +6: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/utils/test_string "" +6: Test timeout computed to be: 600 +test 7 + Start 7: test_schedule + +7: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/poly/test_schedule "" +7: Test timeout computed to be: 600 +55: [==========] Running 1 test from 1 test case. +55: [----------] Global test environment set-up. +55: [----------] 1 test from test03_conv +55: [ RUN ] test03_conv.basic +55: WARNING: Logging before InitGoogleLogging() is written to STDERR +55: W0924 13:32:18.210212 27787 codegen_c.cc:24] Output C header to file ./test03_convolution.h +55: W0924 13:32:18.233403 27787 codegen_c.cc:33] Output C source to file ./test03_convolution.cc +55: [ OK ] test03_conv.basic (74 ms) +55: [----------] 1 test from test03_conv (74 ms total) +55: +55: [----------] Global test environment tear-down +55: [==========] 1 test from 1 test case ran. (74 ms total) +55: [ PASSED ] 1 test. +53: [==========] Running 9 tests from 2 test cases. +53: [----------] Global test environment set-up. +53: [----------] 1 test from test02_matmul +53: [ RUN ] test02_matmul.basic +53: WARNING: Logging before InitGoogleLogging() is written to STDERR +53: W0924 13:32:18.175216 27786 codegen_c.cc:24] Output C header to file ./test02_matmul.h +53: W0924 13:32:18.176230 27786 codegen_c.cc:33] Output C source to file ./test02_matmul.cc +53: W0924 13:32:18.207154 27786 codegen_c.cc:24] Output C header to file ./test02_matmul_tile.h +53: W0924 13:32:18.211670 27786 codegen_c.cc:33] Output C source to file ./test02_matmul_tile.cc +53: [ OK ] test02_matmul.basic (52 ms) +53: [----------] 1 test from test02_matmul (52 ms total) +53: +53: [----------] 8 tests from matmul +53: [ RUN ] matmul.Split +53: W0924 13:32:18.232223 27786 codegen_c.cc:24] Output C header to file ./test02_matmul_split.h +53: W0924 13:32:18.235391 27786 codegen_c.cc:33] Output C source to file ./test02_matmul_split.cc +53: [ OK ] matmul.Split (24 ms) +53: [ RUN ] matmul.Blocking +53: W0924 13:32:18.278457 27786 codegen_c.cc:24] Output C header to file ./test02_matmul_block.h +53: W0924 13:32:18.286087 27786 codegen_c.cc:33] Output C source to file ./test02_matmul_block.cc +53: [ OK ] matmul.Blocking (51 ms) +53: [ RUN ] matmul.Vectorization +53: W0924 13:32:18.332374 27786 codegen_c.cc:24] Output C header to file ./test02_matmul_vectorize.h +51: [==========] Running 4 tests from 2 test cases. +51: [----------] Global test environment set-up. +51: [----------] 2 tests from test01_elementwise_add +51: [ RUN ] test01_elementwise_add.basic +51: WARNING: Logging before InitGoogleLogging() is written to STDERR +51: W0924 13:32:18.168052 27788 codegen_c.cc:24] Output C header to file ./test01_elementwise_add.h +51: W0924 13:32:18.168761 27788 codegen_c.cc:33] Output C source to file ./test01_elementwise_add.cc +51: [ OK ] test01_elementwise_add.basic (9 ms) +51: [ RUN ] test01_elementwise_add.vectorize +51: I0924 13:32:18.175801 27788 test01_elementwise_add_main.cc:56] after optim: +51: function add1_vectorize (_A, _B, _C) +51: { +51: for (i, 100) +51: { +51: for (j, 4) +51: { +51: C[Broadcast(i,8), Ramp((8 * j),1,8)] = (A[i, Ramp((8 * j),1,8)] + B[i, Ramp((8 * j),1,8)]) +51: } +51: } +51: } +51: W0924 13:32:18.176316 27788 codegen_c.cc:24] Output C header to file ./test01_elementwise_add_vectorize.h +51: W0924 13:32:18.177737 27788 codegen_c.cc:33] Output C source to file ./test01_elementwise_add_vectorize.cc +51: [ OK ] test01_elementwise_add.vectorize (9 ms) +51: [----------] 2 tests from test01_elementwise_add (18 ms total) +51: +51: [----------] 2 tests from elementwise_add +51: [ RUN ] elementwise_add.compute_at +51: W0924 13:32:18.197803 27788 codegen_c.cc:24] Output C header to file ./test01_elementwise_add_compute_at.h +51: W0924 13:32:18.200814 27788 codegen_c.cc:33] Output C source to file ./test01_elementwise_add_compute_at.cc +51: [ OK ] elementwise_add.compute_at (23 ms) +51: [ RUN ] elementwise_add.compute_at1 +51: W0924 13:32:18.227854 27788 codegen_c.cc:24] Output C header to file ./test01_elementwise_add_compute_at_level1.h +51: W0924 13:32:18.230847 27788 codegen_c.cc:33] Output C source to file ./test01_elementwise_add_compute_at_level1.cc +51: [ OK ] elementwise_add.compute_at1 (30 ms) +51: [----------] 2 tests from elementwise_add (53 ms total) +51: +51: [----------] Global test environment tear-down +51: [==========] 4 tests from 2 test cases ran. (71 ms total) +51: [ PASSED ] 4 tests. +1: [==========] Running 3 tests from 1 test case. +1: [----------] Global test environment set-up. +1: [----------] 3 tests from CINNValue +1: [ RUN ] CINNValue.test +1: [ OK ] CINNValue.test (0 ms) +1: [ RUN ] CINNValue.buffer +1: [ OK ] CINNValue.buffer (0 ms) +1: [ RUN ] CINNValue.Expr +1: [ OK ] CINNValue.Expr (0 ms) +1: [----------] 3 tests from CINNValue (0 ms total) +1: +1: [----------] Global test environment tear-down +1: [==========] 3 tests from 1 test case ran. (0 ms total) +1: [ PASSED ] 3 tests. +2: [==========] Running 2 tests from 1 test case. +2: [----------] Global test environment set-up. +2: [----------] 2 tests from Shared +2: [ RUN ] Shared.test +2: [ OK ] Shared.test (0 ms) +2: [ RUN ] Shared.cycle_share +2: [ OK ] Shared.cycle_share (0 ms) +2: [----------] 2 tests from Shared (0 ms total) +2: +2: [----------] Global test environment tear-down +2: [==========] 2 tests from 1 test case ran. (0 ms total) +2: [ PASSED ] 2 tests. +3: [==========] Running 2 tests from 1 test case. +3: [----------] Global test environment set-up. +3: [----------] 2 tests from Graph +3: [ RUN ] Graph.Visualize +3: WARNING: Logging before InitGoogleLogging() is written to STDERR +3: I0924 13:32:18.161446 27791 graph_utils_test.cc:58] graph: +3: digraph G { +3: node_0[label="A"] +3: node_1[label="B"] +3: node_2[label="C"] +3: node_3[label="D"] +3: node_4[label="E"] +3: node_0->node_2 +3: node_0->node_1 +3: node_1->node_3 +3: node_2->node_4 +3: node_2->node_3 +3: } // end G +3: [ OK ] Graph.Visualize (0 ms) +3: [ RUN ] Graph.simple +3: I0924 13:32:18.161548 27791 graph_utils_test.cc:67] graph1 digraph G { +3: node_5[label="A"] +3: node_6[label="B"] +3: node_6->node_5 +3: } // end G +3: [ OK ] Graph.simple (0 ms) +3: [----------] 2 tests from Graph (0 ms total) +3: +3: [----------] Global test environment tear-down +3: [==========] 2 tests from 1 test case ran. (0 ms total) +3: [ PASSED ] 2 tests. +4: [==========] Running 5 tests from 2 test cases. +4: [----------] Global test environment set-up. +4: [----------] 3 tests from GiNaC +4: [ RUN ] GiNaC.simplify +4: WARNING: Logging before InitGoogleLogging() is written to STDERR +4: I0924 13:32:18.162345 27792 arithmatic_test.cc:26] e: -94+9*y +4: [ OK ] GiNaC.simplify (0 ms) +4: [ RUN ] GiNaC.diff +4: I0924 13:32:18.162425 27792 arithmatic_test.cc:37] e: 1 +4: I0924 13:32:18.162429 27792 arithmatic_test.cc:38] e1: 0 +4: [ OK ] GiNaC.diff (0 ms) +4: [ RUN ] GiNaC.solve +4: I0924 13:32:18.162444 27792 arithmatic_test.cc:48] solve: {x==8} +4: I0924 13:32:18.162505 27792 arithmatic_test.cc:49] 2 +4: [ OK ] GiNaC.solve (0 ms) +4: [----------] 3 tests from GiNaC (0 ms total) +4: +4: [----------] 2 tests from Solve +4: [ RUN ] Solve.basic +4: I0924 13:32:18.162674 27792 arithmatic_test.cc:59] res: 200 +4: [ OK ] Solve.basic (0 ms) +4: [ RUN ] Solve.basic1 +4: I0924 13:32:18.162784 27792 arithmatic_test.cc:72] res -400 +4: [ OK ] Solve.basic1 (0 ms) +4: [----------] 2 tests from Solve (0 ms total) +4: +4: [----------] Global test environment tear-down +4: [==========] 5 tests from 2 test cases ran. (0 ms total) +4: [ PASSED ] 5 tests. +5: [==========] Running 12 tests from 2 test cases. +5: [----------] Global test environment set-up. +5: [----------] 11 tests from CAS +5: [ RUN ] CAS.SimplifyPower_0 +5: WARNING: Logging before InitGoogleLogging() is written to STDERR +5: I0924 13:32:18.163086 27793 cas_test.cc:25] p0 (x^0) +5: I0924 13:32:18.163147 27793 cas_test.cc:27] simplified 1 +5: I0924 13:32:18.163197 27793 cas_test.cc:47] p0 (1^x) +5: I0924 13:32:18.163206 27793 cas_test.cc:49] simplified 1 +5: I0924 13:32:18.163215 27793 cas_test.cc:56] p0 (0^x) +5: I0924 13:32:18.163221 27793 cas_test.cc:58] simplified 0 +5: I0924 13:32:18.163229 27793 cas_test.cc:65] p0 (0^x) +5: I0924 13:32:18.163236 27793 cas_test.cc:67] simplified 0 +5: [ OK ] CAS.SimplifyPower_0 (0 ms) +5: [ RUN ] CAS.number_cal +5: I0924 13:32:18.163265 27793 cas_test.cc:75] ((1 * 100 * -1) + 0 + 1001) +5: [ OK ] CAS.number_cal (0 ms) +5: [ RUN ] CAS.SimplifyPower +5: I0924 13:32:18.163280 27793 cas_test.cc:81] p0 (x^2) +5: I0924 13:32:18.163285 27793 cas_test.cc:84] power: ((x^2)^3) +5: I0924 13:32:18.163316 27793 cas_test.cc:87] simplified: (x^(6)) +5: [ OK ] CAS.SimplifyPower (0 ms) +5: [ RUN ] CAS.cmp +5: [ OK ] CAS.cmp (0 ms) +5: [ RUN ] CAS.SimplifySum +5: [ OK ] CAS.SimplifySum (2 ms) +5: [ RUN ] CAS.SimplifyProduct +5: I0924 13:32:18.166296 27793 cas_test.cc:164] (x^5) +5: [ OK ] CAS.SimplifyProduct (1 ms) +5: [ RUN ] CAS.SimplifyMod +5: [ OK ] CAS.SimplifyMod (0 ms) +5: [ RUN ] CAS.ConvertCinnToCAS +5: I0924 13:32:18.167304 27793 cas_test.cc:196] body ((((A[i, j] + 0) + 1) + (2 * B[i, j])) + ((0 * B[i, j]) * A[i, j])) +5: [ OK ] CAS.ConvertCinnToCAS (1 ms) +5: [ RUN ] CAS.FracOp +5: [ OK ] CAS.FracOp (1 ms) +5: [ RUN ] CAS.Mod +5: I0924 13:32:18.172488 27793 cas_test.cc:273] ((128 * k) + (x + ((32768 * y) + (32 * z)))) +5: [ OK ] CAS.Mod (4 ms) +5: [ RUN ] CAS.IntConnerCase +5: I0924 13:32:18.173523 27793 cas.cc:1231] found y Interval[2, 3] ai 1 +5: I0924 13:32:18.173576 27793 cas.cc:1231] found y Interval[0, 3] ai 1 +5: I0924 13:32:18.173614 27793 cas.cc:1231] found y Interval[0, 3] ai 1 +5: [ OK ] CAS.IntConnerCase (1 ms) +5: [----------] 11 tests from CAS (10 ms total) +5: +5: [----------] 1 test from SolveInequality +5: [ RUN ] SolveInequality.basic +5: [ OK ] SolveInequality.basic (3 ms) +5: [----------] 1 test from SolveInequality (3 ms total) +5: +5: [----------] Global test environment tear-down +5: [==========] 12 tests from 2 test cases ran. (13 ms total) +5: [ PASSED ] 12 tests. +6: [==========] Running 2 tests from 1 test case. +6: [----------] Global test environment set-up. +6: [----------] 2 tests from string +6: [ RUN ] string.Endswith +6: [ OK ] string.Endswith (0 ms) +6: [ RUN ] string.Startswith +6: [ OK ] string.Startswith (0 ms) +6: [----------] 2 tests from string (0 ms total) +6: +6: [----------] Global test environment tear-down +6: [==========] 2 tests from 1 test case ran. (0 ms total) +6: [ PASSED ] 2 tests. +7: [==========] Running 2 tests from 1 test case. +7: [----------] Global test environment set-up. +7: [----------] 2 tests from CreateStages +7: [ RUN ] CreateStages.compute_at +7: { +7: for (i, 100) +7: { +7: for (j, 100) +7: { +7: B[i, j] = (1 + A[i, j]) +7: for (k, 100) +7: { +7: C[i, j, k] = (B[i, j] * B[j, k]) +7: } +7: } +7: } +7: } +7: [ OK ] CreateStages.compute_at (18 ms) +7: [ RUN ] CreateStages.buffer_bind_to_multiple_tensors_schedule +7: { +7: for (i, 100) +7: { +7: for (j, 100) +7: { +7: B[i, j] = (1 + A[i, j]) +7: } +7: } +7: for (i, 100) +7: { +7: for (j, 100) +7: { +7: C[i, j] = (1 + A[i, j]) +7: } +7: } +7: for (i, 100) +7: { +7: for (j, 100) +7: { +7: D[i, j] = (1 + A[i, j]) +7: } +7: } +7: } +7: [ OK ] CreateStages.buffer_bind_to_multiple_tensors_schedule (15 ms) +7: [----------] 2 tests from CreateStages (33 ms total) +7: +7: [----------] Global test environment tear-down +7: [==========] 2 tests from 1 test case ran. (33 ms total) +7: [ PASSED ] 2 tests. + 1/68 Test #55: test03_conv_main ....................... Passed 0.09 sec + 2/68 Test #51: test01_elementwise_add_main ............ Passed 0.19 sec + 3/68 Test #1: test_cinn_value ........................ Passed 0.19 sec + 4/68 Test #2: test_shared ............................ Passed 0.19 sec + 5/68 Test #3: test_graph_utils ....................... Passed 0.19 sec + 6/68 Test #4: test_arithmatic ........................ Passed 0.19 sec + 7/68 Test #5: test_cas ............................... Passed 0.19 sec + 8/68 Test #6: test_string ............................ Passed 0.19 sec + 9/68 Test #7: test_schedule .......................... Passed 0.18 sec +test 8 + Start 8: test_stage + +8: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/poly/test_stage "" +8: Test timeout computed to be: 600 +test 9 + Start 9: test_compute_at_transform + +9: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/poly/test_compute_at_transform "" +9: Test timeout computed to be: 600 +test 10 + Start 10: test_cinn_runtime + +10: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/runtime/test_cinn_runtime "" +10: Test timeout computed to be: 600 +test 11 + Start 11: test_mkl_math + +11: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/runtime/cpu/test_mkl_math "" +11: Test timeout computed to be: 600 +test 12 + Start 12: test_host_intrinsics + +12: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/runtime/cpu/test_host_intrinsics "" +12: Test timeout computed to be: 600 +test 13 + Start 13: test_collect_ir_nodes + +13: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/ir/test_collect_ir_nodes "" +13: Test timeout computed to be: 600 +test 14 + Start 14: test_buffer + +14: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/ir/test_buffer "--cinn_x86_builtin_code_root=/home/wangyue50/CINN-my/CINN/cinn/backends" +14: Test timeout computed to be: 600 +test 15 + Start 15: test_tensor + +15: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/ir/test_tensor "" +15: Test timeout computed to be: 600 +test 16 + Start 16: test_codegen_c + +16: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/backends/test_codegen_c "--cinn_x86_builtin_code_root=/home/wangyue50/CINN-my/CINN/cinn/backends" +16: Test timeout computed to be: 600 +14: [==========] Running 2 tests from 1 test case. +14: [----------] Global test environment set-up. +14: [----------] 2 tests from Buffer +14: [ RUN ] Buffer.basic +14: [ OK ] Buffer.basic (0 ms) +14: [ RUN ] Buffer.bind_to_multiple_tensors +14: codegen C: +14: #include +14: #include +14: +14: //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +14: /// Predefined utilities in CINN BEGIN( +14: //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +14: +14: #include +14: #include +14: +14: #include +14: +14: //! Vector in stack, this can only used in generated .cc file. +14: template +14: struct StackVec { +14: typedef T value_type; +14: typedef StackVec self_type; +14: +14: self_type& operator=(const StackVec& src) { +14: if (this != &src) { +14: memcpy(data_, src.data_, num_bytes()); +14: } +14: return *this; +14: } +14: +14: StackVec() { memset(data_, 0, num_bytes()); } +14: +14: explicit StackVec(const T* externl) : external_data_(externl) {} +14: +14: static self_type Broadcast(const value_type& v) { +14: self_type res; +14: for (size_t i = 0; i < Num; i++) res.data_[i] = v; +14: return res; +14: } +14: +14: static self_type Ramp(const value_type& base, const value_type& stride) { +14: self_type res; +14: for (size_t i = 0; i < Num; i++) { +14: res.data_[i] = base + stride * i; +14: } +14: } +14: +14: static self_type Load(const void* base, int32_t offset) { +14: self_type res; +14: memcpy(&res.data_[0], (const value_type*)base + offset, num_bytes()); +14: } +14: +14: static self_type Load(const void* base, const StackVec& offset) { +14: self_type res; +14: for (size_t i = 0; i < Num; i++) { +14: res.data_[i] = ((const value_type*)base)[offset[i]]; +14: } +14: } +14: +14: void Store(void* base, int32_t offset) const { +14: mempcpy((value_type*)base + offset, &data_[0], num_bytes()); // NOLINT +14: } +14: +14: inline value_type& operator[](size_t i) { return data_[i]; } +14: inline value_type operator[](size_t i) const { return data_[i]; } +14: +14: // binary operator between two vectors +14: // @{ +14: #define __(op__) \ +14: friend self_type operator op__(const self_type& a, const self_type& b) { \ +14: self_type res; \ +14: for (size_t i = 0; i < Num; i++) { \ +14: res.data_[i] = a[i] op__ b[i]; \ +14: } \ +14: return res; \ +14: } +14: __(+) +14: __(-) +14: __(*) +14: __(/) +14: __(%) +14: // @} +14: #undef __ +14: +14: // binary operator between a vector and a scalar +14: // @{ +14: #define __(op__) \ +14: friend self_type operator op__(const self_type& a, const value_type& b) { \ +14: self_type res; \ +14: for (size_t i = 0; i < Num; i++) { \ +14: res.data_[i] = a[i] op__ b; \ +14: } \ +14: return res; \ +14: } +14: __(+) +14: __(-) +14: __(*) +14: __(/) +14: __(%) +14: #undef __ +14: // @} +14: +14: static constexpr size_t num_bytes() { return sizeof(data_); } +14: +14: private: +14: T data_[Num]; +14: T* external_data_{nullptr}; +14: }; +14: +14: /** +14: * The vector with external data. +14: */ +14: template +14: struct ExternalVec { +14: typedef T value_type; +14: typedef ExternalVec self_type; +14: +14: explicit ExternalVec(T* data) : data_(data) {} +14: +14: self_type& operator=(const self_type& src) { +14: if (data_ != src.data_) { +14: memcpy(data_, src.data_, num_bytes()); +14: } +14: return *this; +14: } +14: +14: static self_type Load(const void* base, int32_t offset) { +14: self_type res((T*)base + offset); // NOLINT +14: return res; +14: } +14: +14: static constexpr size_t num_bytes() { return sizeof(value_type) * Num; } +14: +14: private: +14: T* data_{nullptr}; +14: }; +14: +14: // AVX256 load +14: //@{ +14: inline __m256 cinn_avx256_load(const float* dst) { return _mm256_load_ps(dst); } +14: inline __m256d cinn_avx256_load(const double* dst) { return _mm256_load_pd(dst); } +14: //@} +14: // AVX512 load +14: //@{ +14: inline __m512 cinn_avx512_load(const float* dst) { return _mm512_load_ps(dst); } +14: inline __m512d cinn_avx512_load(const double* dst) { return _mm512_load_pd(dst); } +14: //@} +14: +14: // FP32x8 * FP32x8 +14: // @{ +14: inline void cinn_avx256_add(float* dst, float* a, float* b) { +14: _mm256_store_ps(dst, _mm256_add_ps(_mm256_load_ps(a), _mm256_load_ps(b))); +14: } +14: inline void cinn_avx256_sub(float* dst, float* a, float* b) { +14: _mm256_store_ps(dst, _mm256_sub_ps(_mm256_load_ps(a), _mm256_load_ps(b))); +14: } +14: inline void cinn_avx256_mul(float* dst, float* a, float* b) { +14: _mm256_store_ps(dst, _mm256_mul_ps(_mm256_load_ps(a), _mm256_load_ps(b))); +14: } +14: inline void cinn_avx256_div(float* dst, float* a, float* b) { +14: _mm256_store_ps(dst, _mm256_div_ps(_mm256_load_ps(a), _mm256_load_ps(b))); +14: } +14: // @} +14: +14: // FP32x4 * float +14: // @{ +14: inline void cinn_avx256_add(float* dst, float* a, float b) { +14: _mm256_store_ps(dst, _mm256_add_ps(_mm256_load_ps(a), _mm256_set1_ps(b))); +14: } +14: inline void cinn_avx256_sub(float* dst, float* a, float b) { +14: _mm256_store_ps(dst, _mm256_sub_ps(_mm256_load_ps(a), _mm256_set1_ps(b))); +14: } +14: inline void cinn_avx256_mul(float* dst, float* a, float b) { +14: _mm256_store_ps(dst, _mm256_mul_ps(_mm256_load_ps(a), _mm256_set1_ps(b))); +14: } +14: inline void cinn_avx256_div(float* dst, float* a, float b) { +14: _mm256_store_ps(dst, _mm256_div_ps(_mm256_load_ps(a), _mm256_set1_ps(b))); +14: } +14: // @} +14: +14: // float * FP32x4 +14: // @{ +14: inline void cinn_avx256_add(float* dst, float a, float* b) { +14: _mm256_store_ps(dst, _mm256_add_ps(_mm256_set1_ps(a), _mm256_load_ps(b))); +14: } +14: inline void cinn_avx256_sub(float* dst, float a, float* b) { +14: _mm256_store_ps(dst, _mm256_sub_ps(_mm256_set1_ps(a), _mm256_load_ps(b))); +14: } +14: inline void cinn_avx256_mul(float* dst, float a, float* b) { +14: _mm256_store_ps(dst, _mm256_mul_ps(_mm256_set1_ps(a), _mm256_load_ps(b))); +14: } +14: inline void cinn_avx256_div(float* dst, float a, float* b) { +14: _mm256_store_ps(dst, _mm256_div_ps(_mm256_set1_ps(a), _mm256_load_ps(b))); +14: } +14: // @} +14: +14: // 4 x float64 +14: // @{ +14: inline void cinn_avx256_add(double* dst, double* a, double* b) { +14: _mm256_store_pd(dst, _mm256_add_pd(_mm256_load_pd(a), _mm256_load_pd(b))); +14: } +14: inline void cinn_avx256_sub(double* dst, double* a, double* b) { +14: _mm256_store_pd(dst, _mm256_sub_pd(_mm256_load_pd(a), _mm256_load_pd(b))); +14: } +14: inline void cinn_avx256_mul(double* dst, double* a, double* b) { +14: _mm256_store_pd(dst, _mm256_mul_pd(_mm256_load_pd(a), _mm256_load_pd(b))); +14: } +14: inline void cinn_avx256_div(double* dst, double* a, double* b) { +14: _mm256_store_pd(dst, _mm256_div_pd(_mm256_load_pd(a), _mm256_load_pd(b))); +14: } +14: // @} +14: +14: // FP32x4 * FP64 +14: // @{ +14: inline void cinn_avx256_add(double* dst, double* a, double b) { +14: _mm256_store_pd(dst, _mm256_add_pd(_mm256_load_pd(a), _mm256_set1_pd(b))); +14: } +14: inline void cinn_avx256_sub(double* dst, double* a, double b) { +14: _mm256_store_pd(dst, _mm256_sub_pd(_mm256_load_pd(a), _mm256_set1_pd(b))); +14: } +14: inline void cinn_avx256_mul(double* dst, double* a, double b) { +14: _mm256_store_pd(dst, _mm256_mul_pd(_mm256_load_pd(a), _mm256_set1_pd(b))); +14: } +14: inline void cinn_avx256_div(double* dst, double* a, double b) { +14: _mm256_store_pd(dst, _mm256_div_pd(_mm256_load_pd(a), _mm256_set1_pd(b))); +14: } +14: // @} +14: +14: // float * FP32x4 +14: // @{ +14: inline void cinn_avx256_add(double* dst, double a, double* b) { +14: _mm256_store_pd(dst, _mm256_add_pd(_mm256_set1_pd(a), _mm256_load_pd(b))); +14: } +14: inline void cinn_avx256_sub(double* dst, double a, double* b) { +14: _mm256_store_pd(dst, _mm256_sub_pd(_mm256_set1_pd(a), _mm256_load_pd(b))); +14: } +14: inline void cinn_avx256_mul(double* dst, double a, double* b) { +14: _mm256_store_pd(dst, _mm256_mul_pd(_mm256_set1_pd(a), _mm256_load_pd(b))); +14: } +14: inline void cinn_avx256_div(double* dst, double a, double* b) { +14: _mm256_store_pd(dst, _mm256_div_pd(_mm256_set1_pd(a), _mm256_load_pd(b))); +14: } +14: // @} +14: +14: //! 32 x float32 operations. +14: // @{ +14: inline void cinn_avx512_add(float* dst, float* a, float* b) { +14: _mm512_store_ps(dst, _mm512_add_ps(_mm512_load_ps(a), _mm512_load_ps(b))); +14: } +14: inline void cinn_avx512_sub(float* dst, float* a, float* b) { +14: _mm512_store_ps(dst, _mm512_sub_ps(_mm512_load_ps(a), _mm512_load_ps(b))); +14: } +14: inline void cinn_avx512_mul(float* dst, float* a, float* b) { +14: _mm512_store_ps(dst, _mm512_mul_ps(_mm512_load_ps(a), _mm512_load_ps(b))); +14: } +14: inline void cinn_avx512_div(float* dst, float* a, float* b) { +14: _mm512_store_ps(dst, _mm512_div_ps(_mm512_load_ps(a), _mm512_load_ps(b))); +14: } +14: // @} +14: +14: // FP32x4 * FP64 +14: // @{ +14: inline void cinn_avx512_add(float* dst, float* a, float b) { +14: _mm512_store_pd(dst, _mm512_add_pd(_mm512_load_pd(a), _mm512_set1_pd(b))); +14: } +14: inline void cinn_avx512_sub(float* dst, float* a, float b) { +14: _mm512_store_pd(dst, _mm512_sub_pd(_mm512_load_pd(a), _mm512_set1_pd(b))); +14: } +14: inline void cinn_avx512_mul(float* dst, float* a, float b) { +14: _mm512_store_pd(dst, _mm512_mul_pd(_mm512_load_pd(a), _mm512_set1_pd(b))); +14: } +14: inline void cinn_avx512_div(float* dst, float* a, float b) { +14: _mm512_store_pd(dst, _mm512_div_pd(_mm512_load_pd(a), _mm512_set1_pd(b))); +14: } +14: // @} +14: +14: // float * FP32x4 +14: // @{ +14: inline void cinn_avx512_add(float* dst, float a, float* b) { +14: _mm512_store_pd(dst, _mm512_add_pd(_mm512_set1_pd(a), _mm512_load_pd(b))); +14: } +14: inline void cinn_avx512_sub(float* dst, float a, float* b) { +14: _mm512_store_pd(dst, _mm512_sub_pd(_mm512_set1_pd(a), _mm512_load_pd(b))); +14: } +14: inline void cinn_avx512_mul(float* dst, float a, float* b) { +14: _mm512_store_pd(dst, _mm512_mul_pd(_mm512_set1_pd(a), _mm512_load_pd(b))); +14: } +14: inline void cinn_avx512_div(float* dst, float a, float* b) { +14: _mm512_store_pd(dst, _mm512_div_pd(_mm512_set1_pd(a), _mm512_load_pd(b))); +14: } +14: // @} +14: +14: //! 16 x float32 operations. +14: // @{ +14: inline void cinn_avx512_add(double* dst, double* a, double* b) { +14: _mm512_store_pd(dst, _mm512_add_pd(_mm512_load_pd(a), _mm512_load_pd(b))); +14: } +14: inline void cinn_avx512_sub(double* dst, double* a, double* b) { +14: _mm512_store_pd(dst, _mm512_sub_pd(_mm512_load_pd(a), _mm512_load_pd(b))); +14: } +14: inline void cinn_avx512_mul(double* dst, double* a, double* b) { +14: _mm512_store_pd(dst, _mm512_mul_pd(_mm512_load_pd(a), _mm512_load_pd(b))); +14: } +14: inline void cinn_avx512_div(double* dst, double* a, double* b) { +14: _mm512_store_pd(dst, _mm512_div_pd(_mm512_load_pd(a), _mm512_load_pd(b))); +14: } +14: // @} +14: +14: inline __m512 cinn_avx512_add(const __m512& a, const __m512& b); +14: +14: inline __m256 cinn_avx256_add_float(const __m256& a, const __m256& b) { return _mm256_add_ps(a, b); } +14: inline __m256d cinn_avx256_add_double(const __m256d& a, const __m256d& b) { return _mm256_add_pd(a, b); } +14: inline __m512 cinn_avx512_add_float(const __m512& a, const __m512& b) { return _mm512_add_ps(a, b); } +14: inline __m512d cinn_avx512_add_double(const __m512d& a, const __m512d& b) { return _mm512_add_pd(a, b); } +14: +14: //! set1 +14: // @{ +14: inline __m256 cinn_avx256_set1(float value) { return _mm256_set1_ps(value); } +14: inline __m256d cinn_avx256_set1(double value) { return _mm256_set1_pd(value); } +14: inline __m512 cinn_avx512_set1(float value) { return _mm512_set1_ps(value); } +14: inline __m512d cinn_avx512_set1(double value) { return _mm512_set1_pd(value); } +14: // @} +14: +14: //! store +14: // @{ +14: inline void cinn_avx512_store(float* dst, const __m512& x) { _mm512_store_ps(dst, x); } +14: inline void cinn_avx512_store(double* dst, const __m512d& x) { _mm512_store_pd(dst, x); } +14: inline void cinn_avx256_store(float* dst, const __m256& x) { _mm256_store_ps(dst, x); } +14: inline void cinn_avx256_store(double* dst, const __m256d& x) { _mm256_store_pd(dst, x); } +14: // @} +14: +14: //! add +14: // @{ +14: inline __m256 cinn_avx256_add(const __m256& a, const __m256& b) { return _mm256_add_ps(a, b); } +14: inline __m256d cinn_avx256_add(const __m256d& a, const __m256d& b) { return _mm256_add_pd(a, b); } +14: inline __m512 cinn_avx512_add(const __m512& a, const __m512& b) { return _mm512_add_ps(a, b); } +14: inline __m512d cinn_avx512_add(const __m512d& a, const __m512d& b) { return _mm512_add_pd(a, b); } +14: // @} +14: +14: //! mul +14: // @{ +14: inline __m256 cinn_avx256_mul(const __m256& a, const __m256& b) { return _mm256_mul_ps(a, b); } +14: inline __m256d cinn_avx256_mul(const __m256d& a, const __m256d& b) { return _mm256_mul_pd(a, b); } +14: inline __m512 cinn_avx512_mul(const __m512& a, const __m512& b) { return _mm512_mul_ps(a, b); } +14: inline __m512d cinn_avx512_mul(const __m512d& a, const __m512d& b) { return _mm512_mul_pd(a, b); } +14: // @} +14: +14: //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +14: /// )END Predefined utilities in CINN +14: //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +14: +14: cinn_buffer_t* _A = cinn_buffer_t::new_((cinn_device_kind_t)(0)/*target*/, cinn_float32_t(), { 100, 20 }, 32/*align*/); +14: void func1(void* _args, int32_t num_args) +14: { +14: cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +14: cinn_buffer_malloc((void*)(0), _A); +14: float* A = ((float*)(_A->memory)); +14: float* B = ((float*)(_A->memory)); +14: for (int32_t i = 0; i < 100; i += 1) { +14: for (int32_t j = 0; j < 20; j += 1) { +14: A[((20 * i) + j)] = 0; +14: }; +14: }; +14: for (int32_t i = 0; i < 100; i += 1) { +14: for (int32_t j = 0; j < 20; j += 1) { +14: B[((20 * i) + j)] = 1; +14: }; +14: }; +14: cinn_buffer_free((void*)(0), _A); +14: } +14: +14: +14: [ OK ] Buffer.bind_to_multiple_tensors (10 ms) +14: [----------] 2 tests from Buffer (10 ms total) +14: +14: [----------] Global test environment tear-down +14: [==========] 2 tests from 1 test case ran. (11 ms total) +14: [ PASSED ] 2 tests. +53: W0924 13:32:18.341225 27786 codegen_c.cc:33] Output C source to file ./test02_matmul_vectorize.cc +53: [ OK ] matmul.Vectorization (55 ms) +53: [ RUN ] matmul.LoopPermutation +53: W0924 13:32:18.403461 27786 codegen_c.cc:24] Output C header to file ./test02_matmul_loop_permutation.h +53: W0924 13:32:18.427978 27786 codegen_c.cc:33] Output C source to file ./test02_matmul_loop_permutation.cc +53: [ OK ] matmul.LoopPermutation (87 ms) +53: [ RUN ] matmul.ArrayPacking +13: [==========] Running 2 tests from 1 test case. +13: [----------] Global test environment set-up. +13: [----------] 2 tests from CollectIRNodes +13: [ RUN ] CollectIRNodes.basic0 +13: [ OK ] CollectIRNodes.basic0 (0 ms) +13: [ RUN ] CollectIRNodes.basic +13: WARNING: Logging before InitGoogleLogging() is written to STDERR +13: I0924 13:32:18.361411 27801 collect_ir_nodes_test.cc:32] fn: +13: function fn (_A, _B, _C) +13: { +13: for (i, 100) +13: { +13: for (j, 200) +13: { +13: C[i, j] = (A[i, j] + B[i, j]) +13: } +13: } +13: } +13: I0924 13:32:18.361507 27801 collect_ir_nodes_test.cc:38] fn.body: +13: { +13: for (i, 100) +13: { +13: for (j, 200) +13: { +13: C[i, j] = (A[i, j] + B[i, j]) +13: } +13: } +13: } +13: [ OK ] CollectIRNodes.basic (7 ms) +13: [----------] 2 tests from CollectIRNodes (7 ms total) +13: +13: [----------] Global test environment tear-down +13: [==========] 2 tests from 1 test case ran. (7 ms total) +13: [ PASSED ] 2 tests. +12: [==========] Running 1 test from 1 test case. +12: [----------] Global test environment set-up. +12: [----------] 1 test from tanh +12: [ RUN ] tanh.basic +12: WARNING: Logging before InitGoogleLogging() is written to STDERR +12: I0924 13:32:18.361735 27800 host_intrinsics_test.cc:30] fn: +12: function fn (_x, _tensor) +12: { +12: for (i, 10) +12: { +12: for (j, 20) +12: { +12: tensor[i, j] = cinn_cpu_tanh_fp32(x[i, j]) +12: } +12: } +12: } +12: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 800 +12: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 800 +12: I0924 13:32:18.421118 27800 host_intrinsics_test.cc:49] 0.654482 +12: I0924 13:32:18.421147 27800 host_intrinsics_test.cc:49] 0.663164 +12: I0924 13:32:18.421150 27800 host_intrinsics_test.cc:49] 0.721922 +12: [ OK ] tanh.basic (67 ms) +12: [----------] 1 test from tanh (67 ms total) +12: +12: [----------] Global test environment tear-down +12: [==========] 1 test from 1 test case ran. (67 ms total) +12: [ PASSED ] 1 test. +11: [==========] Running 27 tests from 2 test cases. +11: [----------] Global test environment set-up. +11: [----------] 26 tests from mkl_math +11: [ RUN ] mkl_math.exp +11: WARNING: Logging before InitGoogleLogging() is written to STDERR +11: I0924 13:32:18.364267 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_exp_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_exp_fp32_out[i, j] = cinn_cpu_exp_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.exp (33 ms) +11: [ RUN ] mkl_math.erf +11: I0924 13:32:18.396248 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_erf_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_erf_fp32_out[i, j] = cinn_cpu_erf_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.erf (29 ms) +11: [ RUN ] mkl_math.sqrt +11: I0924 13:32:18.424861 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_sqrt_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_sqrt_fp32_out[i, j] = cinn_cpu_sqrt_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.sqrt (29 ms) +11: [ RUN ] mkl_math.log +11: I0924 13:32:18.453444 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_log_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_log_fp32_out[i, j] = cinn_cpu_log_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.log (28 ms) +11: [ RUN ] mkl_math.log2 +11: I0924 13:32:18.481892 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_log2_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_log2_fp32_out[i, j] = cinn_cpu_log2_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.log2 (29 ms) +11: [ RUN ] mkl_math.log10 +11: I0924 13:32:18.510504 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_log10_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_log10_fp32_out[i, j] = cinn_cpu_log10_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.log10 (28 ms) +11: [ RUN ] mkl_math.floor +11: I0924 13:32:18.538941 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_floor_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_floor_fp32_out[i, j] = cinn_cpu_floor_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.floor (29 ms) +11: [ RUN ] mkl_math.ceil +11: I0924 13:32:18.567494 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_ceil_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_ceil_fp32_out[i, j] = cinn_cpu_ceil_fp32(x[i, j]) +11: } +11: } +11: } +10: [==========] Running 3 tests from 3 test cases. +10: [----------] Global test environment set-up. +10: [----------] 1 test from buffer +10: [ RUN ] buffer.basic +10: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 120 +10: [ OK ] buffer.basic (0 ms) +10: [----------] 1 test from buffer (0 ms total) +10: +10: [----------] 1 test from cinn_print_debug_string +10: [ RUN ] cinn_print_debug_string.basic +10: hello world +10: should be 1, 1 +10: should be pointer, 0x7ffc871d9534 +10: should be 1, 1 +10: v3[1 2 3], +10: [ OK ] cinn_print_debug_string.basic (0 ms) +10: [----------] 1 test from cinn_print_debug_string (0 ms total) +10: +10: [----------] 1 test from cinn_args_construct +10: [ RUN ] cinn_args_construct.basic +10: [ OK ] cinn_args_construct.basic (0 ms) +10: [----------] 1 test from cinn_args_construct (0 ms total) +10: +10: [----------] Global test environment tear-down +10: [==========] 3 tests from 3 test cases ran. (0 ms total) +10: [ PASSED ] 3 tests. +9: [==========] Running 1 test from 1 test case. +9: [----------] Global test environment set-up. +9: [----------] 1 test from ComputeAtTransform2 +9: [ RUN ] ComputeAtTransform2.basic +9: WARNING: Logging before InitGoogleLogging() is written to STDERR +9: I0924 13:32:18.362177 27797 compute_at_transform.cc:117] code: +9: +9: { +9: if (_cp_c_0 >= 0 && _cp_c_0 <= 12 && _cp_c_1 == 0) +9: for (int c0 = 0; c0 <= 12; c0 += 1) +9: for (int c4 = 0; c4 <= 49; c4 += 1) +9: p(4 * _cp_c_0, c4); +9: for (int c0 = 0; c0 <= 12; c0 += 1) +9: for (int c2 = 0; c2 <= 49; c2 += 1) +9: for (int c3 = 0; c3 <= 49; c3 += 1) +9: c(4 * c0, c2, c3); +9: } +9: I0924 13:32:18.376412 27797 compute_at_transform.cc:117] code: +9: +9: for (int c0 = 0; c0 <= 12; c0 += 1) { +9: if (_cp_c_0 >= 0 && _cp_c_0 <= 12 && _cp_c_1 == 0) +9: for (int c5 = 0; c5 <= 49; c5 += 1) +9: p(4 * _cp_c_0, c5); +9: for (int c3 = 0; c3 <= 49; c3 += 1) +9: for (int c4 = 0; c4 <= 49; c4 += 1) +9: c(4 * c0, c3, c4); +9: } +9: I0924 13:32:18.376475 27797 compute_at_transform_test.cc:31] shape: +9: I0924 13:32:18.376705 27797 compute_at_transform_test.cc:34] 2 +9: I0924 13:32:18.376709 27797 compute_at_transform_test.cc:34] 50 +9: [ OK ] ComputeAtTransform2.basic (24 ms) +9: [----------] 1 test from ComputeAtTransform2 (24 ms total) +9: +9: [----------] Global test environment tear-down +9: [==========] 1 test from 1 test case ran. (24 ms total) +9: [ PASSED ] 1 test. +8: [==========] Running 23 tests from 10 test cases. +8: [----------] Global test environment set-up. +8: [----------] 6 tests from Stage +8: [ RUN ] Stage.split +8: WARNING: Logging before InitGoogleLogging() is written to STDERR +8: I0924 13:32:18.353860 27796 stage_test.cc:31] { S[i, j] -> S[i_outer, i_inner, j' = j] : (-i + i_inner) mod 4 = 0 and -3 + i <= 4i_outer <= i and 0 <= i_inner <= 3 } +8: [ OK ] Stage.split (1 ms) +8: [ RUN ] Stage.tile +8: I0924 13:32:18.355557 27796 stage_test.cc:46] { S[i, j, k] -> S[i_outer, i_inner, j_outer, j_inner, k' = k] : (-i + i_inner) mod 4 = 0 and (-j + j_inner) mod 6 = 0 and -3 + i <= 4i_outer <= i and 0 <= i_inner <= 3 and -5 + j <= 6j_outer <= j and 0 <= j_inner <= 5 } +8: [ OK ] Stage.tile (2 ms) +8: [ RUN ] Stage.reorder +8: I0924 13:32:18.356016 27796 stage_test.cc:63] { S[i, j, k] -> S[i' = i, k' = k, j' = j] } +8: [ OK ] Stage.reorder (1 ms) +8: [ RUN ] Stage.split_reorder +8: I0924 13:32:18.357061 27796 stage_test.cc:74] { S[i, j, k] -> S[i_outer, k' = k, i_inner, j' = j] : (-i + i_inner) mod 4 = 0 and -3 + i <= 4i_outer <= i and 0 <= i_inner <= 3 } +8: [ OK ] Stage.split_reorder (1 ms) +8: [ RUN ] Stage.Fuse +8: I0924 13:32:18.357940 27796 stage_test.cc:96] split: { S[i, j, k] -> S[i_outer, i_inner, j' = j, k' = k] : (-i + i_inner) mod 4 = 0 and -3 + i <= 4i_outer <= i and 0 <= i_inner <= 3 } +8: I0924 13:32:18.358453 27796 stage_test.cc:98] fused: { S[i, j, k] -> S[i_outer_i_inner_fused, j' = j, k' = k] : (-i + i_outer_i_inner_fused) mod 4 = 0 and -3 + i_outer_i_inner_fused <= 4*floor((i)/4) <= i_outer_i_inner_fused } +8: [ OK ] Stage.Fuse (1 ms) +8: [ RUN ] Stage.Fuse1 +8: I0924 13:32:18.358978 27796 stage_test.cc:108] fused: { S[i, j, k] -> S[i_j_fused = 101i + j, k' = k] } +8: [ OK ] Stage.Fuse1 (0 ms) +8: [----------] 6 tests from Stage (6 ms total) +8: +8: [----------] 1 test from ComputeAtRelation +8: [ RUN ] ComputeAtRelation.basic +8: [ OK ] ComputeAtRelation.basic (0 ms) +8: [----------] 1 test from ComputeAtRelation (0 ms total) +8: +8: [----------] 5 tests from ComputeAt +8: [ RUN ] ComputeAt.Before +8: I0924 13:32:18.385377 27796 stage_test.cc:126] fn: +8: function fn (_A, _B, _cache, _C) +8: { +8: for (po0, 10) +8: { +8: for (po1, 10) +8: { +8: if (((((po0 >= 0) and (po0 <= 9)) and (po1 >= 0)) and (po1 <= 9))) { +8: cache[0, 0] = A[po0, po1] +8: } +8: C[po0, po1] = (cache[0, 0] + B[po0, po1]) +8: } +8: } +8: } +8: [ OK ] ComputeAt.Before (26 ms) +8: [ RUN ] ComputeAt.level0 +8: I0924 13:32:18.412464 27796 stage_test.cc:165] fn: +8: function fn (_A, _cache, _C) +8: { +8: for (po0, 10) +8: { +8: if (((po0 >= 0) and (po0 <= 9))) { +8: for (j, 11) +8: { +8: for (k, 10) +8: { +8: cache[0, j, k] = A[po0, j, k] +8: } +8: } +8: } +8: for (i, 10) +8: { +8: for (j, 10) +8: { +8: C[po0, i, j] = select((i < 9), (cache[0, i, j] + cache[0, (1 + i), j]), 0) +8: } +8: } +8: } +8: } +8: I0924 13:32:18.412554 27796 stage_test.cc:198] C code: +8: #include +8: #include +8: +8: void fn(void* _args, int32_t num_args) +8: { +8: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +8: cinn_buffer_t* _cache = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +8: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +8: cinn_buffer_malloc((void*)(0), _cache); +8: cinn_buffer_malloc((void*)(0), _C); +8: const float* A = ((const float*)(_A->memory)); +8: float* C = ((float*)(_C->memory)); +8: float* cache = ((float*)(_cache->memory)); +8: for (int32_t po0 = 0; po0 < 10; po0 += 1) { +8: if (((po0 >= 0) && (po0 <= 9))) { +8: for (int32_t j = 0; j < 11; j += 1) { +8: for (int32_t k = 0; k < 10; k += 1) { +8: cache[((10 * j) + k)] = A[((25 * j) + ((750 * po0) + k))]; +8: }; +8: }; +8: }; +8: for (int32_t i = 0; i < 10; i += 1) { +8: for (int32_t j = 0; j < 10; j += 1) { +8: C[((10 * i) + ((100 * po0) + j))] = (((i < 9)) ? (cache[((10 * i) + j)] + cache[(10 + ((10 * i) + j))]) : 0); +8: }; +8: }; +8: }; +8: cinn_buffer_free((void*)(0), _cache); +8: cinn_buffer_free((void*)(0), _C); +8: } +8: +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 30000 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4000 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 440 +8: [ OK ] ComputeAt.level0 (56 ms) +8: [ RUN ] ComputeAt.level1 +8: I0924 13:32:18.474115 27796 stage_test.cc:250] fn: +8: function fn (_A, _B, _cache, _C) +8: { +8: for (po0, 10) +8: { +8: for (po1, 10) +8: { +8: if (((((po0 >= 0) and (po0 <= 9)) and (po1 >= 0)) and (po1 <= 9))) { +8: for (i, (1 + int32((1 + (po0 - cinn_max(0, (po0 - 1))))))) +8: { +8: cache[i, 0] = A[(i + cinn_max(0, (po0 - 1))), po1] +8: } +8: } +8: C[po0, po1] = select((po0 < 10), (cache[-1, 0] + (cache[0, 0] + (cache[1, 0] + B[po0, po1]))), 0) +8: } +8: } +8: } +8: I0924 13:32:18.474181 27796 stage_test.cc:276] source: +8: #include +8: #include +8: +8: void fn(void* _args, int32_t num_args) +8: { +8: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +8: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +8: cinn_buffer_t* _cache = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +8: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +8: cinn_buffer_malloc((void*)(0), _cache); +8: cinn_buffer_malloc((void*)(0), _C); +8: const float* A = ((const float*)(_A->memory)); +8: const float* B = ((const float*)(_B->memory)); +8: float* C = ((float*)(_C->memory)); +8: float* cache = ((float*)(_cache->memory)); +8: for (int32_t po0 = 0; po0 < 10; po0 += 1) { +8: for (int32_t po1 = 0; po1 < 10; po1 += 1) { +8: if (((((po0 >= 0) && (po0 <= 9)) && (po1 >= 0)) && (po1 <= 9))) { +8: for (int32_t i = 0; i < (1 + ((int32_t)((1 + (po0 - cinn_max(0, (po0 - 1))))))); i += 1) { +8: cache[i] = A[((200 * i) + ((200 * cinn_max(0, (po0 - 1))) + po1))]; +8: }; +8: }; +8: C[((10 * po0) + po1)] = (((po0 < 10)) ? (cache[-1] + (cache[0] + (cache[1] + B[((200 * po0) + po1)]))) : 0); +8: }; +8: }; +8: cinn_buffer_free((void*)(0), _cache); +8: cinn_buffer_free((void*)(0), _C); +8: } +8: +8: [ OK ] ComputeAt.level1 (35 ms) +8: [ RUN ] ComputeAt.simple +8: I0924 13:32:18.531344 27796 stage_test.cc:313] fn: +8: function fn (_A, _A1, _B) +8: { +8: for (po0, 2) +8: { +8: for (po1, 16) +8: { +8: if (((((po1 >= 0) and (((16 * po0) + po1) >= 0)) and (po1 <= 15)) and (((16 * po0) + po1) <= 31))) { +8: for (i, 3) +8: { +8: for (j, 32) +8: { +8: A1[i, j] = A[(i + ((16 * po0) + po1)), j] +8: } +8: } +8: } +8: for (i, 32) +8: { +8: B[((16 * po0) + po1), i] = (A1[0, i] + (A1[1, i] + A1[2, i])) +8: } +8: } +8: } +8: } +8: I0924 13:32:18.531433 27796 stage_test.cc:346] source: +8: #include +8: #include +8: +8: void fn(void* _args, int32_t num_args) +8: { +8: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +8: cinn_buffer_t* _A1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +8: cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +8: cinn_buffer_malloc((void*)(0), _A1); +8: cinn_buffer_malloc((void*)(0), _B); +8: const float* A = ((const float*)(_A->memory)); +8: float* A1 = ((float*)(_A1->memory)); +8: float* B = ((float*)(_B->memory)); +8: for (int32_t po0 = 0; po0 < 2; po0 += 1) { +8: for (int32_t po1 = 0; po1 < 16; po1 += 1) { +8: if (((((po1 >= 0) && (((16 * po0) + po1) >= 0)) && (po1 <= 15)) && (((16 * po0) + po1) <= 31))) { +8: for (int32_t i = 0; i < 3; i += 1) { +8: for (int32_t j = 0; j < 32; j += 1) { +8: A1[((32 * i) + j)] = A[((64 * i) + ((1024 * po0) + ((64 * po1) + j)))]; +8: }; +8: }; +8: }; +8: for (int32_t i = 0; i < 32; i += 1) { +8: B[((512 * po0) + ((32 * po1) + i))] = (A1[i] + (A1[(32 + i)] + A1[(64 + i)])); +8: }; +8: }; +8: }; +8: cinn_buffer_free((void*)(0), _A1); +8: cinn_buffer_free((void*)(0), _B); +8: } +8: +8: [ OK ] ComputeAt.simple (60 ms) +8: [ RUN ] ComputeAt.Before1 +8: E0924 13:32:18.536700 27796 stage.cc:195] ComputeAt: transformed has no access to cache, skipped it +8: I0924 13:32:18.545419 27796 stage_test.cc:373] fn: +8: function fn (_A, _cache, _transformed) +8: { +8: for (i, 100) +8: { +8: for (j, 200) +8: { +8: transformed[i, j] = 1 +8: } +8: } +8: for (i, 100) +8: { +8: for (j, 200) +8: { +8: cache[i] = A[i, j] +8: } +8: } +8: } +8: E0924 13:32:18.546058 27796 stage.cc:195] ComputeAt: transformed has no access to cache, skipped it +8: [ OK ] ComputeAt.Before1 (18 ms) +8: [----------] 5 tests from ComputeAt (195 ms total) +8: +8: [----------] 2 tests from Fuse +8: [ RUN ] Fuse.jit_precision_test +8: I0924 13:32:18.560876 27796 stage_test.cc:444] fn: +8: function fn (_A, _B, _C) +8: { +8: for (i_j_fused, 1200) +8: { +8: C[(i_j_fused / 40), (i_j_fused % 40)] = (A[(i_j_fused / 40), (i_j_fused % 40)] + B[(i_j_fused / 40), (i_j_fused % 40)]) +8: } +8: } +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: I0924 13:32:18.581537 27796 stage_test.cc:466] 0.566904 +8: I0924 13:32:18.581550 27796 stage_test.cc:466] 0.818275 +8: I0924 13:32:18.581553 27796 stage_test.cc:466] 1.25823 +8: I0924 13:32:18.581555 27796 stage_test.cc:466] 0.186051 +8: [ OK ] Fuse.jit_precision_test (27 ms) +8: [ RUN ] Fuse.jit_precision_test2 +8: I0924 13:32:18.590202 27796 stage_test.cc:444] fn: +8: function fn (_A, _B, _C) +8: { +8: for (i_outer_i_inner_fused, 30) +8: { +8: for (j, 40) +8: { +8: C[i_outer_i_inner_fused, j] = (A[i_outer_i_inner_fused, j] + B[i_outer_i_inner_fused, j]) +8: } +8: } +8: } +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: I0924 13:32:18.610214 27796 stage_test.cc:466] 0.988877 +8: I0924 13:32:18.610225 27796 stage_test.cc:466] 0.858201 +8: I0924 13:32:18.610227 27796 stage_test.cc:466] 0.573442 +8: I0924 13:32:18.610230 27796 stage_test.cc:466] 0.873411 +8: [ OK ] Fuse.jit_precision_test2 (29 ms) +8: [----------] 2 tests from Fuse (56 ms total) +8: +8: [----------] 1 test from Tile +8: [ RUN ] Tile.jit_precision_test +8: I0924 13:32:18.633318 27796 stage_test.cc:444] fn: +8: function fn (_A, _B, _C) +8: { +8: for (i_outer, 8) +8: { +8: for (i_inner, (1 + int32(cinn_min(3, (29 + (-4 * i_outer)))))) +8: { +8: for (j_outer, 10) +8: { +8: for (j_inner, 4) +8: { +8: C[((4 * i_outer) + i_inner), ((4 * j_outer) + j_inner)] = (A[((4 * i_outer) + i_inner), ((4 * j_outer) + j_inner)] + B[((4 * i_outer) + i_inner), ((4 * j_outer) + j_inner)]) +8: } +8: } +8: } +8: } +8: } +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: I0924 13:32:18.655704 27796 stage_test.cc:466] 1.83821 +8: I0924 13:32:18.655715 27796 stage_test.cc:466] 0.631529 +8: I0924 13:32:18.655719 27796 stage_test.cc:466] 0.886526 +8: I0924 13:32:18.655721 27796 stage_test.cc:466] 1.68606 +8: [ OK ] Tile.jit_precision_test (45 ms) +8: [----------] 1 test from Tile (45 ms total) +8: +8: [----------] 1 test from Reorder +8: [ RUN ] Reorder.jit_precision_test +8: I0924 13:32:18.662151 27796 stage_test.cc:444] fn: +8: function fn (_A, _B, _C) +8: { +8: for (j, 40) +8: { +8: for (i, 30) +8: { +8: C[i, j] = (A[i, j] + B[i, j]) +8: } +8: } +8: } +15: [==========] Running 4 tests from 1 test case. +15: [----------] Global test environment set-up. +15: [----------] 4 tests from Tensor +15: [ RUN ] Tensor.inlined +15: output: +15: function func_C (_A, _B, _D) +15: { +15: for (i, 100) +15: { +15: for (j, 20) +15: { +15: D[i, j] = (1 + ((2 * A[i, j]) + (2 * B[i, j]))) +15: } +15: } +15: } +15: [ OK ] Tensor.inlined (8 ms) +15: [ RUN ] Tensor.IsDependOnStatement +15: [ OK ] Tensor.IsDependOnStatement (0 ms) +15: [ RUN ] Tensor.Reshape +15: WARNING: Logging before InitGoogleLogging() is written to STDERR +15: I0924 13:32:18.375077 27803 tensor_test.cc:85] source: +15: #include +15: #include +15: +15: void fn(void* _args, int32_t num_args) +15: { +15: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +15: cinn_buffer_t* _tensor = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +15: cinn_buffer_malloc((void*)(0), _tensor); +15: const float* A_reshape = ((const float*)(_A->memory)); +15: float* tensor = ((float*)(_tensor->memory)); +15: for (int32_t i = 0; i < 10; i += 1) { +15: for (int32_t j = 0; j < 10; j += 1) { +15: for (int32_t k = 0; k < 100; k += 1) { +15: tensor[((1000 * i) + ((100 * j) + k))] = (2 * A_reshape[((1000 * i) + ((100 * j) + k))]); +15: }; +15: }; +15: }; +15: cinn_buffer_free((void*)(0), _tensor); +15: } +15: +15: [ OK ] Tensor.Reshape (12 ms) +15: [ RUN ] Tensor.ReshapeCopied +15: I0924 13:32:18.391335 27803 tensor_test.cc:131] source: +15: #include +15: #include +15: +15: void fn(void* _args, int32_t num_args) +15: { +15: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +15: cinn_buffer_t* _tensor = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +15: cinn_buffer_malloc((void*)(0), _tensor); +15: const float* A = ((const float*)(_A->memory)); +15: float* A_copied = ((float*)(_A_copied_reshape->memory)); +15: const float* A_copied_reshape = ((const float*)(_A_copied_reshape->memory)); +15: float* tensor = ((float*)(_tensor->memory)); +15: for (int32_t i = 0; i < 100; i += 1) { +15: for (int32_t j = 0; j < 100; j += 1) { +15: A_copied[((100 * i) + j)] = A[((100 * i) + j)]; +15: }; +15: }; +15: for (int32_t i = 0; i < 10; i += 1) { +15: for (int32_t j = 0; j < 10; j += 1) { +15: for (int32_t k = 0; k < 100; k += 1) { +15: tensor[((1000 * i) + ((100 * j) + k))] = (2 * A_copied_reshape[((1000 * i) + ((100 * j) + k))]); +15: }; +15: }; +15: }; +15: cinn_buffer_free((void*)(0), _tensor); +15: } +15: +15: [ OK ] Tensor.ReshapeCopied (16 ms) +15: [----------] 4 tests from Tensor (36 ms total) +15: +15: [----------] Global test environment tear-down +15: [==========] 4 tests from 1 test case ran. (36 ms total) +15: [ PASSED ] 4 tests. +16: [==========] Running 6 tests from 1 test case. +16: [----------] Global test environment set-up. +16: [----------] 6 tests from CodeGenC +16: [ RUN ] CodeGenC.module +16: WARNING: Logging before InitGoogleLogging() is written to STDERR +16: I0924 13:32:18.357357 27804 codegen_c_test.cc:46] C.body: (A[i, j] + B[i, j]) +16: codegen C: +16: #include +16: #include +16: +16: cinn_buffer_t* _C = cinn_buffer_t::new_((cinn_device_kind_t)(0)/*target*/, cinn_float32_t(), { 100, 20 }, 32/*align*/); +16: void add1(void* _args, int32_t num_args) +16: { +16: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +16: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +16: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +16: cinn_buffer_malloc((void*)(0), _C); +16: const float* A = ((const float*)(_A->memory)); +16: const float* B = ((const float*)(_B->memory)); +16: float* C = ((float*)(_C->memory)); +16: for (int32_t i = 0; i < 100; i += 1) { +16: for (int32_t j = 0; j < 20; j += 1) { +16: C[((20 * i) + j)] = (A[((20 * i) + j)] + B[((20 * i) + j)]); +16: }; +16: }; +16: cinn_buffer_free((void*)(0), _C); +16: } +16: +16: +16: header: +16: #ifndef _MODULE1_CINN_H_ +16: #define _MODULE1_CINN_H_ +16: +16: #include +16: #include +16: +16: void add1(void* _args, int32_t num_args); +16: +16: +16: #endif // _MODULE1_CINN_H_ +16: +16: W0924 13:32:18.365751 27804 codegen_c.cc:24] Output C header to file ./generated_module1.h +16: W0924 13:32:18.366384 27804 codegen_c.cc:33] Output C source to file ./generated_module1.cc +16: [ OK ] CodeGenC.module (9 ms) +16: [ RUN ] CodeGenC.module_with_transform +16: I0924 13:32:18.403774 27804 codegen_c_test.cc:150] func: +16: function add1 (_A, _B, _C, _D) +16: { +16: for (i_outer, 25) +16: { +16: for (i_inner, 4) +16: { +16: for (j, 20) +16: { +16: C[((4 * i_outer) + i_inner), j] = (1 + ((3 * A[((4 * i_outer) + i_inner), j]) + B[((4 * i_outer) + i_inner), j])) +16: } +16: } +16: } +16: for (i_outer, 25) +16: { +16: for (i_inner, 4) +16: { +16: for (j_outer, 2) +16: { +16: for (j_inner, (1 + int32(cinn_min(15, (19 + (-16 * j_outer)))))) +16: { +16: D[((4 * i_outer) + i_inner), ((16 * j_outer) + j_inner)] = ((2 * C[((4 * i_outer) + i_inner), ((16 * j_outer) + j_inner)]) + (4 * (C[((4 * i_outer) + i_inner), ((16 * j_outer) + j_inner)] * A[((4 * i_outer) + i_inner), ((16 * j_outer) + j_inner)]))) +16: } +16: } +16: } +16: } +16: } +16: codegen C: +16: #include +16: #include +16: +16: cinn_buffer_t* _C = cinn_buffer_t::new_((cinn_device_kind_t)(0)/*target*/, cinn_float32_t(), { 100, 20 }, 32/*align*/); +16: void add1(void* _args, int32_t num_args) +16: { +16: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +16: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +16: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +16: cinn_buffer_t* _D = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +16: cinn_buffer_malloc((void*)(0), _C); +16: cinn_buffer_malloc((void*)(0), _D); +16: const float* A = ((const float*)(_A->memory)); +16: const float* B = ((const float*)(_B->memory)); +16: float* C = ((float*)(_C->memory)); +16: float* D = ((float*)(_D->memory)); +16: for (int32_t i_outer = 0; i_outer < 25; i_outer += 1) { +16: for (int32_t i_inner = 0; i_inner < 4; i_inner += 1) { +16: for (int32_t j = 0; j < 20; j += 1) { +16: C[((20 * i_inner) + ((80 * i_outer) + j))] = (1 + ((3 * A[((20 * i_inner) + ((80 * i_outer) + j))]) + B[((20 * i_inner) + ((80 * i_outer) + j))])); +16: }; +16: }; +16: }; +16: for (int32_t i_outer = 0; i_outer < 25; i_outer += 1) { +16: for (int32_t i_inner = 0; i_inner < 4; i_inner += 1) { +16: for (int32_t j_outer = 0; j_outer < 2; j_outer += 1) { +16: for (int32_t j_inner = 0; j_inner < (1 + ((int32_t)(cinn_min(15, (19 + (-16 * j_outer)))))); j_inner += 1) { +16: D[((20 * i_inner) + ((80 * i_outer) + ((16 * j_outer) + j_inner)))] = ((2 * C[((20 * i_inner) + ((80 * i_outer) + ((16 * j_outer) + j_inner)))]) + (4 * (C[((20 * i_inner) + ((80 * i_outer) + ((16 * j_outer) + j_inner)))] * A[((20 * i_inner) + ((80 * i_outer) + ((16 * j_outer) + j_inner)))]))); +16: }; +16: }; +16: }; +16: }; +16: cinn_buffer_free((void*)(0), _C); +16: cinn_buffer_free((void*)(0), _D); +16: } +16: +16: +16: [ OK ] CodeGenC.module_with_transform (47 ms) +16: [ RUN ] CodeGenC.matmul +16: I0924 13:32:18.429903 27804 codegen_c_test.cc:238] C.body: matmul(Tensor(A, [100,20]), Tensor(B, [20,50]), Tensor(C, [100,50])) +16: f +16: function main (_A, _B, _C) +16: { +16: matmul(_Buffer_(_A), _Buffer_(_B), _Buffer_(_C)) +16: } +16: codegen C: +16: #include +16: #include +16: +16: cinn_buffer_t* _C = cinn_buffer_t::new_((cinn_device_kind_t)(0)/*target*/, cinn_float32_t(), { 100, 50 }); +16: void matmul(void* _args, int32_t num_args) +16: { +16: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +16: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +16: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +16: cinn_buffer_malloc((void*)(0), _C); +16: const float* A = ((const float*)(_A->memory)); +16: const float* B = ((const float*)(_B->memory)); +16: float* C = ((float*)(_C->memory)); +16: float* C_init = ((float*)(_C->memory)); +16: for (int32_t i = 0; i < 100; i += 1) { +16: for (int32_t j = 0; j < 50; j += 1) { +16: C_init[((50 * i) + j)] = 0; +16: for (int32_t k0 = 0; k0 < 20; k0 += 1) { +16: C[((50 * i) + j)] = (C[((50 * i) + j)] + (A[((20 * i) + k0)] * B[((50 * k0) + j)])); +16: }; +16: }; +16: }; +16: cinn_buffer_free((void*)(0), _C); +16: } +16: +16: void main(void* _args, int32_t num_args) +16: { +16: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +16: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +16: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +16: cinn_buffer_malloc((void*)(0), _C); +16: const float* A = ((const float*)(_A->memory)); +16: const float* B = ((const float*)(_B->memory)); +16: float* C = ((float*)(_C->memory)); +16: { +16: cinn_pod_value_t _pod_val_; +16: buffer_p_to_cinn_pod_value(_A, &_pod_val_); +16: cinn_pod_value_t _pod_val__0; +16: buffer_p_to_cinn_pod_value(_B, &_pod_val__0); +16: cinn_pod_value_t _pod_val__1; +16: buffer_p_to_cinn_pod_value(_C, &_pod_val__1); +16: cinn_pod_value_t _pod_arr[3]; +16: cinn_args_construct(_pod_arr, 3, &_pod_val_, &_pod_val__0, &_pod_val__1); +16: matmul(_pod_arr, 5); +16: }; +16: cinn_buffer_free((void*)(0), _C); +16: } +16: +16: +16: [ OK ] CodeGenC.matmul (20 ms) +16: [ RUN ] CodeGenC.matmul_tile +16: codegen C: +16: #include +16: #include +16: +16: cinn_buffer_t* _C = cinn_buffer_t::new_((cinn_device_kind_t)(0)/*target*/, cinn_float32_t(), { 100, 500 }, 32/*align*/); +16: void matmul(void* _args, int32_t num_args) +16: { +16: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +16: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +16: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +16: cinn_buffer_malloc((void*)(0), _C); +16: const float* A = ((const float*)(_A->memory)); +16: const float* B = ((const float*)(_B->memory)); +16: float* C = ((float*)(_C->memory)); +16: float* C_init = ((float*)(_C->memory)); +16: for (int32_t i_outer = 0; i_outer < 4; i_outer += 1) { +16: for (int32_t j_outer = 0; j_outer < 16; j_outer += 1) { +16: for (int32_t i_inner = 0; i_inner < (1 + ((int32_t)(cinn_min(31, (99 + (-32 * i_outer)))))); i_inner += 1) { +16: for (int32_t j_inner = 0; j_inner < (1 + ((int32_t)(cinn_min(31, (499 + (-32 * j_outer)))))); j_inner += 1) { +16: C_init[((500 * i_inner) + ((16000 * i_outer) + ((32 * j_outer) + j_inner)))] = 0; +16: for (int32_t k0_outer = 0; k0_outer < 50; k0_outer += 1) { +16: for (int32_t k0_inner = 0; k0_inner < 4; k0_inner += 1) { +16: C[((500 * i_inner) + ((16000 * i_outer) + ((32 * j_outer) + j_inner)))] = (C[((500 * i_inner) + ((16000 * i_outer) + ((32 * j_outer) + j_inner)))] + (A[((200 * i_inner) + ((6400 * i_outer) + ((4 * k0_outer) + k0_inner)))] * B[((32 * j_outer) + ((500 * k0_inner) + ((2000 * k0_outer) + j_inner)))])); +16: }; +16: }; +16: }; +16: }; +16: }; +16: }; +16: cinn_buffer_free((void*)(0), _C); +16: } +16: +16: +16: [ OK ] CodeGenC.matmul_tile (116 ms) +16: [ RUN ] CodeGenC.matmul_packed +16: codegen C: +16: #include +16: #include +16: +16: cinn_buffer_t* _C = cinn_buffer_t::new_((cinn_device_kind_t)(0)/*target*/, cinn_float32_t(), { 100, 500 }, 32/*align*/); +16: cinn_buffer_t* _PackedB = cinn_buffer_t::new_((cinn_device_kind_t)(0)/*target*/, cinn_float32_t(), { 15, 200, 32 }, 32/*align*/); +16: void matmul_with_packing(void* _args, int32_t num_args) +16: { +16: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +16: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +16: cinn_buffer_t* _PackedB = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +16: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +16: cinn_buffer_malloc((void*)(0), _PackedB); +16: cinn_buffer_malloc((void*)(0), _C); +16: const float* A = ((const float*)(_A->memory)); +16: const float* B = ((const float*)(_B->memory)); +16: float* C = ((float*)(_C->memory)); +16: float* PackedB = ((float*)(_PackedB->memory)); +16: for (int32_t i = 0; i < 15; i += 1) { +16: for (int32_t j = 0; j < 200; j += 1) { +16: for (int32_t k = 0; k < 32; k += 1) { +16: PackedB[((6400 * i) + ((32 * j) + k))] = B[((32 * i) + ((500 * j) + k))]; +16: }; +16: }; +16: }; +16: for (int32_t i_outer = 0; i_outer < 4; i_outer += 1) { +16: for (int32_t j_outer = 0; j_outer < 16; j_outer += 1) { +16: for (int32_t i_inner = 0; i_inner < (1 + ((int32_t)(cinn_min(31, (99 + (-32 * i_outer)))))); i_inner += 1) { +16: for (int32_t j_inner = 0; j_inner < (1 + ((int32_t)(cinn_min(31, (499 + (-32 * j_outer)))))); j_inner += 1) { +16: for (int32_t k0_outer = 0; k0_outer < 50; k0_outer += 1) { +16: for (int32_t k0_inner = 0; k0_inner < 4; k0_inner += 1) { +16: C[((500 * i_inner) + ((16000 * i_outer) + ((32 * j_outer) + j_inner)))] = (A[((200 * i_inner) + ((6400 * i_outer) + ((4 * k0_outer) + k0_inner)))] * PackedB[((j_inner % 32) + ((6400 * (j_inner / 32)) + ((6400 * j_outer) + ((32 * k0_inner) + (128 * k0_outer)))))]); +16: }; +16: }; +16: }; +16: }; +16: }; +16: }; +16: cinn_buffer_free((void*)(0), _PackedB); +16: cinn_buffer_free((void*)(0), _C); +16: } +16: +16: +16: [ OK ] CodeGenC.matmul_packed (66 ms) +16: [ RUN ] CodeGenC.call_extern +16: codegen C: +16: #include +16: #include +16: +16: void yy(void* _args, int32_t num_args) +16: { +16: cinn_buffer_t* _y = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +16: cinn_buffer_malloc((void*)(0), _y); +16: cinn_buffer_malloc((void*)(0), _x); +16: const float* x = ((const float*)(_x->memory)); +16: float* y = ((float*)(_y->memory)); +16: for (int32_t i = 0; i < 100; i += 1) { +16: y[i] = cinn_cpu_tanh_fp32(x[i]); +16: }; +16: cinn_buffer_free((void*)(0), _y); +16: } +16: +16: +16: [ OK ] CodeGenC.call_extern (3 ms) +16: [----------] 6 tests from CodeGenC (261 ms total) +16: +16: [----------] Global test environment tear-down +16: [==========] 6 tests from 1 test case ran. (261 ms total) +16: [ PASSED ] 6 tests. +10/68 Test #14: test_buffer ............................ Passed 0.03 sec +11/68 Test #13: test_collect_ir_nodes .................. Passed 0.13 sec +12/68 Test #12: test_host_intrinsics ................... Passed 0.13 sec +13/68 Test #10: test_cinn_runtime ...................... Passed 0.23 sec +14/68 Test #9: test_compute_at_transform .............. Passed 0.23 sec +15/68 Test #15: test_tensor ............................ Passed 0.33 sec +16/68 Test #16: test_codegen_c ......................... Passed 0.33 sec +test 17 + Start 17: test_codegen_c_x86 + +17: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/backends/test_codegen_c_x86 "--cinn_x86_builtin_code_root=/home/wangyue50/CINN-my/CINN/cinn/backends" +17: Test timeout computed to be: 600 +test 18 + Start 18: test_generated1 + +18: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/backends/test_generated1 "" +18: Test timeout computed to be: 600 +test 19 + Start 19: test_compiler + +19: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/backends/test_compiler "" +19: Test timeout computed to be: 600 +test 20 + Start 20: test_codegen_llvm + +20: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/backends/llvm/test_codegen_llvm "" +20: Test timeout computed to be: 600 +test 21 + Start 21: test_execution_engine + +21: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/backends/llvm/test_execution_engine "" +21: Test timeout computed to be: 600 +test 22 + Start 22: test_codegen_x86 + +22: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/backends/llvm/test_codegen_x86 "" +22: Test timeout computed to be: 600 +test 23 + Start 23: test_compute + +23: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/lang/test_compute "" +23: Test timeout computed to be: 600 +53: W0924 13:32:18.485375 27786 codegen_c.cc:24] Output C header to file ./test02_matmul_array_packing.h +53: W0924 13:32:18.496706 27786 codegen_c.cc:33] Output C source to file ./test02_matmul_array_packing.cc +53: [ OK ] matmul.ArrayPacking (68 ms) +53: [ RUN ] matmul.varient_shape +53: W0924 13:32:18.512287 27786 codegen_c.cc:24] Output C header to file ./test02_matmul_varient_shape.h +53: W0924 13:32:18.513252 27786 codegen_c.cc:33] Output C source to file ./test02_matmul_varient_shape.cc +53: I0924 13:32:18.549512 27786 test02_matmul_main.cc:203] func function matmul_dynamic_shape_tile (M, _A, _B, _C) +53: { +53: for (i, int32(M)) +53: { +53: for (j, 1024) +53: { +53: C_init_4[i, j] = 0 +53: } +53: } +53: for (i_outer, (1 + int32(((-1 + M) / 32)))) +53: { +53: for (i_inner, (1 + int32(cinn_min(31, (-1 + (M + (-32 * i_outer))))))) +53: { +53: for (j_outer, 32) +53: { +53: for (j_inner, 32) +53: { +53: for (k0, 1024) +53: { +53: C[((32 * i_outer) + i_inner), ((32 * j_outer) + j_inner)] = (C[((32 * i_outer) + i_inner), ((32 * j_outer) + j_inner)] + (A[((32 * i_outer) + i_inner), k0] * B[k0, ((32 * j_outer) + j_inner)])) +53: } +53: } +53: } +53: } +53: } +53: } +53: W0924 13:32:18.550587 27786 codegen_c.cc:24] Output C header to file ./test02_matmul_varient_shape_tile.h +53: W0924 13:32:18.555127 27786 codegen_c.cc:33] Output C source to file ./test02_matmul_varient_shape_tile.cc +53: [ OK ] matmul.varient_shape (59 ms) +53: [ RUN ] matmul.ArrayPacking_dynamic_shape +53: W0924 13:32:18.622040 27786 codegen_c.cc:24] Output C header to file ./test02_matmul_array_packing_dynamic_shape.h +53: W0924 13:32:18.633481 27786 codegen_c.cc:33] Output C source to file ./test02_matmul_array_packing_dynamic_shape.cc +53: [ OK ] matmul.ArrayPacking_dynamic_shape (78 ms) +53: [ RUN ] matmul.call +53: I0924 13:32:18.647380 27786 test02_matmul_main.cc:284] stage domain: { C[i, j, k0] : 0 <= i <= 1023 and 0 <= j <= 1023 and 0 <= k0 <= 1023 } +53: W0924 13:32:18.659453 27786 codegen_c.cc:24] Output C header to file ./test02_matmul_call.h +53: W0924 13:32:18.660598 27786 codegen_c.cc:33] Output C source to file ./test02_matmul_call.cc +53: [ OK ] matmul.call (27 ms) +53: [----------] 8 tests from matmul (449 ms total) +53: +53: [----------] Global test environment tear-down +53: [==========] 9 tests from 2 test cases ran. (501 ms total) +53: [ PASSED ] 9 tests. +22: [==========] Running 1 test from 1 test case. +22: [----------] Global test environment set-up. +22: [----------] 1 test from Vectorize +22: [ RUN ] Vectorize.basic +22: WARNING: Logging before InitGoogleLogging() is written to STDERR +22: I0924 13:32:18.694113 27810 codegen_x86_test.cc:25] fn: function fn (_A, _B, _tensor) +22: { +22: for (i, 128) +22: { +22: tensor[Ramp((8 * i),1,8)] = (A[Ramp((8 * i),1,8)] + B[Ramp((8 * i),1,8)]) +22: } +22: } +22: I0924 13:32:18.694416 27810 codegen_x86_test.cc:32] +22: function fn (_A, _B, _tensor) +22: { +22: for (i, 128) +22: { +22: tensor[Ramp((8 * i),1,8)] = (A[Ramp((8 * i),1,8)] + B[Ramp((8 * i),1,8)]) +22: } +22: } +22: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4096 +22: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4096 +22: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4096 +22: [ OK ] Vectorize.basic (61 ms) +22: [----------] 1 test from Vectorize (61 ms total) +22: +22: [----------] Global test environment tear-down +22: [==========] 1 test from 1 test case ran. (62 ms total) +22: [ PASSED ] 1 test. +21: [==========] Running 4 tests from 3 test cases. +21: [----------] Global test environment set-up. +21: [----------] 1 test from llvm_test01 +21: [ RUN ] llvm_test01.elementwise_add +21: [ OK ] llvm_test01.elementwise_add (0 ms) +21: [----------] 1 test from llvm_test01 (0 ms total) +21: +21: [----------] 1 test from llvm +21: [ RUN ] llvm.module_call_lowered_func +21: WARNING: Logging before InitGoogleLogging() is written to STDERR +21: I0924 13:32:18.698501 27809 execution_engine_test.cc:165] module: +21: #include +21: #include +21: +21: void elementwise_add(void* _args, int32_t num_args) +21: { +21: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +21: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +21: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +21: cinn_buffer_malloc((void*)(0), _C); +21: const float* A = ((const float*)(_A->memory)); +21: const float* B = ((const float*)(_B->memory)); +21: float* C = ((float*)(_C->memory)); +21: for (int32_t i = 0; i < 100; i += 1) { +21: for (int32_t j = 0; j < 32; j += 1) { +21: C[((32 * i) + j)] = (A[((32 * i) + j)] + B[((32 * i) + j)]); +21: }; +21: }; +21: cinn_buffer_free((void*)(0), _C); +21: } +21: +21: void main(void* _args, int32_t num_args) +21: { +21: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +21: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +21: cinn_buffer_t* _c_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +21: cinn_buffer_malloc((void*)(0), _c_out); +21: const float* A = ((const float*)(_A->memory)); +21: const float* B = ((const float*)(_B->memory)); +21: float* c_out = ((float*)(_c_out->memory)); +21: { +21: cinn_pod_value_t _pod_val_; +21: buffer_p_to_cinn_pod_value(_A, &_pod_val_); +21: cinn_pod_value_t _pod_val__0; +21: buffer_p_to_cinn_pod_value(_B, &_pod_val__0); +21: cinn_pod_value_t _pod_val__1; +21: buffer_p_to_cinn_pod_value(_c_out, &_pod_val__1); +21: cinn_pod_value_t _pod_arr[3]; +21: cinn_args_construct(_pod_arr, 3, &_pod_val_, &_pod_val__0, &_pod_val__1); +21: elementwise_add(_pod_arr, 5); +21: }; +21: cinn_buffer_free((void*)(0), _c_out); +21: } +21: +21: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +21: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +21: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +21: I0924 13:32:18.700588 27809 execution_engine_test.cc:172] JIT Link the module +21: I0924 13:32:18.709628 27809 execution_engine_test.cc:175] => LLVM JIT cos(0) = 1 +21: I0924 13:32:18.726971 27809 execution_engine_test.cc:178] JIT get elementwise_add_addr +21: [ OK ] llvm.module_call_lowered_func (39 ms) +21: [----------] 1 test from llvm (39 ms total) +21: +21: [----------] 2 tests from ExecutionEngine +21: [ RUN ] ExecutionEngine.custom_runtime_symbols +21: [ OK ] ExecutionEngine.custom_runtime_symbols (1 ms) +21: [ RUN ] ExecutionEngine.call_extern +21: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +21: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +21: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +21: [ OK ] ExecutionEngine.call_extern (31 ms) +21: [----------] 2 tests from ExecutionEngine (32 ms total) +21: +21: [----------] Global test environment tear-down +21: [==========] 4 tests from 3 test cases ran. (71 ms total) +21: [ PASSED ] 4 tests. +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.ceil (28 ms) +11: [ RUN ] mkl_math.round +11: I0924 13:32:18.596024 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_round_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_round_fp32_out[i, j] = cinn_cpu_round_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.round (29 ms) +11: [ RUN ] mkl_math.trunc +11: I0924 13:32:18.624581 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_trunc_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_trunc_fp32_out[i, j] = cinn_cpu_trunc_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.trunc (29 ms) +11: [ RUN ] mkl_math.cos +11: I0924 13:32:18.653442 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_cos_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_cos_fp32_out[i, j] = cinn_cpu_cos_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.cos (28 ms) +11: [ RUN ] mkl_math.cosh +11: I0924 13:32:18.682230 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_cosh_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_cosh_fp32_out[i, j] = cinn_cpu_cosh_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.cosh (30 ms) +11: [ RUN ] mkl_math.tan +11: I0924 13:32:18.711864 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_tan_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_tan_fp32_out[i, j] = cinn_cpu_tan_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.tan (29 ms) +11: [ RUN ] mkl_math.sin +11: I0924 13:32:18.740679 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_sin_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_sin_fp32_out[i, j] = cinn_cpu_sin_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.sin (29 ms) +11: [ RUN ] mkl_math.sinh +11: I0924 13:32:18.769448 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_sinh_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_sinh_fp32_out[i, j] = cinn_cpu_sinh_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.sinh (28 ms) +11: [ RUN ] mkl_math.acos +11: I0924 13:32:18.798184 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_acos_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_acos_fp32_out[i, j] = cinn_cpu_acos_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.acos (29 ms) +11: [ RUN ] mkl_math.acosh +11: I0924 13:32:18.826864 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_acosh_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_acosh_fp32_out[i, j] = cinn_cpu_acosh_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.acosh (29 ms) +11: [ RUN ] mkl_math.asin +11: I0924 13:32:18.855567 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_asin_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_asin_fp32_out[i, j] = cinn_cpu_asin_fp32(x[i, j]) +11: } +11: } +11: } +20: [==========] Running 5 tests from 2 test cases. +20: [----------] Global test environment set-up. +20: [----------] 4 tests from CodeGenLLVM +20: [ RUN ] CodeGenLLVM.Imm +20: [ OK ] CodeGenLLVM.Imm (0 ms) +20: [ RUN ] CodeGenLLVM.Expr +20: [ OK ] CodeGenLLVM.Expr (0 ms) +20: [ RUN ] CodeGenLLVM.Statement +20: [ OK ] CodeGenLLVM.Statement (0 ms) +20: [ RUN ] CodeGenLLVM.LowerFunc +20: WARNING: Logging before InitGoogleLogging() is written to STDERR +20: I0924 13:32:18.699189 27808 codegen_llvm_test.cc:457] ir function: function add1 (_a, _b, _c) +20: { +20: for (i, 3) +20: { +20: for (j, 2) +20: { +20: c[i, j] = (a[i, j] + b[i, j]) +20: } +20: } +20: } +20: [ OK ] CodeGenLLVM.LowerFunc (11 ms) +20: [----------] 4 tests from CodeGenLLVM (11 ms total) +20: +20: [----------] 1 test from SymbolTable +20: [ RUN ] SymbolTable.test +20: [ OK ] SymbolTable.test (0 ms) +20: [----------] 1 test from SymbolTable (0 ms total) +20: +20: [----------] Global test environment tear-down +20: [==========] 5 tests from 2 test cases ran. (11 ms total) +20: [ PASSED ] 5 tests. +19: [==========] Running 2 tests from 1 test case. +19: [----------] Global test environment set-up. +19: [----------] 2 tests from Compiler +19: [ RUN ] Compiler.x86 +19: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 800 +19: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 800 +19: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 800 +19: [ OK ] Compiler.x86 (34 ms) +19: [ RUN ] Compiler.sqrt +19: WARNING: Logging before InitGoogleLogging() is written to STDERR +19: I0924 13:32:18.722045 27807 nn.cc:477] kernel_size length is: 2 +19: I0924 13:32:18.722067 27807 nn.cc:478] kernel_size is: 3 +19: I0924 13:32:18.722069 27807 nn.cc:479] padding_size length is: 4 +19: I0924 13:32:18.722071 27807 nn.cc:480] padding_size is: 1 +19: [ OK ] Compiler.sqrt (71 ms) +19: [----------] 2 tests from Compiler (105 ms total) +19: +19: [----------] Global test environment tear-down +19: [==========] 2 tests from 1 test case ran. (105 ms total) +19: [ PASSED ] 2 tests. +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: I0924 13:32:18.682731 27796 stage_test.cc:466] 1.42271 +8: I0924 13:32:18.682749 27796 stage_test.cc:466] 0.919059 +8: I0924 13:32:18.682751 27796 stage_test.cc:466] 0.962657 +8: I0924 13:32:18.682754 27796 stage_test.cc:466] 1.50734 +8: [ OK ] Reorder.jit_precision_test (28 ms) +8: [----------] 1 test from Reorder (28 ms total) +8: +8: [----------] 2 tests from Unroll +8: [ RUN ] Unroll.jit_precision_test +8: I0924 13:32:18.713403 27796 stage_test.cc:444] fn: +8: function fn (_A, _B, _C) +8: { +8: for (i, 30) +8: { +8: { +8: C[i, 0] = (A[i, 0] + B[i, 0]) +8: } +8: { +8: C[i, 1] = (A[i, 1] + B[i, 1]) +8: } +8: { +8: C[i, 2] = (A[i, 2] + B[i, 2]) +8: } +8: { +8: C[i, 3] = (A[i, 3] + B[i, 3]) +8: } +8: { +8: C[i, 4] = (A[i, 4] + B[i, 4]) +8: } +8: { +8: C[i, 5] = (A[i, 5] + B[i, 5]) +8: } +8: { +8: C[i, 6] = (A[i, 6] + B[i, 6]) +8: } +8: { +8: C[i, 7] = (A[i, 7] + B[i, 7]) +8: } +8: { +8: C[i, 8] = (A[i, 8] + B[i, 8]) +8: } +8: { +8: C[i, 9] = (A[i, 9] + B[i, 9]) +8: } +8: { +8: C[i, 10] = (A[i, 10] + B[i, 10]) +8: } +8: { +8: C[i, 11] = (A[i, 11] + B[i, 11]) +8: } +8: { +8: C[i, 12] = (A[i, 12] + B[i, 12]) +8: } +8: { +8: C[i, 13] = (A[i, 13] + B[i, 13]) +8: } +8: { +8: C[i, 14] = (A[i, 14] + B[i, 14]) +8: } +8: { +8: C[i, 15] = (A[i, 15] + B[i, 15]) +8: } +8: { +8: C[i, 16] = (A[i, 16] + B[i, 16]) +8: } +8: { +8: C[i, 17] = (A[i, 17] + B[i, 17]) +8: } +8: { +8: C[i, 18] = (A[i, 18] + B[i, 18]) +8: } +8: { +8: C[i, 19] = (A[i, 19] + B[i, 19]) +8: } +8: { +8: C[i, 20] = (A[i, 20] + B[i, 20]) +8: } +8: { +8: C[i, 21] = (A[i, 21] + B[i, 21]) +8: } +8: { +8: C[i, 22] = (A[i, 22] + B[i, 22]) +8: } +8: { +8: C[i, 23] = (A[i, 23] + B[i, 23]) +8: } +8: { +8: C[i, 24] = (A[i, 24] + B[i, 24]) +8: } +8: { +8: C[i, 25] = (A[i, 25] + B[i, 25]) +8: } +8: { +8: C[i, 26] = (A[i, 26] + B[i, 26]) +8: } +8: { +8: C[i, 27] = (A[i, 27] + B[i, 27]) +8: } +8: { +8: C[i, 28] = (A[i, 28] + B[i, 28]) +8: } +8: { +8: C[i, 29] = (A[i, 29] + B[i, 29]) +8: } +8: { +8: C[i, 30] = (A[i, 30] + B[i, 30]) +8: } +8: { +8: C[i, 31] = (A[i, 31] + B[i, 31]) +8: } +8: { +8: C[i, 32] = (A[i, 32] + B[i, 32]) +8: } +8: { +8: C[i, 33] = (A[i, 33] + B[i, 33]) +8: } +8: { +8: C[i, 34] = (A[i, 34] + B[i, 34]) +8: } +8: { +8: C[i, 35] = (A[i, 35] + B[i, 35]) +8: } +8: { +8: C[i, 36] = (A[i, 36] + B[i, 36]) +8: } +8: { +8: C[i, 37] = (A[i, 37] + B[i, 37]) +8: } +8: { +8: C[i, 38] = (A[i, 38] + B[i, 38]) +8: } +8: { +8: C[i, 39] = (A[i, 39] + B[i, 39]) +8: } +8: } +8: } +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: I0924 13:32:18.747979 27796 stage_test.cc:466] 1.14587 +8: I0924 13:32:18.747992 27796 stage_test.cc:466] 0.950917 +8: I0924 13:32:18.747995 27796 stage_test.cc:466] 1.1457 +8: I0924 13:32:18.747997 27796 stage_test.cc:466] 0.782843 +8: [ OK ] Unroll.jit_precision_test (65 ms) +8: [ RUN ] Unroll.jit_precision_test1 +8: I0924 13:32:18.773546 27796 stage_test.cc:444] fn: +8: function fn (_A, _B, _C) +8: { +8: { +8: for (j, 40) +8: { +8: C[0, j] = (A[0, j] + B[0, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[1, j] = (A[1, j] + B[1, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[2, j] = (A[2, j] + B[2, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[3, j] = (A[3, j] + B[3, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[4, j] = (A[4, j] + B[4, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[5, j] = (A[5, j] + B[5, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[6, j] = (A[6, j] + B[6, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[7, j] = (A[7, j] + B[7, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[8, j] = (A[8, j] + B[8, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[9, j] = (A[9, j] + B[9, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[10, j] = (A[10, j] + B[10, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[11, j] = (A[11, j] + B[11, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[12, j] = (A[12, j] + B[12, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[13, j] = (A[13, j] + B[13, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[14, j] = (A[14, j] + B[14, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[15, j] = (A[15, j] + B[15, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[16, j] = (A[16, j] + B[16, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[17, j] = (A[17, j] + B[17, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[18, j] = (A[18, j] + B[18, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[19, j] = (A[19, j] + B[19, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[20, j] = (A[20, j] + B[20, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[21, j] = (A[21, j] + B[21, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[22, j] = (A[22, j] + B[22, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[23, j] = (A[23, j] + B[23, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[24, j] = (A[24, j] + B[24, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[25, j] = (A[25, j] + B[25, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[26, j] = (A[26, j] + B[26, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[27, j] = (A[27, j] + B[27, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[28, j] = (A[28, j] + B[28, j]) +8: } +8: } +8: { +8: for (j, 40) +8: { +8: C[29, j] = (A[29, j] + B[29, j]) +8: } +8: } +8: } +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +8: I0924 13:32:18.803447 27796 stage_test.cc:466] 0.52927 +8: I0924 13:32:18.803457 27796 stage_test.cc:466] 1.21315 +8: I0924 13:32:18.803459 27796 stage_test.cc:466] 1.52707 +8: I0924 13:32:18.803462 27796 stage_test.cc:466] 0.940553 +8: [ OK ] Unroll.jit_precision_test1 (55 ms) +8: [----------] 2 tests from Unroll (120 ms total) +8: +8: [----------] 2 tests from ComputeInline +8: [ RUN ] ComputeInline.basic +8: I0924 13:32:18.811084 27796 stage_test.cc:531] fn: +8: function fn (_A, _C) +8: { +8: for (i, 100) +8: { +8: for (j, 200) +8: { +8: C[i, j] = (6 + (2 * A[i, j])) +8: } +8: } +8: } +8: [ OK ] ComputeInline.basic (8 ms) +8: [ RUN ] ComputeInline.complex_graph +8: I0924 13:32:18.828562 27796 stage_test.cc:575] fn: +8: function fn (_A, _C, _C1, _C2) +8: { +8: for (i, 100) +8: { +8: for (j, 200) +8: { +8: C2[i, j] = (6 + (2 * A[i, j])) +8: } +8: } +8: for (i, 100) +8: { +8: for (j, 200) +8: { +8: C1[i, j] = (4 + (2 * A[i, j])) +8: } +8: } +8: for (i, 100) +8: { +8: for (j, 200) +8: { +8: C[i, j] = (2 + (2 * A[i, j])) +8: } +8: } +8: } +8: [ OK ] ComputeInline.complex_graph (17 ms) +8: [----------] 2 tests from ComputeInline (25 ms total) +8: +8: [----------] 1 test from ShareBufferWith +8: [ RUN ] ShareBufferWith.basic +8: I0924 13:32:18.839054 27796 stage_test.cc:622] fn: +8: function fn (_A, _B) +8: { +8: for (i, 100) +8: { +8: for (j, 200) +8: { +8: B[i, j] = (1 + A[i, j]) +8: } +8: } +8: for (i, 100) +8: { +8: for (j, 200) +8: { +8: B1[i, j] = (1 + B[i, j]) +8: } +8: } +8: } +8: I0924 13:32:18.839084 27796 stage_test.cc:630] +8: #include +8: #include +8: +8: void fn(void* _args, int32_t num_args) +8: { +8: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +8: cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +8: cinn_buffer_malloc((void*)(0), _B); +8: const float* A = ((const float*)(_A->memory)); +8: float* B = ((float*)(_B->memory)); +8: float* B1 = ((float*)(_B->memory)); +8: for (int32_t i = 0; i < 100; i += 1) { +8: for (int32_t j = 0; j < 200; j += 1) { +8: B[((200 * i) + j)] = (1 + A[((200 * i) + j)]); +8: }; +8: }; +8: for (int32_t i = 0; i < 100; i += 1) { +8: for (int32_t j = 0; j < 200; j += 1) { +8: B1[((200 * i) + j)] = (1 + B[((200 * i) + j)]); +8: }; +8: }; +8: cinn_buffer_free((void*)(0), _B); +8: } +8: +8: [ OK ] ShareBufferWith.basic (12 ms) +8: [----------] 1 test from ShareBufferWith (12 ms total) +8: +8: [----------] 2 tests from isl +8: [ RUN ] isl.test +8: I0924 13:32:18.840884 27796 stage_test.cc:641] schedule_intersected: [p0, p1] -> { p[i, j] -> p[t0, t1, t2 = j] : p0 = 0 and 2t1 = i and (t0) mod 2 = 0 and 0 <= p1 <= 2 and 4p1 <= i <= 1 + 4p1 and 0 <= j <= 9 + 4p1 - i and 0 <= t0 <= 1 } +8: I0924 13:32:18.841017 27796 stage_test.cc:644] space: [p0, p1] -> { : } +8: I0924 13:32:18.843125 27796 stage_test.cc:648] code: +8: if (p0 == 0 && p1 >= 0 && p1 <= 2) +8: for (int c2 = 0; c2 <= 9; c2 += 1) +8: p(4 * p1, c2); +8: [ OK ] isl.test (3 ms) +8: [ RUN ] isl.test1 +8: I0924 13:32:18.860146 27796 stage_test.cc:687] code: +8: +8: for (int c1 = 0; c1 <= 2; c1 += 1) +8: for (int c2 = 0; c2 <= 9; c2 += 1) +8: for (int c3 = 0; c3 <= 9; c3 += 1) { +8: if (p0 == 0 && p1 <= 2 && c2 == 0 && c3 == 2 * p1) +8: for (int c5 = 0; c5 <= 9; c5 += 1) +8: p(4 * p1, c5); +8: c(4 * c1, c2, c3); +8: } +8: [ OK ] isl.test1 (17 ms) +8: [----------] 2 tests from isl (20 ms total) +8: +8: [----------] Global test environment tear-down +8: [==========] 23 tests from 10 test cases ran. (508 ms total) +8: [ PASSED ] 23 tests. +18: [==========] Running 0 tests from 0 test cases. +18: [==========] 0 tests from 0 test cases ran. (0 ms total) +18: [ PASSED ] 0 tests. +17: [==========] Running 1 test from 1 test case. +17: [----------] Global test environment set-up. +17: [----------] 1 test from CodeGenCX86 +17: [ RUN ] CodeGenCX86.basic +17: before optim +17: { +17: for (i, 100) +17: { +17: for (j, 500) +17: { +17: D[i, j] = (A[i, j] * B[i, j]) +17: } +17: } +17: for (i, 100) +17: { +17: { +17: C[Broadcast(i,16), Ramp(0,1,16)] = (A[i, Ramp(0,1,16)] * B[i, Ramp(0,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(16,1,16)] = (A[i, Ramp(16,1,16)] * B[i, Ramp(16,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(32,1,16)] = (A[i, Ramp(32,1,16)] * B[i, Ramp(32,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(48,1,16)] = (A[i, Ramp(48,1,16)] * B[i, Ramp(48,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(64,1,16)] = (A[i, Ramp(64,1,16)] * B[i, Ramp(64,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(80,1,16)] = (A[i, Ramp(80,1,16)] * B[i, Ramp(80,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(96,1,16)] = (A[i, Ramp(96,1,16)] * B[i, Ramp(96,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(112,1,16)] = (A[i, Ramp(112,1,16)] * B[i, Ramp(112,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(128,1,16)] = (A[i, Ramp(128,1,16)] * B[i, Ramp(128,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(144,1,16)] = (A[i, Ramp(144,1,16)] * B[i, Ramp(144,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(160,1,16)] = (A[i, Ramp(160,1,16)] * B[i, Ramp(160,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(176,1,16)] = (A[i, Ramp(176,1,16)] * B[i, Ramp(176,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(192,1,16)] = (A[i, Ramp(192,1,16)] * B[i, Ramp(192,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(208,1,16)] = (A[i, Ramp(208,1,16)] * B[i, Ramp(208,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(224,1,16)] = (A[i, Ramp(224,1,16)] * B[i, Ramp(224,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(240,1,16)] = (A[i, Ramp(240,1,16)] * B[i, Ramp(240,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(256,1,16)] = (A[i, Ramp(256,1,16)] * B[i, Ramp(256,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(272,1,16)] = (A[i, Ramp(272,1,16)] * B[i, Ramp(272,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(288,1,16)] = (A[i, Ramp(288,1,16)] * B[i, Ramp(288,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(304,1,16)] = (A[i, Ramp(304,1,16)] * B[i, Ramp(304,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(320,1,16)] = (A[i, Ramp(320,1,16)] * B[i, Ramp(320,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(336,1,16)] = (A[i, Ramp(336,1,16)] * B[i, Ramp(336,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(352,1,16)] = (A[i, Ramp(352,1,16)] * B[i, Ramp(352,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(368,1,16)] = (A[i, Ramp(368,1,16)] * B[i, Ramp(368,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(384,1,16)] = (A[i, Ramp(384,1,16)] * B[i, Ramp(384,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(400,1,16)] = (A[i, Ramp(400,1,16)] * B[i, Ramp(400,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(416,1,16)] = (A[i, Ramp(416,1,16)] * B[i, Ramp(416,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(432,1,16)] = (A[i, Ramp(432,1,16)] * B[i, Ramp(432,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(448,1,16)] = (A[i, Ramp(448,1,16)] * B[i, Ramp(448,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(464,1,16)] = (A[i, Ramp(464,1,16)] * B[i, Ramp(464,1,16)]) +17: } +17: { +17: C[Broadcast(i,16), Ramp(480,1,16)] = (A[i, Ramp(480,1,16)] * B[i, Ramp(480,1,16)]) +17: } +17: } +17: } +17: out: +17: #include +17: #include +17: +17: void matmul(void* _args, int32_t num_args) +17: { +17: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +17: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +17: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +17: cinn_buffer_t* _D = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +17: cinn_buffer_malloc((void*)(0), _C); +17: cinn_buffer_malloc((void*)(0), _D); +17: const float* A = ((const float*)(_A->memory)); +17: const float* B = ((const float*)(_B->memory)); +17: float* C = ((float*)(_C->memory)); +17: float* D = ((float*)(_D->memory)); +17: for (int32_t i = 0; i < 100; i += 1) { +17: for (int32_t j = 0; j < 500; j += 1) { +17: D[((500 * i) + j)] = (A[((500 * i) + j)] * B[((500 * i) + j)]); +17: }; +17: }; +17: for (int32_t i = 0; i < 100; i += 1) { +17: cinn_avx512_store(C + (500 * i), cinn_avx512_mul(cinn_avx512_load(A + (500 * i)), cinn_avx512_load(B + (500 * i)))); +17: cinn_avx512_store(C + (16 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (16 + (500 * i))), cinn_avx512_load(B + (16 + (500 * i))))); +17: cinn_avx512_store(C + (32 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (32 + (500 * i))), cinn_avx512_load(B + (32 + (500 * i))))); +17: cinn_avx512_store(C + (48 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (48 + (500 * i))), cinn_avx512_load(B + (48 + (500 * i))))); +17: cinn_avx512_store(C + (64 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (64 + (500 * i))), cinn_avx512_load(B + (64 + (500 * i))))); +17: cinn_avx512_store(C + (80 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (80 + (500 * i))), cinn_avx512_load(B + (80 + (500 * i))))); +17: cinn_avx512_store(C + (96 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (96 + (500 * i))), cinn_avx512_load(B + (96 + (500 * i))))); +17: cinn_avx512_store(C + (112 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (112 + (500 * i))), cinn_avx512_load(B + (112 + (500 * i))))); +17: cinn_avx512_store(C + (128 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (128 + (500 * i))), cinn_avx512_load(B + (128 + (500 * i))))); +17: cinn_avx512_store(C + (144 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (144 + (500 * i))), cinn_avx512_load(B + (144 + (500 * i))))); +17: cinn_avx512_store(C + (160 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (160 + (500 * i))), cinn_avx512_load(B + (160 + (500 * i))))); +17: cinn_avx512_store(C + (176 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (176 + (500 * i))), cinn_avx512_load(B + (176 + (500 * i))))); +17: cinn_avx512_store(C + (192 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (192 + (500 * i))), cinn_avx512_load(B + (192 + (500 * i))))); +17: cinn_avx512_store(C + (208 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (208 + (500 * i))), cinn_avx512_load(B + (208 + (500 * i))))); +17: cinn_avx512_store(C + (224 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (224 + (500 * i))), cinn_avx512_load(B + (224 + (500 * i))))); +17: cinn_avx512_store(C + (240 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (240 + (500 * i))), cinn_avx512_load(B + (240 + (500 * i))))); +17: cinn_avx512_store(C + (256 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (256 + (500 * i))), cinn_avx512_load(B + (256 + (500 * i))))); +17: cinn_avx512_store(C + (272 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (272 + (500 * i))), cinn_avx512_load(B + (272 + (500 * i))))); +17: cinn_avx512_store(C + (288 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (288 + (500 * i))), cinn_avx512_load(B + (288 + (500 * i))))); +17: cinn_avx512_store(C + (304 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (304 + (500 * i))), cinn_avx512_load(B + (304 + (500 * i))))); +17: cinn_avx512_store(C + (320 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (320 + (500 * i))), cinn_avx512_load(B + (320 + (500 * i))))); +17: cinn_avx512_store(C + (336 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (336 + (500 * i))), cinn_avx512_load(B + (336 + (500 * i))))); +17: cinn_avx512_store(C + (352 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (352 + (500 * i))), cinn_avx512_load(B + (352 + (500 * i))))); +17: cinn_avx512_store(C + (368 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (368 + (500 * i))), cinn_avx512_load(B + (368 + (500 * i))))); +17: cinn_avx512_store(C + (384 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (384 + (500 * i))), cinn_avx512_load(B + (384 + (500 * i))))); +17: cinn_avx512_store(C + (400 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (400 + (500 * i))), cinn_avx512_load(B + (400 + (500 * i))))); +17: cinn_avx512_store(C + (416 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (416 + (500 * i))), cinn_avx512_load(B + (416 + (500 * i))))); +17: cinn_avx512_store(C + (432 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (432 + (500 * i))), cinn_avx512_load(B + (432 + (500 * i))))); +17: cinn_avx512_store(C + (448 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (448 + (500 * i))), cinn_avx512_load(B + (448 + (500 * i))))); +17: cinn_avx512_store(C + (464 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (464 + (500 * i))), cinn_avx512_load(B + (464 + (500 * i))))); +17: cinn_avx512_store(C + (480 + (500 * i)), cinn_avx512_mul(cinn_avx512_load(A + (480 + (500 * i))), cinn_avx512_load(B + (480 + (500 * i))))); +17: }; +17: cinn_buffer_free((void*)(0), _C); +17: cinn_buffer_free((void*)(0), _D); +17: } +17: +17: [ OK ] CodeGenCX86.basic (80 ms) +17: [----------] 1 test from CodeGenCX86 (80 ms total) +17: +17: [----------] Global test environment tear-down +17: [==========] 1 test from 1 test case ran. (80 ms total) +17: [ PASSED ] 1 test. +23: [==========] Running 1 test from 1 test case. +23: [----------] Global test environment set-up. +23: [----------] 1 test from Call +23: [ RUN ] Call.basic +23: [ OK ] Call.basic (0 ms) +23: [----------] 1 test from Call (0 ms total) +23: +23: [----------] Global test environment tear-down +23: [==========] 1 test from 1 test case ran. (0 ms total) +23: [ PASSED ] 1 test. +17/68 Test #53: test02_matmul_main ..................... Passed 0.53 sec +18/68 Test #22: test_codegen_x86 ....................... Passed 0.08 sec +19/68 Test #21: test_execution_engine .................. Passed 0.09 sec +20/68 Test #20: test_codegen_llvm ...................... Passed 0.19 sec +21/68 Test #19: test_compiler .......................... Passed 0.19 sec +22/68 Test #8: test_stage ............................. Passed 0.53 sec +23/68 Test #18: test_generated1 ........................ Passed 0.19 sec +24/68 Test #17: test_codegen_c_x86 ..................... Passed 0.19 sec +25/68 Test #23: test_compute ........................... Passed 0.19 sec +test 24 + Start 24: test_placeholder + +24: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/lang/test_placeholder "" +24: Test timeout computed to be: 600 +test 25 + Start 25: test_lower + +25: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/lang/test_lower "" +25: Test timeout computed to be: 600 +test 26 + Start 26: test_lower_impl + +26: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/lang/test_lower_impl "" +26: Test timeout computed to be: 600 +test 27 + Start 27: test_packed_func + +27: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/lang/test_packed_func "" +27: Test timeout computed to be: 600 +test 28 + Start 28: test_remove_nested_block + +28: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/optim/test_remove_nested_block "" +28: Test timeout computed to be: 600 +test 29 + Start 29: test_ir_copy + +29: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/optim/test_ir_copy "" +29: Test timeout computed to be: 600 +test 30 + Start 30: test_ir_simplify + +30: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/optim/test_ir_simplify "" +30: Test timeout computed to be: 600 +test 31 + Start 31: test_replace_call_with_expr + +31: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/optim/test_replace_call_with_expr "" +31: Test timeout computed to be: 600 +test 32 + Start 32: test_vectorize_loops + +32: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/optim/test_vectorize_loops "--cinn_x86_builtin_code_root=/home/wangyue50/CINN-my/CINN/cinn/backends" +32: Test timeout computed to be: 600 +31: [==========] Running 0 tests from 0 test cases. +31: [==========] 0 tests from 0 test cases ran. (0 ms total) +31: [ PASSED ] 0 tests. +30: [==========] Running 2 tests from 2 test cases. +30: [----------] Global test environment set-up. +30: [----------] 1 test from IrSimplify +30: [ RUN ] IrSimplify.basic +30: WARNING: Logging before InitGoogleLogging() is written to STDERR +30: I0924 13:32:18.883857 27818 ir_simplify_test.cc:26] B (((C[i, 0] + 0) + 100) + 24.5) +30: I0924 13:32:18.884227 27818 ir_simplify_test.cc:29] simplified: (124.5 + C[i, 0]) +30: I0924 13:32:18.892341 27818 ir_simplify_test.cc:50] original body: +30: { +30: for (i, 100) +30: { +30: for (j, 20) +30: { +30: B[i, j] = (125 + (X[i, j] + y[i, 0])) +30: } +30: } +30: } +30: I0924 13:32:18.900732 27818 ir_simplify_test.cc:83] original body: +30: { +30: for (i, 100) +30: { +30: for (j, 20) +30: { +30: B[i, j] = (125 + (X[(1000 * i), 0] + (0.333333 * y[i, 0]))) +30: } +30: } +30: } +30: [ OK ] IrSimplify.basic (18 ms) +30: [----------] 1 test from IrSimplify (18 ms total) +30: +30: [----------] 1 test from reverse +30: [ RUN ] reverse.prod +30: I0924 13:32:18.906757 27818 ir_simplify_test.cc:108] fn: +30: function fn (_A, _tensor) +30: { +30: for (i, 100) +30: { +30: for (j, 20) +30: { +30: tensor[i, j] = (1 / A[i, j]) +30: } +30: } +30: } +30: [ OK ] reverse.prod (5 ms) +30: [----------] 1 test from reverse (5 ms total) +30: +30: [----------] Global test environment tear-down +30: [==========] 2 tests from 2 test cases ran. (23 ms total) +30: [ PASSED ] 2 tests. +29: [==========] Running 1 test from 1 test case. +29: [----------] Global test environment set-up. +29: [----------] 1 test from IrCopy +29: [ RUN ] IrCopy.basic +29: WARNING: Logging before InitGoogleLogging() is written to STDERR +29: I0924 13:32:18.883689 27817 ir_copy_test.cc:13] aa 1 +29: [ OK ] IrCopy.basic (0 ms) +29: [----------] 1 test from IrCopy (0 ms total) +29: +29: [----------] Global test environment tear-down +29: [==========] 1 test from 1 test case ran. (0 ms total) +29: [ PASSED ] 1 test. +28: [==========] Running 1 test from 1 test case. +28: [----------] Global test environment set-up. +28: [----------] 1 test from RemoveNestedBlock +28: [ RUN ] RemoveNestedBlock.basic +28: origin: +28: { +28: { +28: 1 +28: 1 +28: } +28: } +28: e: +28: { +28: 1 +28: 1 +28: } +28: [ OK ] RemoveNestedBlock.basic (0 ms) +28: [----------] 1 test from RemoveNestedBlock (0 ms total) +28: +28: [----------] Global test environment tear-down +28: [==========] 1 test from 1 test case ran. (0 ms total) +28: [ PASSED ] 1 test. +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.asin (29 ms) +11: [ RUN ] mkl_math.asinh +11: I0924 13:32:18.885082 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_asinh_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_asinh_fp32_out[i, j] = cinn_cpu_asinh_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.asinh (30 ms) +11: [ RUN ] mkl_math.atan +11: I0924 13:32:18.914476 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_atan_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_atan_fp32_out[i, j] = cinn_cpu_atan_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.atan (28 ms) +11: [ RUN ] mkl_math.atanh +11: I0924 13:32:18.943289 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_atanh_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_atanh_fp32_out[i, j] = cinn_cpu_atanh_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.atanh (29 ms) +11: [ RUN ] mkl_math.isnan +11: I0924 13:32:18.971868 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_isnan_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_isnan_fp32_out[i, j] = cinn_cpu_isnan_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.isnan (29 ms) +11: [ RUN ] mkl_math.tanh +11: I0924 13:32:19.000425 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_tanh_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_tanh_fp32_out[i, j] = cinn_cpu_tanh_fp32(x[i, j]) +11: } +11: } +11: } +27: [==========] Running 4 tests from 1 test case. +27: [----------] Global test environment set-up. +27: [----------] 4 tests from Function +27: [ RUN ] Function.test +27: WARNING: Logging before InitGoogleLogging() is written to STDERR +27: I0924 13:32:18.881062 27815 packed_func_test.cc:21] c 3 +27: [ OK ] Function.test (0 ms) +27: [ RUN ] Function.test1 +27: I0924 13:32:18.881088 27815 packed_func_test.cc:33] hello world +27: [ OK ] Function.test1 (0 ms) +27: [ RUN ] Function.Expr +27: [ OK ] Function.Expr (0 ms) +27: [ RUN ] Function.ReturnMultiValue +27: [ OK ] Function.ReturnMultiValue (0 ms) +27: [----------] 4 tests from Function (0 ms total) +27: +27: [----------] Global test environment tear-down +27: [==========] 4 tests from 1 test case ran. (0 ms total) +27: [ PASSED ] 4 tests. +26: [==========] Running 5 tests from 1 test case. +26: [----------] Global test environment set-up. +26: [----------] 5 tests from CreateCompGraph +26: [ RUN ] CreateCompGraph.single_layer +26: WARNING: Logging before InitGoogleLogging() is written to STDERR +26: I0924 13:32:18.881656 27814 lower_impl_test.cc:25] 5 +26: I0924 13:32:18.881683 27814 lower_impl_test.cc:27] e: (A[i, j] + B[i, j]) +26: I0924 13:32:18.881696 27814 lower_impl_test.cc:27] e: 100 +26: I0924 13:32:18.881700 27814 lower_impl_test.cc:27] e: 200 +26: I0924 13:32:18.881702 27814 lower_impl_test.cc:27] e: 100 +26: I0924 13:32:18.881705 27814 lower_impl_test.cc:27] e: 200 +26: I0924 13:32:18.882537 27814 lower_impl_test.cc:33] graph: +26: digraph G { +26: node_0[label="A"] +26: node_1[label="B"] +26: node_2[label="C"] +26: node_0->node_2 +26: node_1->node_2 +26: } // end G +26: [ OK ] CreateCompGraph.single_layer (1 ms) +26: [ RUN ] CreateCompGraph.multi_layers +26: I0924 13:32:18.883672 27814 lower_impl_test.cc:80] graph: +26: digraph G { +26: node_3[label="A"] +26: node_4[label="B"] +26: node_6[label="C"] +26: node_7[label="D"] +26: node_5[label="E"] +26: node_3->node_5 +26: node_3->node_6 +26: node_4->node_5 +26: node_4->node_7 +26: node_4->node_6 +26: node_6->node_5 +26: node_6->node_7 +26: node_7->node_5 +26: } // end G +26: [ OK ] CreateCompGraph.multi_layers (1 ms) +26: [ RUN ] CreateCompGraph.multi_layers_with_extra_deps +26: I0924 13:32:18.885052 27814 lower_impl_test.cc:150] graph: +26: digraph G { +26: node_8[label="A"] +26: node_9[label="B"] +26: node_11[label="C"] +26: node_12[label="D"] +26: node_13[label="E"] +26: node_10[label="F"] +26: node_8->node_13 +26: node_8->node_11 +26: node_9->node_12 +26: node_11->node_10 +26: node_11->node_13 +26: node_11->node_12 +26: node_12->node_10 +26: node_13->node_10 +26: } // end G +26: [ OK ] CreateCompGraph.multi_layers_with_extra_deps (2 ms) +26: [ RUN ] CreateCompGraph.inline_compatible +26: I0924 13:32:18.886207 27814 lower_impl_test.cc:216] graph: +26: digraph G { +26: node_14[label="A"] +26: node_15[label="B"] +26: node_17[label="C"] +26: node_16[label="E"] +26: node_14->node_16 +26: node_14->node_17 +26: node_15->node_16 +26: node_15->node_17 +26: node_17->node_16 +26: } // end G +26: [ OK ] CreateCompGraph.inline_compatible (1 ms) +26: [ RUN ] CreateCompGraph.inline_compatible1 +26: I0924 13:32:18.887346 27814 lower_impl_test.cc:275] graph: +26: digraph G { +26: node_18[label="A"] +26: node_19[label="B"] +26: node_21[label="D"] +26: node_20[label="E"] +26: node_18->node_20 +26: node_18->node_21 +26: node_19->node_20 +26: node_19->node_21 +26: node_21->node_20 +26: } // end G +26: [ OK ] CreateCompGraph.inline_compatible1 (1 ms) +26: [----------] 5 tests from CreateCompGraph (6 ms total) +26: +26: [----------] Global test environment tear-down +26: [==========] 5 tests from 1 test case ran. (6 ms total) +26: [ PASSED ] 5 tests. +25: [==========] Running 5 tests from 1 test case. +25: [----------] Global test environment set-up. +25: [----------] 5 tests from lower +25: [ RUN ] lower.basic +25: WARNING: Logging before InitGoogleLogging() is written to STDERR +25: I0924 13:32:18.889024 27813 lower_test.cc:27] lower_size function cal_B (_A, _B) +25: { +25: for (i, 100) +25: { +25: for (j, 15) +25: { +25: B[i, j] = (1 + A[i, j]) +25: } +25: } +25: } +25: +25: { +25: for (i, 100) +25: { +25: for (j, 15) +25: { +25: B[i, j] = (1 + A[i, j]) +25: } +25: } +25: } +25: [ OK ] lower.basic (7 ms) +25: [ RUN ] lower.more_complex +25: func: +25: function cal_C (_A, _B, _C) +25: { +25: for (i, 100) +25: { +25: for (j, 15) +25: { +25: for (k, 200) +25: { +25: C[i, j, k] = (A[i, j] * B[j, k]) +25: } +25: } +25: } +25: } +25: [ OK ] lower.more_complex (9 ms) +25: [ RUN ] lower.dynamic_shape +25: func: +25: function cal_C (_X, _W, _C) +25: { +25: for (i, int32(B)) +25: { +25: for (j, 15) +25: { +25: for (k, 200) +25: { +25: C[i, j, k] = (X[i, j] * W[j, k]) +25: } +25: } +25: } +25: } +25: [ OK ] lower.dynamic_shape (11 ms) +25: [ RUN ] lower.lowered_call +25: I0924 13:32:18.910259 27813 lower_test.cc:101] call_op: lowered_fun0(Tensor(X, [B,15]), Tensor(Y, [B,15]), Tensor(Z, [B,15]), Tensor(C, [B,15])) +25: [ OK ] lower.lowered_call (9 ms) +25: [ RUN ] lower.temp_buffer_collects +25: [ OK ] lower.temp_buffer_collects (9 ms) +25: [----------] 5 tests from lower (45 ms total) +25: +25: [----------] Global test environment tear-down +25: [==========] 5 tests from 1 test case ran. (45 ms total) +25: [ PASSED ] 5 tests. +24: [==========] Running 2 tests from 1 test case. +24: [----------] Global test environment set-up. +24: [----------] 2 tests from placeholder +24: [ RUN ] placeholder.basic +24: WARNING: Logging before InitGoogleLogging() is written to STDERR +24: I0924 13:32:18.880643 27812 placeholder_test.cc:20] slice x[i, j] +24: [ OK ] placeholder.basic (0 ms) +24: [ RUN ] placeholder.dynamic_shape +24: [ OK ] placeholder.dynamic_shape (0 ms) +24: [----------] 2 tests from placeholder (0 ms total) +24: +24: [----------] Global test environment tear-down +24: [==========] 2 tests from 1 test case ran. (0 ms total) +24: [ PASSED ] 2 tests. +32: [==========] Running 4 tests from 1 test case. +32: [----------] Global test environment set-up. +32: [----------] 4 tests from Vectorize +32: [ RUN ] Vectorize.replace_var +32: [ OK ] Vectorize.replace_var (11 ms) +32: [ RUN ] Vectorize.TestMarkVectorize +32: before optim +32: { +32: for (i, 100) +32: { +32: for (j, 500) +32: { +32: D[i, j] = (A[i, j] * B[i, j]) +32: } +32: } +32: for (i, 100) +32: { +32: for (j, 31) +32: { +32: C[Broadcast(i,16), Ramp((16 * j),1,16)] = (A[i, Ramp((16 * j),1,16)] * B[i, Ramp((16 * j),1,16)]) +32: } +32: } +32: } +32: out: +32: #include +32: #include +32: +32: void matmul(void* _args, int32_t num_args) +32: { +32: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +32: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +32: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +32: cinn_buffer_malloc((void*)(0), _C); +32: const float* A = ((const float*)(_A->memory)); +32: const float* B = ((const float*)(_B->memory)); +32: float* C = ((float*)(_C->memory)); +32: float* D = ((float*)(_C->memory)); +32: for (int32_t i = 0; i < 100; i += 1) { +32: for (int32_t j = 0; j < 500; j += 1) { +32: D[((500 * i) + j)] = (A[((500 * i) + j)] * B[((500 * i) + j)]); +32: }; +32: }; +32: for (int32_t i = 0; i < 100; i += 1) { +32: for (int32_t j = 0; j < 31; j += 1) { +32: C[StackVec<16,int32_t>::Ramp(((500 * i) + (16 * j)), 1, 16)] = (StackedVec::Load(A,((500 * i) + (16 * j))) * StackedVec::Load(B,((500 * i) + (16 * j)))); +32: }; +32: }; +32: cinn_buffer_free((void*)(0), _C); +32: } +32: +32: [ OK ] Vectorize.TestMarkVectorize (16 ms) +32: [ RUN ] Vectorize.vectorize +32: [ OK ] Vectorize.vectorize (0 ms) +32: [ RUN ] Vectorize.single_for +32: WARNING: Logging before InitGoogleLogging() is written to STDERR +32: I0924 13:32:18.913712 27820 vectorize_loops_test.cc:216] Forloop +32: for (k0, 1) +32: { +32: C[Ramp((16 * k0),1,16)] = (A[Ramp((16 * k0),1,16)] + B[Ramp((16 * k0),1,16)]) +32: } +32: [ OK ] Vectorize.single_for (1 ms) +32: [----------] 4 tests from Vectorize (28 ms total) +32: +32: [----------] Global test environment tear-down +32: [==========] 4 tests from 1 test case ran. (28 ms total) +32: [ PASSED ] 4 tests. +26/68 Test #31: test_replace_call_with_expr ............ Passed 0.01 sec +27/68 Test #30: test_ir_simplify ....................... Passed 0.04 sec +28/68 Test #29: test_ir_copy ........................... Passed 0.04 sec +29/68 Test #28: test_remove_nested_block ............... Passed 0.04 sec +30/68 Test #27: test_packed_func ....................... Passed 0.14 sec +31/68 Test #26: test_lower_impl ........................ Passed 0.14 sec +32/68 Test #25: test_lower ............................. Passed 0.14 sec +33/68 Test #24: test_placeholder ....................... Passed 0.14 sec +34/68 Test #32: test_vectorize_loops ................... Passed 0.14 sec +test 33 + Start 33: test_transform_polyfor_to_for + +33: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/optim/test_transform_polyfor_to_for "--cinn_x86_builtin_code_root=/home/wangyue50/CINN-my/CINN/cinn/backends" +33: Test timeout computed to be: 600 +test 34 + Start 34: test_optimize + +34: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/optim/test_optimize "" +34: Test timeout computed to be: 600 +test 35 + Start 35: test_cache_read_write_replace + +35: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/optim/test_cache_read_write_replace "" +35: Test timeout computed to be: 600 +test 36 + Start 36: test_hlir_framework_buffer + +36: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/hlir/framework/test_hlir_framework_buffer "" +36: Test timeout computed to be: 600 +test 37 + Start 37: test_hlir_framework_infershape_pass + +37: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/hlir/framework/test_hlir_framework_infershape_pass "" +37: Test timeout computed to be: 600 +test 38 + Start 38: test_hlir_framework_tensor + +38: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/hlir/framework/test_hlir_framework_tensor "" +38: Test timeout computed to be: 600 +test 39 + Start 39: test_hlir_framework_scope + +39: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/hlir/framework/test_hlir_framework_scope "" +39: Test timeout computed to be: 600 +test 40 + Start 40: test_hlir_framework_instruction + +40: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/hlir/framework/test_hlir_framework_instruction "" +40: Test timeout computed to be: 600 +test 41 + Start 41: test_hlir_framework_op + +41: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/hlir/framework/test_hlir_framework_op "" +41: Test timeout computed to be: 600 +39: [==========] Running 1 test from 1 test case. +39: [----------] Global test environment set-up. +39: [----------] 1 test from Scope +39: [ RUN ] Scope.basic +39: [ OK ] Scope.basic (0 ms) +39: [----------] 1 test from Scope (0 ms total) +39: +39: [----------] Global test environment tear-down +39: [==========] 1 test from 1 test case ran. (0 ms total) +39: [ PASSED ] 1 test. +38: [==========] Running 1 test from 1 test case. +38: [----------] Global test environment set-up. +38: [----------] 1 test from Tensor +38: [ RUN ] Tensor.basic +38: [ OK ] Tensor.basic (0 ms) +38: [----------] 1 test from Tensor (0 ms total) +38: +38: [----------] Global test environment tear-down +38: [==========] 1 test from 1 test case ran. (0 ms total) +38: [ PASSED ] 1 test. +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register relu +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register relu6 +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register conv2d +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register depthwise_conv2d +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register batchnorm +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register pool1d +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register pool2d +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register pool3d +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register sigmoid +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register softmax +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register slice +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register depthwise_conv2d +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register elementwise_add +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register elementwise_mul +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register scale +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register matmul +37: I0100 00:00:00.000000 27825 registry.h:89] RAW: Register mul +37: [==========] Running 1 test from 1 test case. +37: [----------] Global test environment set-up. +37: [----------] 1 test from Operator +37: [ RUN ] Operator.GetAttrs +37: WARNING: Logging before InitGoogleLogging() is written to STDERR +37: I0924 13:32:19.052171 27825 graph_compiler.cc:39] [Debug] C Code is: +37: #include +37: #include +37: +37: void fn_elementwise_add_0(void* _args, int32_t num_args) +37: { +37: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +37: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +37: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +37: cinn_buffer_malloc((void*)(0), _C); +37: const float* A = ((const float*)(_A->memory)); +37: const float* B = ((const float*)(_B->memory)); +37: float* C = ((float*)(_C->memory)); +37: for (int32_t i = 0; i < 100; i += 1) { +37: for (int32_t j = 0; j < 32; j += 1) { +37: C[((32 * i) + j)] = (A[((32 * i) + j)] + B[((32 * i) + j)]); +37: }; +37: }; +37: cinn_buffer_free((void*)(0), _C); +37: } +37: +37: void fn_elementwise_add_1(void* _args, int32_t num_args) +37: { +37: const cinn_buffer_t* _var = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +37: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +37: cinn_buffer_t* _C_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +37: cinn_buffer_malloc((void*)(0), _C_0); +37: const float* B = ((const float*)(_B->memory)); +37: float* C_0 = ((float*)(_C_0->memory)); +37: const float* var = ((const float*)(_var->memory)); +37: for (int32_t i = 0; i < 100; i += 1) { +37: for (int32_t j = 0; j < 32; j += 1) { +37: C_0[((32 * i) + j)] = (var[((32 * i) + j)] + B[((32 * i) + j)]); +37: }; +37: }; +37: cinn_buffer_free((void*)(0), _C_0); +37: } +37: +37: void fn_elementwise_add_2(void* _args, int32_t num_args) +37: { +37: const cinn_buffer_t* _var = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +37: const cinn_buffer_t* _var_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +37: cinn_buffer_t* _C_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +37: cinn_buffer_malloc((void*)(0), _C_1); +37: float* C_1 = ((float*)(_C_1->memory)); +37: const float* var = ((const float*)(_var->memory)); +37: const float* var_0 = ((const float*)(_var_0->memory)); +37: for (int32_t i = 0; i < 100; i += 1) { +37: for (int32_t j = 0; j < 32; j += 1) { +37: C_1[((32 * i) + j)] = (var[((32 * i) + j)] + var_0[((32 * i) + j)]); +37: }; +37: }; +37: cinn_buffer_free((void*)(0), _C_1); +37: } +37: +37: I0924 13:32:19.080960 27825 infershape_pass_test.cc:68] data: 1.5662 + 1.20472 = 2.77092 +37: I0924 13:32:19.080993 27825 infershape_pass_test.cc:68] data: 1.59688 + 1.46827 = 3.06515 +37: I0924 13:32:19.080997 27825 infershape_pass_test.cc:68] data: 1.82329 + 1.40903 = 3.23232 +37: [ OK ] Operator.GetAttrs (53 ms) +37: [----------] 1 test from Operator (53 ms total) +37: +37: [----------] Global test environment tear-down +37: [==========] 1 test from 1 test case ran. (53 ms total) +37: [ PASSED ] 1 test. +36: [==========] Running 1 test from 1 test case. +36: [----------] Global test environment set-up. +36: [----------] 1 test from Buffer +36: [ RUN ] Buffer.basic +36: [ OK ] Buffer.basic (0 ms) +36: [----------] 1 test from Buffer (0 ms total) +36: +36: [----------] Global test environment tear-down +36: [==========] 1 test from 1 test case ran. (0 ms total) +36: [ PASSED ] 1 test. +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.tanh (29 ms) +11: [ RUN ] mkl_math.isfinite +11: I0924 13:32:19.029877 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_isfinite_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_isfinite_fp32_out[i, j] = cinn_cpu_isfinite_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.isfinite (30 ms) +11: [ RUN ] mkl_math.isinf +11: I0924 13:32:19.059433 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _cinn_cpu_isinf_fp32_out) +11: { +11: for (i, 10) +11: { +11: for (j, 10) +11: { +11: cinn_cpu_isinf_fp32_out[i, j] = cinn_cpu_isinf_fp32(x[i, j]) +11: } +11: } +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.isinf (28 ms) +11: [ RUN ] mkl_math.tanh_v_fp32 +11: E0924 13:32:19.084836 27799 lower_impl.cc:283] tensor [cinn_mkl_tanh_v_fp32_out] buffer is null +11: I0924 13:32:19.085230 27799 mkl_math_test.cc:59] func: +11: function fn (_x, _tuple_cinn_mkl_tanh_v_fp32_out0__0) +11: { +11: cinn_mkl_tanh_v_fp32(Tensor(x, [10,10]), Tensor(tuple_cinn_mkl_tanh_v_fp32_out0__0, [10,10])) +11: } +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 400 +11: [ OK ] mkl_math.tanh_v_fp32 (58 ms) +11: [----------] 26 tests from mkl_math (783 ms total) +11: +11: [----------] 1 test from cinn_cpu_mkl_gemm_fp32 +11: [ RUN ] cinn_cpu_mkl_gemm_fp32.test +11: E0924 13:32:19.142848 27799 lower_impl.cc:283] tensor [extern_call] buffer is null +11: I0924 13:32:19.143520 27799 mkl_math_test.cc:166] func: +11: function fn (_A, _B, _tuple_cinn_cpu_mkl_gemm_fp32_out0__0) +11: { +11: cinn_cpu_mkl_gemm_fp32(1, 30, 20, 40, 0, 0, 30, 40, 30, 0, Tensor(A, [30,40]), Tensor(B, [40,20]), Tensor(tuple_cinn_cpu_mkl_gemm_fp32_out0__0, [30,20])) +11: } +35: [==========] Running 2 tests from 1 test case. +35: [----------] Global test environment set-up. +35: [----------] 2 tests from CacheReadWriteReplace +35: [ RUN ] CacheReadWriteReplace.basic +35: WARNING: Logging before InitGoogleLogging() is written to STDERR +35: I0924 13:32:19.045128 27823 cache_read_write_replace_test.cc:30] fn: +35: function fn (_A, _B, _C) +35: { +35: for (i, 100) +35: { +35: for (j, 20) +35: { +35: A_read_cache[i, j] = A[i, j] +35: } +35: } +35: __syncthreads() +35: for (i, 100) +35: { +35: for (j, 20) +35: { +35: C[i, j] = (A_read_cache[i, j] + B[i, j]) +35: } +35: } +35: for (i, 100) +35: { +35: for (j, 20) +35: { +35: C_cache_write_out[i, j] = C[i, j] +35: } +35: } +35: } +35: [ OK ] CacheReadWriteReplace.basic (18 ms) +35: [ RUN ] CacheReadWriteReplace.cache_write +35: I0924 13:32:19.056802 27823 cache_read_write_replace_test.cc:87] +35: function fn (_A, _B, _C1_cache_write_out) +35: { +35: for (i, 100) +35: { +35: for (j, 100) +35: { +35: C1[i, j] = (3 + A[i, j]) +35: } +35: } +35: for (i, 100) +35: { +35: for (j, 100) +35: { +35: C1_cache_write_out[i, j] = C1[i, j] +35: } +35: } +35: } +35: [ OK ] CacheReadWriteReplace.cache_write (11 ms) +35: [----------] 2 tests from CacheReadWriteReplace (29 ms total) +35: +35: [----------] Global test environment tear-down +35: [==========] 2 tests from 1 test case ran. (29 ms total) +35: [ PASSED ] 2 tests. +34: [==========] Running 1 test from 1 test case. +34: [----------] Global test environment set-up. +34: [----------] 1 test from Optimize +34: [ RUN ] Optimize.Unroll +34: [ OK ] Optimize.Unroll (16 ms) +34: [----------] 1 test from Optimize (16 ms total) +34: +34: [----------] Global test environment tear-down +34: [==========] 1 test from 1 test case ran. (16 ms total) +34: [ PASSED ] 1 test. +33: [==========] Running 1 test from 1 test case. +33: [----------] Global test environment set-up. +33: [----------] 1 test from Expr +33: [ RUN ] Expr.basic +33: out: +33: #include +33: #include +33: +33: void matmul(void* _args, int32_t num_args) +33: { +33: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +33: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +33: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +33: cinn_buffer_malloc((void*)(0), _C); +33: const float* A = ((const float*)(_A->memory)); +33: const float* B = ((const float*)(_B->memory)); +33: float* C = ((float*)(_C->memory)); +33: for (int32_t i_outer = 0; i_outer < 64; i_outer += 1) { +33: for (int32_t i_inner = 0; i_inner < 8; i_inner += 1) { +33: for (int32_t j_outer = 0; j_outer < 63; j_outer += 1) { +33: for (int32_t j_inner = 0; j_inner < (1 + ((int32_t)(cinn_min(7, (499 + (-8 * j_outer)))))); j_inner += 1) { +33: for (int32_t k0 = 0; k0 < 200; k0 += 1) { +33: C[((500 * i_inner) + ((4000 * i_outer) + ((8 * j_outer) + j_inner)))] = (C[((500 * i_inner) + ((4000 * i_outer) + ((8 * j_outer) + j_inner)))] + (A[((200 * i_inner) + ((1600 * i_outer) + k0))] * B[((8 * j_outer) + ((500 * k0) + j_inner))])); +33: }; +33: }; +33: }; +33: }; +33: }; +33: cinn_buffer_free((void*)(0), _C); +33: } +33: +33: out: +33: #include +33: #include +33: +33: void matmul(void* _args, int32_t num_args) +33: { +33: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +33: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +33: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +33: cinn_buffer_malloc((void*)(0), _C); +33: const float* A = ((const float*)(_A->memory)); +33: const float* B = ((const float*)(_B->memory)); +33: float* C = ((float*)(_C->memory)); +33: for (int32_t i_outer = 0; i_outer < 64; i_outer += 1) { +33: for (int32_t i_inner = 0; i_inner < 8; i_inner += 1) { +33: for (int32_t j_outer = 0; j_outer < 63; j_outer += 1) { +33: for (int32_t j_inner = 0; j_inner < (1 + ((int32_t)(cinn_min(7, (499 + (-8 * j_outer)))))); j_inner += 1) { +33: for (int32_t k0 = 0; k0 < 200; k0 += 1) { +33: C[((500 * i_inner) + ((4000 * i_outer) + ((8 * j_outer) + j_inner)))] = (C[((500 * i_inner) + ((4000 * i_outer) + ((8 * j_outer) + j_inner)))] + (A[((200 * i_inner) + ((1600 * i_outer) + k0))] * B[((8 * j_outer) + ((500 * k0) + j_inner))])); +33: }; +33: }; +33: }; +33: }; +33: }; +33: cinn_buffer_free((void*)(0), _C); +33: } +33: +33: [ OK ] Expr.basic (43 ms) +33: [----------] 1 test from Expr (43 ms total) +33: +33: [----------] Global test environment tear-down +33: [==========] 1 test from 1 test case ran. (43 ms total) +33: [ PASSED ] 1 test. +40: [==========] Running 1 test from 1 test case. +40: [----------] Global test environment set-up. +40: [----------] 1 test from Instruction +40: [ RUN ] Instruction.basic +40: WARNING: Logging before InitGoogleLogging() is written to STDERR +40: I0924 13:32:19.092864 27828 instruction_test.cc:74] data: 0.783099 + 0.903366 = 1.68647 +40: I0924 13:32:19.092911 27828 instruction_test.cc:74] data: 0.79844 + 0.983596 = 1.78204 +40: I0924 13:32:19.092916 27828 instruction_test.cc:74] data: 0.911647 + 0.66688 = 1.57853 +40: [ OK ] Instruction.basic (64 ms) +40: [----------] 1 test from Instruction (64 ms total) +40: +40: [----------] Global test environment tear-down +40: [==========] 1 test from 1 test case ran. (64 ms total) +40: [ PASSED ] 1 test. +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register relu +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register relu6 +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register conv2d +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register depthwise_conv2d +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register batchnorm +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register pool1d +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register pool2d +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register pool3d +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register sigmoid +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register softmax +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register slice +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register depthwise_conv2d +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register elementwise_add +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register elementwise_mul +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register scale +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register matmul +41: I0100 00:00:00.000000 27829 registry.h:89] RAW: Register mul +41: [==========] Running 1 test from 1 test case. +41: [----------] Global test environment set-up. +41: [----------] 1 test from Operator +41: [ RUN ] Operator.GetAttrs +41: WARNING: Logging before InitGoogleLogging() is written to STDERR +41: I0924 13:32:19.037526 27829 op_test.cc:46] Test Strategy Codegen: +41: function add1 (_A, _B, _C) +41: { +41: for (i, 100) +41: { +41: for (j, 32) +41: { +41: C[i, j] = (A[i, j] + B[i, j]) +41: } +41: } +41: } +41: [ OK ] Operator.GetAttrs (7 ms) +41: [----------] 1 test from Operator (7 ms total) +41: +41: [----------] Global test environment tear-down +41: [==========] 1 test from 1 test case ran. (7 ms total) +41: [ PASSED ] 1 test. +35/68 Test #39: test_hlir_framework_scope .............. Passed 0.01 sec +36/68 Test #38: test_hlir_framework_tensor ............. Passed 0.02 sec +37/68 Test #37: test_hlir_framework_infershape_pass .... Passed 0.07 sec +38/68 Test #36: test_hlir_framework_buffer ............. Passed 0.07 sec +39/68 Test #35: test_cache_read_write_replace .......... Passed 0.17 sec +40/68 Test #34: test_optimize .......................... Passed 0.17 sec +41/68 Test #33: test_transform_polyfor_to_for .......... Passed 0.17 sec +42/68 Test #40: test_hlir_framework_instruction ........ Passed 0.17 sec +43/68 Test #41: test_hlir_framework_op ................. Passed 0.17 sec +test 42 + Start 42: test_hlir_framework_print_graph_pass + +42: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/hlir/framework/test_hlir_framework_print_graph_pass "" +42: Test timeout computed to be: 600 +test 43 + Start 43: test_pe_elementwise + +43: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/hlir/pe/test_pe_elementwise "" +43: Test timeout computed to be: 600 +test 44 + Start 44: test_pe_broadcast + +44: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/hlir/pe/test_pe_broadcast "" +44: Test timeout computed to be: 600 +test 45 + Start 45: test_pe_transform + +45: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/hlir/pe/test_pe_transform "" +45: Test timeout computed to be: 600 +test 46 + Start 46: test_op_broadcast + +46: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/hlir/op/test_op_broadcast "" +46: Test timeout computed to be: 600 +test 47 + Start 47: test_op_nn + +47: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/hlir/op/test_op_nn "" +47: Test timeout computed to be: 600 +test 48 + Start 48: test_frontend_syntax + +48: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/frontend/test_frontend_syntax "--model_dir=/home/wangyue50/CINN-my/CINN/build/thirds/naive_mul_model" +48: Test timeout computed to be: 600 +test 49 + Start 49: test_frontend_executor + +49: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/frontend/test_frontend_executor "--model_dir=/home/wangyue50/CINN-my/CINN/build/thirds/naive_mul_model" +49: Test timeout computed to be: 600 +test 50 + Start 50: test_model_parser + +50: Test command: /home/wangyue50/CINN-my/CINN/build/cinn/frontend/paddle/test_model_parser "--model_dir=/home/wangyue50/CINN-my/CINN/build/thirds/model/lite_naive_model" +50: Test timeout computed to be: 600 +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register relu +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register relu6 +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register conv2d +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register depthwise_conv2d +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register batchnorm +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register pool1d +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register pool2d +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register pool3d +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register sigmoid +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register softmax +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register slice +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register depthwise_conv2d +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register elementwise_add +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register elementwise_mul +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register scale +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register matmul +48: I0100 00:00:00.000000 27836 registry.h:89] RAW: Register mul +48: [==========] Running 5 tests from 2 test cases. +48: [----------] Global test environment set-up. +48: [----------] 4 tests from syntax +48: [ RUN ] syntax.basic +48: WARNING: Logging before InitGoogleLogging() is written to STDERR +48: I0924 13:32:19.206811 27836 syntax_test.cc:43] instruction: var_1 = elementwise_add(placeholder, placeholder_0) +48: I0924 13:32:19.206851 27836 syntax_test.cc:43] instruction: var_2 = elementwise_add(placeholder, var_1) +48: [ OK ] syntax.basic (0 ms) +48: [ RUN ] syntax.program_execute_multi_elementwise_add +48: I0924 13:32:19.206964 27836 syntax_test.cc:57] graph: +48: digraph G { +48: node_3[label="elementwise_add_0"] +48: node_5[label="elementwise_add_1"] +48: node_0[label="placeholder_1"] +48: node_1[label="placeholder_2"] +48: node_2[label="var_5"] +48: node_4[label="var_6"] +48: node_0->node_5 +48: node_0->node_3 +48: node_1->node_3 +48: node_2->node_5 +48: node_3->node_2 +48: node_5->node_4 +48: } // end G +48: I0924 13:32:19.223773 27836 graph_compiler.cc:39] [Debug] C Code is: +48: #include +48: #include +48: +48: void fn_elementwise_add_0(void* _args, int32_t num_args) +48: { +48: const cinn_buffer_t* _placeholder_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +48: const cinn_buffer_t* _placeholder_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +48: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +48: cinn_buffer_malloc((void*)(0), _C); +48: float* C = ((float*)(_C->memory)); +48: const float* placeholder_1 = ((const float*)(_placeholder_1->memory)); +48: const float* placeholder_2 = ((const float*)(_placeholder_2->memory)); +48: for (int32_t i = 0; i < 32; i += 1) { +48: for (int32_t j = 0; j < 24; j += 1) { +48: C[((24 * i) + j)] = (placeholder_1[((24 * i) + j)] + placeholder_2[((24 * i) + j)]); +48: }; +48: }; +48: cinn_buffer_free((void*)(0), _C); +48: } +48: +48: void fn_elementwise_add_1(void* _args, int32_t num_args) +48: { +48: const cinn_buffer_t* _placeholder_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +48: const cinn_buffer_t* _var_5 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +48: cinn_buffer_t* _C_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +48: cinn_buffer_malloc((void*)(0), _C_0); +48: float* C_0 = ((float*)(_C_0->memory)); +48: const float* placeholder_1 = ((const float*)(_placeholder_1->memory)); +48: const float* var_5 = ((const float*)(_var_5->memory)); +48: for (int32_t i = 0; i < 32; i += 1) { +48: for (int32_t j = 0; j < 24; j += 1) { +48: C_0[((24 * i) + j)] = (placeholder_1[((24 * i) + j)] + var_5[((24 * i) + j)]); +48: }; +48: }; +48: cinn_buffer_free((void*)(0), _C_0); +48: } +48: +48: [ OK ] syntax.program_execute_multi_elementwise_add (44 ms) +48: [ RUN ] syntax.program_execute_multi_elementwise_add2 +48: I0924 13:32:19.250627 27836 syntax_test.cc:80] graph: +48: digraph G { +48: node_9[label="elementwise_add_0"] +48: node_11[label="elementwise_add_1"] +48: node_6[label="placeholder_3"] +48: node_7[label="placeholder_4"] +48: node_10[label="var_10"] +48: node_8[label="var_9"] +48: node_6->node_11 +48: node_6->node_9 +48: node_7->node_9 +48: node_8->node_11 +48: node_9->node_8 +48: node_11->node_10 +48: } // end G +48: I0924 13:32:19.265573 27836 graph_compiler.cc:39] [Debug] C Code is: +48: #include +48: #include +48: +48: void fn_elementwise_add_0(void* _args, int32_t num_args) +48: { +48: const cinn_buffer_t* _placeholder_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +48: const cinn_buffer_t* _placeholder_4 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +48: cinn_buffer_t* _C_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +48: cinn_buffer_malloc((void*)(0), _C_1); +48: float* C_1 = ((float*)(_C_1->memory)); +48: const float* placeholder_3 = ((const float*)(_placeholder_3->memory)); +48: const float* placeholder_4 = ((const float*)(_placeholder_4->memory)); +48: for (int32_t i = 0; i < 32; i += 1) { +48: for (int32_t j = 0; j < 24; j += 1) { +48: C_1[((24 * i) + j)] = (placeholder_3[((24 * i) + j)] + placeholder_4[((24 * i) + j)]); +48: }; +48: }; +48: cinn_buffer_free((void*)(0), _C_1); +48: } +48: +48: void fn_elementwise_add_1(void* _args, int32_t num_args) +48: { +48: const cinn_buffer_t* _placeholder_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +48: const cinn_buffer_t* _var_9 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +48: cinn_buffer_t* _C_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +48: cinn_buffer_malloc((void*)(0), _C_2); +48: float* C_2 = ((float*)(_C_2->memory)); +48: const float* placeholder_3 = ((const float*)(_placeholder_3->memory)); +48: const float* var_9 = ((const float*)(_var_9->memory)); +48: for (int32_t i = 0; i < 32; i += 1) { +48: for (int32_t j = 0; j < 24; j += 1) { +48: C_2[((24 * i) + j)] = (placeholder_3[((24 * i) + j)] + var_9[((24 * i) + j)]); +48: }; +48: }; +48: cinn_buffer_free((void*)(0), _C_2); +48: } +48: +48: [ OK ] syntax.program_execute_multi_elementwise_add2 (40 ms) +48: [ RUN ] syntax.program_execute_fc +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register relu +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register relu6 +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register conv2d +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register depthwise_conv2d +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register batchnorm +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register pool1d +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register pool2d +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register pool3d +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register sigmoid +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register softmax +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register slice +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register depthwise_conv2d +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register elementwise_add +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register elementwise_mul +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register scale +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register matmul +47: I0100 00:00:00.000000 27835 registry.h:89] RAW: Register mul +47: [==========] Running 5 tests from 1 test case. +47: [----------] Global test environment set-up. +47: [----------] 5 tests from Operator +47: [ RUN ] Operator.Operator_Pool2d_Test0 +47: WARNING: Logging before InitGoogleLogging() is written to STDERR +47: I0924 13:32:19.206691 27835 nn.cc:477] kernel_size length is: 2 +47: I0924 13:32:19.206710 27835 nn.cc:478] kernel_size is: 2 +47: I0924 13:32:19.206713 27835 nn.cc:479] padding_size length is: 4 +47: I0924 13:32:19.206717 27835 nn.cc:480] padding_size is: 1 +47: I0924 13:32:19.254041 27835 op_nn_test.cc:53] Test Strategy Codegen: +47: function pool2d (_A, _pad_temp_0, _T_Pool2d_out_0) +47: { +47: for (j, 3) +47: { +47: for (k, 5) +47: { +47: for (a, 5) +47: { +47: T_Pool2d_out_0_init[0, j, k, a] = 0 +47: } +47: } +47: } +47: for (j, 3) +47: { +47: for (k, 10) +47: { +47: for (a, 10) +47: { +47: pad_temp_0[0, j, k, a] = select(((a < 9) and ((a >= 1) and ((k < 9) and (k >= 1)))), A[0, j, (-1 + k), (-1 + a)], -3.40282e+38) +47: } +47: } +47: } +47: for (j, 3) +47: { +47: for (k, 5) +47: { +47: for (a, 5) +47: { +47: for (kernel_idx, 2) +47: { +47: for (kernel_idx_0, 2) +47: { +47: T_Pool2d_out_0[0, j, k, a] = cinn_max(T_Pool2d_out_0[0, j, k, a], pad_temp_0[0, j, ((2 * k) + kernel_idx), ((2 * a) + kernel_idx_0)]) +47: } +47: } +47: } +47: } +47: } +47: } +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 768 +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 1200 +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 300 +47: [ OK ] Operator.Operator_Pool2d_Test0 (79 ms) +47: [ RUN ] Operator.Operator_Pool2d_Test1 +47: I0924 13:32:19.285432 27835 nn.cc:477] kernel_size length is: 2 +47: I0924 13:32:19.285440 27835 nn.cc:478] kernel_size is: 2 +47: I0924 13:32:19.285442 27835 nn.cc:479] padding_size length is: 4 +47: I0924 13:32:19.285444 27835 nn.cc:480] padding_size is: 1 +47: I0924 13:32:19.332312 27835 op_nn_test.cc:109] Test Strategy Codegen: +47: function pool2d (_A, _pad_temp_1, _T_Pool2d_out_1_0) +47: { +47: for (j, 3) +47: { +47: for (k, 5) +47: { +47: for (a, 5) +47: { +47: T_Pool2d_out_1_0_init[0, j, k, a] = 0 +47: } +47: } +47: } +47: for (j, 3) +47: { +47: for (k, 11) +47: { +47: for (a, 11) +47: { +47: pad_temp_1[0, j, k, a] = select(((a < 9) and ((a >= 1) and ((k < 9) and (k >= 1)))), A[0, j, (-1 + k), (-1 + a)], 0) +47: } +47: } +47: } +47: for (j, 3) +47: { +47: for (k, 5) +47: { +47: for (a, 5) +47: { +47: for (kernel_idx_1, 2) +47: { +47: for (kernel_idx_2, 2) +47: { +47: T_Pool2d_out_1_0[0, j, k, a] = (T_Pool2d_out_1_0[0, j, k, a] + (pad_temp_1[0, j, ((2 * k) + kernel_idx_1), ((2 * a) + kernel_idx_2)] * (1 / float32(((1 * 2) * 2))))) +47: } +47: } +47: } +47: } +47: } +47: } +47: I0924 13:32:19.340415 27835 codegen_llvm.cc:344] instr: i32 4 +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 768 +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 1452 +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 300 +47: [ OK ] Operator.Operator_Pool2d_Test1 (76 ms) +47: [ RUN ] Operator.Operator_Pool2d_Test2 +47: I0924 13:32:19.361708 27835 nn.cc:477] kernel_size length is: 2 +47: I0924 13:32:19.361716 27835 nn.cc:478] kernel_size is: 2 +47: I0924 13:32:19.361719 27835 nn.cc:479] padding_size length is: 4 +47: I0924 13:32:19.361722 27835 nn.cc:480] padding_size is: 1 +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register relu +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register relu6 +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register conv2d +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register depthwise_conv2d +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register batchnorm +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register pool1d +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register pool2d +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register pool3d +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register sigmoid +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register softmax +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register slice +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register depthwise_conv2d +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register elementwise_add +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register elementwise_mul +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register scale +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register matmul +46: I0100 00:00:00.000000 27834 registry.h:89] RAW: Register mul +46: [==========] Running 2 tests from 1 test case. +46: [----------] Global test environment set-up. +46: [----------] 2 tests from Operator +46: [ RUN ] Operator.Operator_ElementWise_Add_Test0 +46: WARNING: Logging before InitGoogleLogging() is written to STDERR +46: I0924 13:32:19.210886 27834 op_broadcast_test.cc:44] Test Strategy Codegen: +46: function add1 (_A, _B, _C) +46: { +46: for (i, 100) +46: { +46: for (j, 32) +46: { +46: C[i, j] = (A[i, j] + B[i, j]) +46: } +46: } +46: } +46: [ OK ] Operator.Operator_ElementWise_Add_Test0 (7 ms) +46: [ RUN ] Operator.Operator_ElementWise_Add_Test1 +46: I0924 13:32:19.217064 27834 op_broadcast_test.cc:76] Test Strategy Codegen: +46: function add1 (_A, _B, _C_0) +46: { +46: for (i, 100) +46: { +46: for (j, 32) +46: { +46: C_0[i, j] = (A[i, j] + B[j]) +46: } +46: } +46: } +46: function add1 (_A, _B, _C_0) +46: { +46: for (i, 100) +46: { +46: for (j, 32) +46: { +46: C_0[i, j] = (A[i, j] + B[j]) +46: } +46: } +46: }[ OK ] Operator.Operator_ElementWise_Add_Test1 (7 ms) +46: [----------] 2 tests from Operator (14 ms total) +46: +46: [----------] Global test environment tear-down +46: [==========] 2 tests from 1 test case ran. (14 ms total) +46: [ PASSED ] 2 tests. +45: [==========] Running 1 test from 1 test case. +45: [----------] Global test environment set-up. +45: [----------] 1 test from MatmulPE +45: [ RUN ] MatmulPE.PE_Matmul_Test0 +45: WARNING: Logging before InitGoogleLogging() is written to STDERR +45: I0924 13:32:19.210407 27833 pe_transform_test.cc:32] func: +45: function fn (_A, _B, _C) +45: { +45: for (i, 100) +45: { +45: for (j, 32) +45: { +45: for (kk, 16) +45: { +45: C[i, j] = (C[i, j] + (A[i, kk] * B[kk, j])) +45: } +45: } +45: } +45: } +45: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 6400 +45: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 2048 +45: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +45: [ OK ] MatmulPE.PE_Matmul_Test0 (38 ms) +45: [----------] 1 test from MatmulPE (38 ms total) +45: +45: [----------] Global test environment tear-down +45: [==========] 1 test from 1 test case ran. (38 ms total) +45: [ PASSED ] 1 test. +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4800 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 3200 +11: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 2400 +11: [ OK ] cinn_cpu_mkl_gemm_fp32.test (49 ms) +11: [----------] 1 test from cinn_cpu_mkl_gemm_fp32 (49 ms total) +11: +11: [----------] Global test environment tear-down +11: [==========] 27 tests from 2 test cases ran. (832 ms total) +11: [ PASSED ] 27 tests. +44: [==========] Running 6 tests from 3 test cases. +44: [----------] Global test environment set-up. +44: [----------] 2 tests from broadcast_pe +44: [ RUN ] broadcast_pe.Add +44: WARNING: Logging before InitGoogleLogging() is written to STDERR +44: I0924 13:32:19.207803 27832 pe_broadcast_test.cc:33] func: +44: function fn (_A, _B, _C) +44: { +44: for (i, 100) +44: { +44: for (j, 32) +44: { +44: C[i, j] = (A[i, j] + B[i, j]) +44: } +44: } +44: } +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +44: [ OK ] broadcast_pe.Add (34 ms) +44: [ RUN ] broadcast_pe.Multiply +44: I0924 13:32:19.240713 27832 pe_broadcast_test.cc:33] func: +44: function fn (_A, _B, _C) +44: { +44: for (i, 100) +44: { +44: for (j, 32) +44: { +44: C[i, j] = (A[i, j] * B[i, j]) +44: } +44: } +44: } +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +44: [ OK ] broadcast_pe.Multiply (30 ms) +44: [----------] 2 tests from broadcast_pe (64 ms total) +44: +44: [----------] 2 tests from broadcast_pe1 +44: [ RUN ] broadcast_pe1.Add +44: I0924 13:32:19.274186 27832 pe_broadcast_test.cc:80] func: +44: function fn (_A, _B, _C) +44: { +44: for (i, 100) +44: { +44: for (j, 32) +44: { +44: for (k, 10) +44: { +44: C[i, j, k] = (A[i, j, k] + B[j]) +44: } +44: } +44: } +44: } +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128000 +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128 +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128000 +44: [ OK ] broadcast_pe1.Add (35 ms) +44: [ RUN ] broadcast_pe1.Multiply +44: I0924 13:32:19.309664 27832 pe_broadcast_test.cc:80] func: +44: function fn (_A, _B, _C) +44: { +44: for (i, 100) +44: { +44: for (j, 32) +44: { +44: for (k, 10) +44: { +44: C[i, j, k] = (A[i, j, k] * B[j]) +44: } +44: } +44: } +44: } +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128000 +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128 +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128000 +44: [ OK ] broadcast_pe1.Multiply (36 ms) +44: [----------] 2 tests from broadcast_pe1 (71 ms total) +44: +44: [----------] 2 tests from broadcast_pe2 +44: [ RUN ] broadcast_pe2.Add +44: I0924 13:32:19.348361 27832 pe_broadcast_test.cc:127] func: +44: function fn (_A, _B, _C) +44: { +44: for (i, 100) +44: { +44: for (j, 32) +44: { +44: for (k, 10) +44: { +44: C[i, j, k, 0] = (A[i, j, k, 0] + B[j, k]) +44: } +44: } +44: } +44: } +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128000 +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 1280 +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128000 +44: [ OK ] broadcast_pe2.Add (40 ms) +44: [ RUN ] broadcast_pe2.Multiply +44: I0924 13:32:19.388643 27832 pe_broadcast_test.cc:127] func: +44: function fn (_A, _B, _C) +44: { +44: for (i, 100) +44: { +44: for (j, 32) +44: { +44: for (k, 10) +44: { +44: C[i, j, k, 0] = (A[i, j, k, 0] * B[j, k]) +44: } +44: } +44: } +44: } +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128000 +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 1280 +44: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128000 +44: [ OK ] broadcast_pe2.Multiply (40 ms) +44: [----------] 2 tests from broadcast_pe2 (80 ms total) +44: +44: [----------] Global test environment tear-down +44: [==========] 6 tests from 3 test cases ran. (215 ms total) +44: [ PASSED ] 6 tests. +43: [==========] Running 25 tests from 1 test case. +43: [----------] Global test environment set-up. +43: [----------] 25 tests from elementwise_pe +43: [ RUN ] elementwise_pe.exp +43: WARNING: Logging before InitGoogleLogging() is written to STDERR +43: I0924 13:32:19.206897 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_exp_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_exp_fp32_out[i, j] = cinn_cpu_exp_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.exp (33 ms) +43: [ RUN ] elementwise_pe.erf +43: I0924 13:32:19.239110 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_erf_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_erf_fp32_out[i, j] = cinn_cpu_erf_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.erf (29 ms) +43: [ RUN ] elementwise_pe.sqrt +43: I0924 13:32:19.268132 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_sqrt_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_sqrt_fp32_out[i, j] = cinn_cpu_sqrt_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.sqrt (29 ms) +43: [ RUN ] elementwise_pe.log +43: I0924 13:32:19.296869 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_log_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_log_fp32_out[i, j] = cinn_cpu_log_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.log (29 ms) +43: [ RUN ] elementwise_pe.log2 +43: I0924 13:32:19.325762 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_log2_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_log2_fp32_out[i, j] = cinn_cpu_log2_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.log2 (29 ms) +43: [ RUN ] elementwise_pe.log10 +43: I0924 13:32:19.354607 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_log10_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_log10_fp32_out[i, j] = cinn_cpu_log10_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.log10 (29 ms) +43: [ RUN ] elementwise_pe.floor +43: I0924 13:32:19.383556 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_floor_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_floor_fp32_out[i, j] = cinn_cpu_floor_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.floor (29 ms) +43: [ RUN ] elementwise_pe.ceil +43: I0924 13:32:19.412417 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_ceil_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_ceil_fp32_out[i, j] = cinn_cpu_ceil_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.ceil (29 ms) +43: [ RUN ] elementwise_pe.round +43: I0924 13:32:19.441426 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_round_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_round_fp32_out[i, j] = cinn_cpu_round_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.round (28 ms) +43: [ RUN ] elementwise_pe.trunc +43: I0924 13:32:19.470185 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_trunc_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_trunc_fp32_out[i, j] = cinn_cpu_trunc_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.trunc (29 ms) +43: [ RUN ] elementwise_pe.cos +43: I0924 13:32:19.498968 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_cos_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_cos_fp32_out[i, j] = cinn_cpu_cos_fp32(A[i, j]) +43: } +43: } +43: } +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register relu +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register relu6 +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register conv2d +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register depthwise_conv2d +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register batchnorm +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register pool1d +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register pool2d +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register pool3d +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register sigmoid +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register softmax +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register slice +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register depthwise_conv2d +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register elementwise_add +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register elementwise_mul +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register scale +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register matmul +42: I0100 00:00:00.000000 27830 registry.h:89] RAW: Register mul +42: [==========] Running 1 test from 1 test case. +42: [----------] Global test environment set-up. +42: [----------] 1 test from Operator +42: [ RUN ] Operator.GetAttrs +42: WARNING: Logging before InitGoogleLogging() is written to STDERR +42: I0924 13:32:19.200210 27830 print_graph_pass_test.cc:53] 0:elementwise_add(elementwise_add_0) +42: 1:elementwise_add(elementwise_add_1) +42: 2:elementwise_add(elementwise_add_2) +42: [ OK ] Operator.GetAttrs (1 ms) +42: [----------] 1 test from Operator (1 ms total) +42: +42: [----------] Global test environment tear-down +42: [==========] 1 test from 1 test case ran. (1 ms total) +42: [ PASSED ] 1 test. +50: [==========] Running 1 test from 1 test case. +50: [----------] Global test environment set-up. +50: [----------] 1 test from LoadModelPb +50: [ RUN ] LoadModelPb.naive_model +50: WARNING: Logging before InitGoogleLogging() is written to STDERR +50: I0924 13:32:19.206583 27838 model_parser.cc:188] model_dir is: /home/wangyue50/CINN-my/CINN/build/thirds/model/lite_naive_model +50: I0924 13:32:19.206604 27838 model_parser.cc:189] model_file is: __model__ +50: I0924 13:32:19.206606 27838 model_parser.cc:190] param_file is: +50: I0924 13:32:19.207155 27838 model_parser_test.cc:21] feed +50: I0924 13:32:19.207161 27838 model_parser_test.cc:21] mul +50: I0924 13:32:19.207163 27838 model_parser_test.cc:21] scale +50: I0924 13:32:19.207165 27838 model_parser_test.cc:21] fetch +50: [ OK ] LoadModelPb.naive_model (1 ms) +50: [----------] 1 test from LoadModelPb (1 ms total) +50: +50: [----------] Global test environment tear-down +50: [==========] 1 test from 1 test case ran. (1 ms total) +50: [ PASSED ] 1 test. +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register relu +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register relu6 +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register conv2d +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register depthwise_conv2d +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register batchnorm +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register pool1d +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register pool2d +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register pool3d +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register sigmoid +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register softmax +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register slice +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register depthwise_conv2d +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register elementwise_add +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register elementwise_mul +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register scale +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register matmul +49: I0100 00:00:00.000000 27837 registry.h:89] RAW: Register mul +49: [==========] Running 1 test from 1 test case. +49: [----------] Global test environment set-up. +49: [----------] 1 test from Executor +49: [ RUN ] Executor.basic +49: WARNING: Logging before InitGoogleLogging() is written to STDERR +49: I0924 13:32:19.207593 27837 syntax.cc:130] Loading Paddle model from /home/wangyue50/CINN-my/CINN/build/thirds/naive_mul_model +49: I0924 13:32:19.207660 27837 model_parser.cc:188] model_dir is: /home/wangyue50/CINN-my/CINN/build/thirds/naive_mul_model +49: I0924 13:32:19.207664 27837 model_parser.cc:189] model_file is: __model__ +49: I0924 13:32:19.207665 27837 model_parser.cc:190] param_file is: +49: I0924 13:32:19.208698 27837 paddle_model_to_program.cc:26] detect model output: [save_infer_model/scale_0.tmp_0] +49: I0924 13:32:19.208746 27837 executor.cc:49] Program: +49: Program { +49: var_3 = mul(A, fc_0__w_0, y_num_col_dims=1, x_num_col_dims=1) +49: var_7 = elementwise_add(var_3, fc_0__b_0, axis=1) +49: var_9 = relu(var_7) +49: var_11 = scale(var_9, scale=1) +49: } +49: I0924 13:32:19.239845 27837 graph_compiler.cc:39] [Debug] C Code is: +49: #include +49: #include +49: +49: void fn_mul_0(void* _args, int32_t num_args) +49: { +49: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +49: const cinn_buffer_t* _fc_0__w_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +49: cinn_buffer_t* _Mul_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +49: cinn_buffer_malloc((void*)(0), _Mul_out); +49: const float* A_reshape = ((const float*)(_A->memory)); +49: float* Mul_out = ((float*)(_Mul_out->memory)); +49: float* Mul_out_init = ((float*)(_Mul_out->memory)); +49: const float* fc_0__w_0_reshape = ((const float*)(_fc_0__w_0->memory)); +49: for (int32_t j = 0; j < 30; j += 1) { +49: Mul_out_init[j] = 0; +49: }; +49: for (int32_t j = 0; j < 30; j += 1) { +49: for (int32_t axis_k = 0; axis_k < 30; axis_k += 1) { +49: Mul_out[j] = (Mul_out[j] + (A_reshape[axis_k] * fc_0__w_0_reshape[((30 * axis_k) + j)])); +49: }; +49: }; +49: cinn_buffer_free((void*)(0), _Mul_out); +49: } +49: +49: void fn_elementwise_add_1(void* _args, int32_t num_args) +49: { +49: const cinn_buffer_t* _var_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +49: const cinn_buffer_t* _fc_0__b_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +49: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +49: cinn_buffer_malloc((void*)(0), _C); +49: float* C = ((float*)(_C->memory)); +49: const float* fc_0__b_0 = ((const float*)(_fc_0__b_0->memory)); +49: const float* var_3 = ((const float*)(_var_3->memory)); +49: for (int32_t j = 0; j < 30; j += 1) { +49: C[j] = (var_3[j] + fc_0__b_0[j]); +49: }; +49: cinn_buffer_free((void*)(0), _C); +49: } +49: +49: void fn_relu_2(void* _args, int32_t num_args) +49: { +49: const cinn_buffer_t* _var_7 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +49: cinn_buffer_t* _Relu_output = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +49: cinn_buffer_malloc((void*)(0), _Relu_output); +49: float* Relu_output = ((float*)(_Relu_output->memory)); +49: const float* var_7 = ((const float*)(_var_7->memory)); +49: for (int32_t j = 0; j < 30; j += 1) { +49: Relu_output[j] = cinn_max(var_7[j], 0); +49: }; +49: cinn_buffer_free((void*)(0), _Relu_output); +49: } +49: +49: void fn_scale_3(void* _args, int32_t num_args) +49: { +49: const cinn_buffer_t* _var_9 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +49: cinn_buffer_t* _Scale_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +49: cinn_buffer_malloc((void*)(0), _Scale_out); +49: float* Scale_out = ((float*)(_Scale_out->memory)); +49: const float* var_9 = ((const float*)(_var_9->memory)); +49: for (int32_t j = 0; j < 30; j += 1) { +49: Scale_out[j] = var_9[j]; +49: }; +49: cinn_buffer_free((void*)(0), _Scale_out); +49: } +49: +49: [ OK ] Executor.basic (61 ms) +49: [----------] 1 test from Executor (61 ms total) +49: +49: [----------] Global test environment tear-down +49: [==========] 1 test from 1 test case ran. (61 ms total) +49: [ PASSED ] 1 test. +44/68 Test #46: test_op_broadcast ...................... Passed 0.21 sec +45/68 Test #45: test_pe_transform ...................... Passed 0.21 sec +46/68 Test #11: test_mkl_math .......................... Passed 1.06 sec +47/68 Test #44: test_pe_broadcast ...................... Passed 0.23 sec +48/68 Test #42: test_hlir_framework_print_graph_pass ... Passed 0.33 sec +49/68 Test #50: test_model_parser ...................... Passed 0.32 sec +50/68 Test #49: test_frontend_executor ................. Passed 0.32 sec +test 52 + Start 52: test01_elementwise_add_case + +52: Test command: /home/wangyue50/CINN-my/CINN/build/tests/test01_elementwise_add_case "" +52: Test timeout computed to be: 600 +test 54 + Start 54: test02_matmul_case + +54: Test command: /home/wangyue50/CINN-my/CINN/build/tests/test02_matmul_case "" +54: Test timeout computed to be: 600 +test 56 + Start 56: test03_conv_case + +56: Test command: /home/wangyue50/CINN-my/CINN/build/tests/test03_conv_case "" +56: Test timeout computed to be: 600 +test 57 + Start 57: test_cinn_python_api + +57: Test command: /usr/bin/cmake "-E" "env" "PYTHONPATH=/home/wangyue50/CINN-my/CINN/build/python:/home/wangyue50/CINN-my/CINN/python/tests/..:/home/wangyue50/CINN-my/CINN/build/python/cinn:/home/wangyue50/CINN-my/CINN" "python3" "/home/wangyue50/CINN-my/CINN/python/tests/test_matmul.py" +57: Test timeout computed to be: 9.99988e+06 +test 58 + Start 58: test_cinn_common + +58: Test command: /usr/bin/cmake "-E" "env" "PYTHONPATH=/home/wangyue50/CINN-my/CINN/build/python:/home/wangyue50/CINN-my/CINN/python/tests/..:/home/wangyue50/CINN-my/CINN/build/python/cinn:/home/wangyue50/CINN-my/CINN" "python3" "/home/wangyue50/CINN-my/CINN/python/tests/test_common.py" +58: Test timeout computed to be: 9.99988e+06 +test 59 + Start 59: test_cinn_packed_func + +59: Test command: /usr/bin/cmake "-E" "env" "PYTHONPATH=/home/wangyue50/CINN-my/CINN/build/python:/home/wangyue50/CINN-my/CINN/python/tests/..:/home/wangyue50/CINN-my/CINN/build/python/cinn:/home/wangyue50/CINN-my/CINN" "python3" "/home/wangyue50/CINN-my/CINN/python/tests/test_packed_func.py" +59: Test timeout computed to be: 9.99988e+06 +test 60 + Start 60: test_cinn_pe_elementwise + +60: Test command: /usr/bin/cmake "-E" "env" "PYTHONPATH=/home/wangyue50/CINN-my/CINN/build/python:/home/wangyue50/CINN-my/CINN/python/tests/..:/home/wangyue50/CINN-my/CINN/build/python/cinn:/home/wangyue50/CINN-my/CINN" "python3" "/home/wangyue50/CINN-my/CINN/python/tests/test_pe_elementwise.py" +60: Test timeout computed to be: 9.99988e+06 +48: I0924 13:32:19.314476 27836 graph_compiler.cc:39] [Debug] C Code is: +48: #include +48: #include +48: +48: void fn_mul_0(void* _args, int32_t num_args) +48: { +48: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +48: const cinn_buffer_t* _W = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +48: cinn_buffer_t* _Mul_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +48: cinn_buffer_malloc((void*)(0), _Mul_out); +48: const float* A_reshape = ((const float*)(_A->memory)); +48: float* Mul_out = ((float*)(_Mul_out->memory)); +48: float* Mul_out_init = ((float*)(_Mul_out->memory)); +48: const float* W_reshape = ((const float*)(_W->memory)); +48: for (int32_t i = 0; i < 320; i += 1) { +48: for (int32_t j = 0; j < 24; j += 1) { +48: Mul_out_init[((24 * i) + j)] = 0; +48: }; +48: }; +48: for (int32_t i = 0; i < 320; i += 1) { +48: for (int32_t j = 0; j < 24; j += 1) { +48: for (int32_t axis_k = 0; axis_k < 18; axis_k += 1) { +48: Mul_out[((24 * i) + j)] = (Mul_out[((24 * i) + j)] + (A_reshape[((18 * i) + axis_k)] * W_reshape[((24 * axis_k) + j)])); +48: }; +48: }; +48: }; +48: cinn_buffer_free((void*)(0), _Mul_out); +48: } +48: +48: void fn_elementwise_add_1(void* _args, int32_t num_args) +48: { +48: const cinn_buffer_t* _var_14 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +48: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +48: cinn_buffer_t* _C_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +48: cinn_buffer_malloc((void*)(0), _C_3); +48: const float* B = ((const float*)(_B->memory)); +48: float* C_3 = ((float*)(_C_3->memory)); +48: const float* var_14 = ((const float*)(_var_14->memory)); +48: for (int32_t i = 0; i < 320; i += 1) { +48: for (int32_t j = 0; j < 24; j += 1) { +48: C_3[((24 * i) + j)] = (var_14[((24 * i) + j)] + B[j]); +48: }; +48: }; +48: cinn_buffer_free((void*)(0), _C_3); +48: } +48: +48: [ OK ] syntax.program_execute_fc (50 ms) +48: [----------] 4 tests from syntax (134 ms total) +48: +48: [----------] 1 test from load_paddle_model +48: [ RUN ] load_paddle_model.fc_execute +48: I0924 13:32:19.340806 27836 syntax.cc:130] Loading Paddle model from /home/wangyue50/CINN-my/CINN/build/thirds/naive_mul_model +48: I0924 13:32:19.340857 27836 model_parser.cc:188] model_dir is: /home/wangyue50/CINN-my/CINN/build/thirds/naive_mul_model +48: I0924 13:32:19.340860 27836 model_parser.cc:189] model_file is: __model__ +48: I0924 13:32:19.340862 27836 model_parser.cc:190] param_file is: +48: I0924 13:32:19.341847 27836 paddle_model_to_program.cc:26] detect model output: [save_infer_model/scale_0.tmp_0] +48: I0924 13:32:19.341898 27836 syntax_test.cc:152] program: +48: Program { +48: var_20 = mul(A, fc_0__w_0, y_num_col_dims=1, x_num_col_dims=1) +48: var_24 = elementwise_add(var_20, fc_0__b_0, axis=1) +48: var_26 = relu(var_24) +48: var_28 = scale(var_26, scale=1) +48: } +48: I0924 13:32:19.371641 27836 graph_compiler.cc:39] [Debug] C Code is: +48: #include +48: #include +48: +48: void fn_mul_0(void* _args, int32_t num_args) +48: { +48: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +48: const cinn_buffer_t* _fc_0__w_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +48: cinn_buffer_t* _Mul_out_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +48: cinn_buffer_malloc((void*)(0), _Mul_out_0); +48: const float* A_reshape_0 = ((const float*)(_A->memory)); +48: float* Mul_out_0 = ((float*)(_Mul_out_0->memory)); +48: float* Mul_out_0_init = ((float*)(_Mul_out_0->memory)); +48: const float* fc_0__w_0_reshape = ((const float*)(_fc_0__w_0->memory)); +48: for (int32_t j = 0; j < 30; j += 1) { +48: Mul_out_0_init[j] = 0; +48: }; +48: for (int32_t j = 0; j < 30; j += 1) { +48: for (int32_t axis_k_0 = 0; axis_k_0 < 30; axis_k_0 += 1) { +48: Mul_out_0[j] = (Mul_out_0[j] + (A_reshape_0[axis_k_0] * fc_0__w_0_reshape[((30 * axis_k_0) + j)])); +48: }; +48: }; +48: cinn_buffer_free((void*)(0), _Mul_out_0); +48: } +48: +48: void fn_elementwise_add_1(void* _args, int32_t num_args) +48: { +48: const cinn_buffer_t* _var_20 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +48: const cinn_buffer_t* _fc_0__b_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +48: cinn_buffer_t* _C_4 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +48: cinn_buffer_malloc((void*)(0), _C_4); +48: float* C_4 = ((float*)(_C_4->memory)); +48: const float* fc_0__b_0 = ((const float*)(_fc_0__b_0->memory)); +48: const float* var_20 = ((const float*)(_var_20->memory)); +48: for (int32_t j = 0; j < 30; j += 1) { +48: C_4[j] = (var_20[j] + fc_0__b_0[j]); +48: }; +48: cinn_buffer_free((void*)(0), _C_4); +48: } +48: +48: void fn_relu_2(void* _args, int32_t num_args) +48: { +48: const cinn_buffer_t* _var_24 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +48: cinn_buffer_t* _Relu_output = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +48: cinn_buffer_malloc((void*)(0), _Relu_output); +48: float* Relu_output = ((float*)(_Relu_output->memory)); +48: const float* var_24 = ((const float*)(_var_24->memory)); +48: for (int32_t j = 0; j < 30; j += 1) { +48: Relu_output[j] = cinn_max(var_24[j], 0); +48: }; +48: cinn_buffer_free((void*)(0), _Relu_output); +48: } +48: +48: void fn_scale_3(void* _args, int32_t num_args) +48: { +48: const cinn_buffer_t* _var_26 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +48: cinn_buffer_t* _Scale_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +48: cinn_buffer_malloc((void*)(0), _Scale_out); +48: float* Scale_out = ((float*)(_Scale_out->memory)); +48: const float* var_26 = ((const float*)(_var_26->memory)); +48: for (int32_t j = 0; j < 30; j += 1) { +48: Scale_out[j] = var_26[j]; +48: }; +48: cinn_buffer_free((void*)(0), _Scale_out); +48: } +48: +48: I0924 13:32:19.398069 27836 syntax_test.cc:169] scope.names: var_26,fc_0__w_0,fc_0__b_0,var_28,A,var_20,var_24 +48: I0924 13:32:19.398090 27836 syntax_test.cc:173] tensor.shape: 1,30 +48: I0924 13:32:19.398094 27836 syntax_test.cc:175] data: 1.1576 +48: I0924 13:32:19.398100 27836 syntax_test.cc:175] data: -0.79889 +48: I0924 13:32:19.398105 27836 syntax_test.cc:175] data: 0.125167 +48: I0924 13:32:19.398108 27836 syntax_test.cc:175] data: 0.0934057 +48: I0924 13:32:19.398110 27836 syntax_test.cc:175] data: 0.0106909 +48: I0924 13:32:19.398113 27836 syntax_test.cc:175] data: 0.60541 +48: I0924 13:32:19.398114 27836 syntax_test.cc:175] data: -0.777485 +48: I0924 13:32:19.398116 27836 syntax_test.cc:175] data: 0.546453 +48: I0924 13:32:19.398119 27836 syntax_test.cc:175] data: -0.624406 +48: I0924 13:32:19.398121 27836 syntax_test.cc:175] data: 1.4859 +48: [ OK ] load_paddle_model.fc_execute (58 ms) +48: [----------] 1 test from load_paddle_model (58 ms total) +48: +48: [----------] Global test environment tear-down +48: [==========] 5 tests from 2 test cases ran. (192 ms total) +48: [ PASSED ] 5 tests. +47: I0924 13:32:19.415700 27835 op_nn_test.cc:167] Test Strategy Codegen: +47: function pool2d (_A, _pad_temp_2, _T_Pool2d_out_2_0) +47: { +47: for (j, 5) +47: { +47: for (k, 5) +47: { +47: for (a, 3) +47: { +47: T_Pool2d_out_2_0_init[0, j, k, a] = 0 +47: } +47: } +47: } +47: for (j, 11) +47: { +47: for (k, 11) +47: { +47: for (a, 3) +47: { +47: pad_temp_2[0, j, k, a] = select(((k < 9) and ((k >= 1) and ((j < 9) and (j >= 1)))), A[0, (-1 + j), (-1 + k), a], 0) +47: } +47: } +47: } +47: for (j, 5) +47: { +47: for (k, 5) +47: { +47: for (a, 3) +47: { +47: for (kernel_idx_3, 2) +47: { +47: for (kernel_idx_4, 2) +47: { +47: T_Pool2d_out_2_0[0, j, k, a] = (T_Pool2d_out_2_0[0, j, k, a] + (pad_temp_2[0, ((2 * j) + kernel_idx_3), ((2 * k) + kernel_idx_4), a] * (1 / float32(cinn_max(((1 * (cinn_min(((-1 + (2 * j)) + 2), 8) - cinn_max((-1 + (2 * j)), 0))) * (cinn_min(((-1 + (2 * k)) + 2), 8) - cinn_max((-1 + (2 * k)), 0))), 1))))) +47: } +47: } +47: } +47: } +47: } +47: } +47: I0924 13:32:19.424890 27835 codegen_llvm.cc:344] instr: %89 = select i1 %88, i32 %87, i32 1 +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 768 +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 1452 +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 300 +47: [ OK ] Operator.Operator_Pool2d_Test2 (85 ms) +47: [ RUN ] Operator.Operator_Pool3d_Test0 +47: I0924 13:32:19.446368 27835 nn.cc:477] kernel_size length is: 3 +47: I0924 13:32:19.446377 27835 nn.cc:478] kernel_size is: 2 +47: I0924 13:32:19.446380 27835 nn.cc:479] padding_size length is: 6 +47: I0924 13:32:19.446382 27835 nn.cc:480] padding_size is: 1 +47: I0924 13:32:19.519640 27835 op_nn_test.cc:226] Test Strategy Codegen: +47: function pool3d (_A, _pad_temp_3, _T_Pool3d_out_0) +47: { +47: for (j, 5) +47: { +47: for (k, 5) +47: { +47: for (a, 5) +47: { +47: for (b, 3) +47: { +47: T_Pool3d_out_0_init[0, j, k, a, b] = 0 +47: } +47: } +47: } +47: } +47: for (j, 10) +47: { +47: for (k, 10) +47: { +47: for (a, 10) +47: { +47: for (b, 3) +47: { +47: pad_temp_3[0, j, k, a, b] = select(((a < 9) and ((a >= 1) and ((k < 9) and ((k >= 1) and ((j < 9) and (j >= 1)))))), A[0, (-1 + j), (-1 + k), (-1 + a), b], -3.40282e+38) +47: } +47: } +47: } +47: } +47: for (j, 5) +47: { +47: for (k, 5) +47: { +47: for (a, 5) +47: { +47: for (b, 3) +47: { +47: for (kernel_idx_5, 2) +47: { +47: for (kernel_idx_6, 2) +47: { +47: for (kernel_idx_7, 2) +47: { +47: T_Pool3d_out_0[0, j, k, a, b] = cinn_max(T_Pool3d_out_0[0, j, k, a, b], pad_temp_3[0, ((2 * j) + kernel_idx_5), ((2 * k) + kernel_idx_6), ((2 * a) + kernel_idx_7), b]) +47: } +47: } +47: } +47: } +47: } +47: } +47: } +47: } +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 6144 +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 15972 +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 1500 +47: [ OK ] Operator.Operator_Pool3d_Test0 (111 ms) +47: [ RUN ] Operator.Operator_Pool1d_Test0 +47: I0924 13:32:19.558034 27835 nn.cc:477] kernel_size length is: 1 +47: I0924 13:32:19.558044 27835 nn.cc:478] kernel_size is: 2 +47: I0924 13:32:19.558048 27835 nn.cc:479] padding_size length is: 2 +47: I0924 13:32:19.558050 27835 nn.cc:480] padding_size is: 1 +47: I0924 13:32:19.587836 27835 op_nn_test.cc:284] Test Strategy Codegen: +47: function pool1d (_A, _pad_temp_4, _T_Pool1d_out_0) +47: { +47: for (j, 5) +47: { +47: for (k, 3) +47: { +47: T_Pool1d_out_0_init[0, j, k] = 0 +47: } +47: } +47: for (j, 10) +47: { +47: for (k, 3) +47: { +47: pad_temp_4[0, j, k] = select(((j < 9) and (j >= 1)), A[0, (-1 + j), k], -3.40282e+38) +47: } +47: } +47: for (j, 5) +47: { +47: for (k, 3) +47: { +47: for (kernel_idx_8, 2) +47: { +47: T_Pool1d_out_0[0, j, k] = cinn_max(T_Pool1d_out_0[0, j, k], pad_temp_4[0, ((2 * j) + kernel_idx_8), k]) +47: } +47: } +47: } +47: } +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 96 +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 132 +47: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 60 +47: [ OK ] Operator.Operator_Pool1d_Test0 (70 ms) +47: [----------] 5 tests from Operator (421 ms total) +47: +47: [----------] Global test environment tear-down +47: [==========] 5 tests from 1 test case ran. (421 ms total) +47: [ PASSED ] 5 tests. +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register relu +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register relu6 +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register conv2d +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register depthwise_conv2d +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register batchnorm +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register pool1d +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register pool2d +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register pool3d +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register sigmoid +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register softmax +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register slice +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register depthwise_conv2d +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register elementwise_add +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register elementwise_mul +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register scale +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register matmul +60: I0100 00:00:00.000000 27850 registry.h:89] RAW: Register mul +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register relu +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register relu6 +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register conv2d +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register depthwise_conv2d +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register batchnorm +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register pool1d +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register pool2d +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register pool3d +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register sigmoid +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register softmax +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register slice +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register depthwise_conv2d +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register elementwise_add +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register elementwise_mul +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register scale +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register matmul +59: I0100 00:00:00.000000 27849 registry.h:89] RAW: Register mul +59: .... +59: ---------------------------------------------------------------------- +59: Ran 4 tests in 0.002s +59: +59: OK +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register relu +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register relu6 +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register conv2d +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register depthwise_conv2d +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register batchnorm +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register pool1d +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register pool2d +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register pool3d +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register sigmoid +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register softmax +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register slice +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register depthwise_conv2d +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register elementwise_add +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register elementwise_mul +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register scale +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register matmul +58: I0100 00:00:00.000000 27848 registry.h:89] RAW: Register mul +58: .. +58: ---------------------------------------------------------------------- +58: Ran 2 tests in 0.001s +58: +58: OK +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register relu +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register relu6 +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register conv2d +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register depthwise_conv2d +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register batchnorm +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register pool1d +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register pool2d +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register pool3d +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register sigmoid +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register softmax +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register slice +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register depthwise_conv2d +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register elementwise_add +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register elementwise_mul +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register scale +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register matmul +57: I0100 00:00:00.000000 27847 registry.h:89] RAW: Register mul +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.cos (29 ms) +43: [ RUN ] elementwise_pe.cosh +43: I0924 13:32:19.527927 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_cosh_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_cosh_fp32_out[i, j] = cinn_cpu_cosh_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.cosh (30 ms) +43: [ RUN ] elementwise_pe.tan +43: I0924 13:32:19.557880 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_tan_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_tan_fp32_out[i, j] = cinn_cpu_tan_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.tan (32 ms) +43: [ RUN ] elementwise_pe.sin +43: I0924 13:32:19.593026 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_sin_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_sin_fp32_out[i, j] = cinn_cpu_sin_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.sin (43 ms) +43: [ RUN ] elementwise_pe.sinh +43: I0924 13:32:19.635660 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_sinh_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_sinh_fp32_out[i, j] = cinn_cpu_sinh_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.sinh (43 ms) +43: [ RUN ] elementwise_pe.acos +43: I0924 13:32:19.678943 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_acos_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_acos_fp32_out[i, j] = cinn_cpu_acos_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.acos (39 ms) +43: [ RUN ] elementwise_pe.acosh +43: I0924 13:32:19.714777 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_acosh_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_acosh_fp32_out[i, j] = cinn_cpu_acosh_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.acosh (30 ms) +43: [ RUN ] elementwise_pe.asin +43: I0924 13:32:19.744695 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_asin_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_asin_fp32_out[i, j] = cinn_cpu_asin_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.asin (29 ms) +43: [ RUN ] elementwise_pe.asinh +43: I0924 13:32:19.774551 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_asinh_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_asinh_fp32_out[i, j] = cinn_cpu_asinh_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.asinh (31 ms) +43: [ RUN ] elementwise_pe.atan +43: I0924 13:32:19.804793 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_atan_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_atan_fp32_out[i, j] = cinn_cpu_atan_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.atan (30 ms) +43: [ RUN ] elementwise_pe.atanh +43: I0924 13:32:19.834564 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_atanh_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_atanh_fp32_out[i, j] = cinn_cpu_atanh_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.atanh (29 ms) +43: [ RUN ] elementwise_pe.isnan +43: I0924 13:32:19.864051 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_isnan_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_isnan_fp32_out[i, j] = cinn_cpu_isnan_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.isnan (29 ms) +43: [ RUN ] elementwise_pe.tanh +43: I0924 13:32:19.893018 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_tanh_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_tanh_fp32_out[i, j] = cinn_cpu_tanh_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.tanh (29 ms) +43: [ RUN ] elementwise_pe.isfinite +43: I0924 13:32:19.922298 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_isfinite_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_isfinite_fp32_out[i, j] = cinn_cpu_isfinite_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.isfinite (36 ms) +43: [ RUN ] elementwise_pe.isinf +43: I0924 13:32:19.960331 27831 pe_elementwise_test.cc:33] func: +43: function fn (_A, _PE_Elementwise_isinf_fp32_out) +43: { +43: for (i, 100) +43: { +43: for (j, 32) +43: { +43: PE_Elementwise_isinf_fp32_out[i, j] = cinn_cpu_isinf_fp32(A[i, j]) +43: } +43: } +43: } +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +43: [ OK ] elementwise_pe.isinf (40 ms) +43: [----------] 25 tests from elementwise_pe (792 ms total) +43: +43: [----------] Global test environment tear-down +43: [==========] 25 tests from 1 test case ran. (792 ms total) +43: [ PASSED ] 25 tests. +56: [==========] Running 1 test from 1 test case. +56: [----------] Global test environment set-up. +56: [----------] 1 test from test03 +56: [ RUN ] test03.basic +56: [ OK ] test03.basic (0 ms) +56: [----------] 1 test from test03 (0 ms total) +56: +56: [----------] Global test environment tear-down +56: [==========] 1 test from 1 test case ran. (0 ms total) +56: [ PASSED ] 1 test. +54: [==========] Running 1 test from 1 test case. +54: [----------] Global test environment set-up. +54: [----------] 1 test from test02 +54: [ RUN ] test02.basic +54: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +54: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +54: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +54: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +54: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +52: [==========] Running 3 tests from 1 test case. +52: [----------] Global test environment set-up. +52: [----------] 3 tests from test01 +52: [ RUN ] test01.basic +52: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +52: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +52: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +52: WARNING: Logging before InitGoogleLogging() is written to STDERR +52: I0924 13:32:19.536278 27839 test01_elementwise_add_case.cc:30] test1 basic +52: I0924 13:32:19.536424 27839 test01_elementwise_add_case.cc:34] test1 vectorize +52: [ OK ] test01.basic (0 ms) +52: [ RUN ] test01.compute_at +52: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +52: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +52: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +52: I0924 13:32:19.536634 27839 test01_elementwise_add_case.cc:73] test1 basic +52: I0924 13:32:19.536728 27839 test01_elementwise_add_case.cc:77] test1 vectorize +52: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128 +52: [ OK ] test01.compute_at (0 ms) +52: [ RUN ] test01.compute_at_level1 +52: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +52: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +52: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +52: I0924 13:32:19.536936 27839 test01_elementwise_add_case.cc:120] test1 basic +52: I0924 13:32:19.537029 27839 test01_elementwise_add_case.cc:124] test1 vectorize +52: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4 +52: [ OK ] test01.compute_at_level1 (1 ms) +52: [----------] 3 tests from test01 (1 ms total) +52: +52: [----------] Global test environment tear-down +52: [==========] 3 tests from 1 test case ran. (1 ms total) +52: [ PASSED ] 3 tests. +51/68 Test #48: test_frontend_syntax ................... Passed 0.34 sec +52/68 Test #59: test_cinn_packed_func .................. Passed 0.28 sec +53/68 Test #58: test_cinn_common ....................... Passed 0.28 sec +54/68 Test #43: test_pe_elementwise .................... Passed 0.81 sec +55/68 Test #56: test03_conv_case ....................... Passed 0.47 sec +56/68 Test #52: test01_elementwise_add_case ............ Passed 0.58 sec +test 61 + Start 61: test_cinn_pe_reduction + +61: Test command: /usr/bin/cmake "-E" "env" "PYTHONPATH=/home/wangyue50/CINN-my/CINN/build/python:/home/wangyue50/CINN-my/CINN/python/tests/..:/home/wangyue50/CINN-my/CINN/build/python/cinn:/home/wangyue50/CINN-my/CINN" "python3" "/home/wangyue50/CINN-my/CINN/python/tests/test_pe_reduction.py" +61: Test timeout computed to be: 9.99988e+06 +test 62 + Start 62: test_cinn_pe_transform + +62: Test command: /usr/bin/cmake "-E" "env" "PYTHONPATH=/home/wangyue50/CINN-my/CINN/build/python:/home/wangyue50/CINN-my/CINN/python/tests/..:/home/wangyue50/CINN-my/CINN/build/python/cinn:/home/wangyue50/CINN-my/CINN" "python3" "/home/wangyue50/CINN-my/CINN/python/tests/test_pe_transform.py" +62: Test timeout computed to be: 9.99988e+06 +test 63 + Start 63: test_cinn_op_nn + +63: Test command: /usr/bin/cmake "-E" "env" "PYTHONPATH=/home/wangyue50/CINN-my/CINN/build/python:/home/wangyue50/CINN-my/CINN/python/tests/..:/home/wangyue50/CINN-my/CINN/build/python/cinn:/home/wangyue50/CINN-my/CINN" "python3" "/home/wangyue50/CINN-my/CINN/python/tests/test_op_nn.py" +63: Test timeout computed to be: 9.99988e+06 +test 64 + Start 64: test_cinn_op_broadcast + +64: Test command: /usr/bin/cmake "-E" "env" "PYTHONPATH=/home/wangyue50/CINN-my/CINN/build/python:/home/wangyue50/CINN-my/CINN/python/tests/..:/home/wangyue50/CINN-my/CINN/build/python/cinn:/home/wangyue50/CINN-my/CINN" "python3" "/home/wangyue50/CINN-my/CINN/python/tests/test_op_broadcast.py" +64: Test timeout computed to be: 9.99988e+06 +test 65 + Start 65: test_cinn_frontend + +65: Test command: /usr/bin/cmake "-E" "env" "PYTHONPATH=/home/wangyue50/CINN-my/CINN/build/python:/home/wangyue50/CINN-my/CINN/python/tests/..:/home/wangyue50/CINN-my/CINN/build/python/cinn:/home/wangyue50/CINN-my/CINN" "python3" "/home/wangyue50/CINN-my/CINN/python/tests/test_frontend.py" "/home/wangyue50/CINN-my/CINN/build/thirds/naive_mul_model" "/home/wangyue50/CINN-my/CINN/build/thirds/multi_fc_model" +65: Test timeout computed to be: 9.99988e+06 +test 66 + Start 66: test_cinn_fake_resnet + +66: Test command: /usr/bin/cmake "-E" "env" "PYTHONPATH=/home/wangyue50/CINN-my/CINN/build/python:/home/wangyue50/CINN-my/CINN/python/tests/..:/home/wangyue50/CINN-my/CINN/build/python/cinn:/home/wangyue50/CINN-my/CINN" "python3" "/home/wangyue50/CINN-my/CINN/python/tests/test_resnet.py" "/home/wangyue50/CINN-my/CINN/build/thirds/resnet_model" +66: Test timeout computed to be: 9.99988e+06 +60: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4096 +60: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4096 +57: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +57: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +57: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +57: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +57: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register relu +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register relu6 +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register conv2d +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register depthwise_conv2d +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register batchnorm +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register pool1d +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register pool2d +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register pool3d +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register sigmoid +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register softmax +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register slice +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register depthwise_conv2d +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register elementwise_add +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register elementwise_mul +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register scale +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register matmul +62: I0100 00:00:00.000000 28051 registry.h:89] RAW: Register mul +62: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 6400 +62: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 2048 +62: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +62: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 6400 +62: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 2048 +62: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +62: . +62: ---------------------------------------------------------------------- +62: Ran 2 tests in 0.165s +62: +62: OK +62: function test_matmul (_x, _y, _T_Matmul_out) +62: { +62: for (i, 100) +62: { +62: for (j, 32) +62: { +62: for (kk, 16) +62: { +62: T_Matmul_out[i, j] = (T_Matmul_out[i, j] + (x[i, kk] * y[kk, j])) +62: } +62: } +62: } +62: } +62: function test_matmul (_x, _y, _T_Matmul_out) +62: { +62: for (i, 100) +62: { +62: for (j, 32) +62: { +62: for (kk_0, 16) +62: { +62: T_Matmul_out[i, j] = (T_Matmul_out[i, j] + (x[i, kk_0] * y[j, kk_0])) +62: } +62: } +62: } +62: } +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register relu +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register relu6 +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register conv2d +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register depthwise_conv2d +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register batchnorm +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register pool1d +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register pool2d +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register pool3d +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register sigmoid +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register softmax +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register slice +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register depthwise_conv2d +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register elementwise_add +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register elementwise_mul +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register scale +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register matmul +61: I0100 00:00:00.000000 28046 registry.h:89] RAW: Register mul +61: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4096 +61: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4 +61: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4096 +61: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4 +61: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4096 +61: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128 +61: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4096 +61: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128 +61: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4096 +61: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128 +57/68 Test #47: test_op_nn ............................. Passed 1.01 sec +58/68 Test #62: test_cinn_pe_transform ................. Passed 0.56 sec +test 67 + Start 67: test_cinn_real_resnet18 + +67: Test command: /usr/bin/cmake "-E" "env" "PYTHONPATH=/home/wangyue50/CINN-my/CINN/build/python:/home/wangyue50/CINN-my/CINN/python/tests/..:/home/wangyue50/CINN-my/CINN/build/python/cinn:/home/wangyue50/CINN-my/CINN" "python3" "/home/wangyue50/CINN-my/CINN/python/tests/test_resnet18.py" "/home/wangyue50/CINN-my/CINN/build/thirds/ResNet18" +67: Test timeout computed to be: 9.99988e+06 +test 68 + Start 68: test_cinn_real_mobilenetV2 + +68: Test command: /usr/bin/cmake "-E" "env" "PYTHONPATH=/home/wangyue50/CINN-my/CINN/build/python:/home/wangyue50/CINN-my/CINN/python/tests/..:/home/wangyue50/CINN-my/CINN/build/python/cinn:/home/wangyue50/CINN-my/CINN" "python3" "/home/wangyue50/CINN-my/CINN/python/tests/test_mobilenetv2.py" "/home/wangyue50/CINN-my/CINN/build/thirds/MobileNetV2" +68: Test timeout computed to be: 9.99988e+06 +60: . +60: ---------------------------------------------------------------------- +60: Ran 1 test in 0.835s +60: +60: OK +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register relu +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register relu6 +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register conv2d +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register depthwise_conv2d +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register batchnorm +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register pool1d +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register pool2d +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register pool3d +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register sigmoid +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register softmax +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register slice +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register depthwise_conv2d +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register elementwise_add +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register elementwise_mul +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register scale +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register matmul +64: I0100 00:00:00.000000 28048 registry.h:89] RAW: Register mul +64: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +64: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +64: WARNING:root:func: +64: +64: function elementwise_add (_Var_1, _Var_2, _C) +64: { +64: for (i, 100) +64: { +64: for (j, 32) +64: { +64: C[i, j] = (Var_1[i, j] + Var_2[i, j]) +64: } +64: } +64: } +64: +64: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +64: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 24 +64: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 8 +64: WARNING:root:func: +64: +64: function elementwise_add (_Var_1, _Var_2, _C_0) +64: { +64: for (i, 3) +64: { +64: for (j, 2) +64: { +64: C_0[i, j] = (Var_1[i, j] + Var_2[j]) +64: } +64: } +64: } +64: +64: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 24 +64: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +64: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +64: WARNING:root:func: +64: +64: function elementwise_mul (_Var_1, _Var_2, _C_1) +64: { +64: for (i, 100) +64: { +64: for (j, 32) +64: { +64: C_1[i, j] = (Var_1[i, j] * Var_2[i, j]) +64: } +64: } +64: } +64: +64: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +64: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 24 +64: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 8 +64: WARNING:root:func: +64: +64: function elementwise_mul (_Var_1, _Var_2, _C_2) +64: { +64: for (i, 3) +64: { +64: for (j, 2) +64: { +64: C_2[i, j] = (Var_1[i, j] * Var_2[j]) +64: } +64: } +64: } +64: +64: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 24 +64: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +64: WARNING:root:func: +64: +64: function scale (_Var_1, _Scale_out) +64: { +64: for (i, 100) +64: { +64: for (j, 32) +64: { +64: Scale_out[i, j] = (0.3 + (0.7 * Var_1[i, j])) +64: } +64: } +64: } +64: +64: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +64: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +64: WARNING:root:func: +64: +64: function scale (_Var_1, _Scale_out_0) +64: { +64: for (i, 100) +64: { +64: for (j, 32) +64: { +64: Scale_out_0[i, j] = (0.24 + (0.6 * Var_1[i, j])) +64: } +64: } +64: } +64: +64: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 12800 +64: .. +64: ---------------------------------------------------------------------- +64: Ran 7 tests in 0.302s +64: +64: OK +61: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4096 +61: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128 +61: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4096 +61: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128 +61: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4096 +61: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 128 +61: . +61: ---------------------------------------------------------------------- +61: Ran 8 tests in 0.826s +61: +61: OK +61: function test_sum (_x, _T_sum_out) +61: { +61: T_sum_out_init[0, 0] = 2 +61: for (kk, 32) +61: { +61: for (kk_0, 32) +61: { +61: T_sum_out[0, 0] = (T_sum_out[0, 0] + x[kk, kk_0]) +61: } +61: } +61: } +61: function test_prod (_x, _T_prod_out) +61: { +61: T_prod_out_init[0, 0] = 2 +61: for (kk_1, 32) +61: { +61: for (kk_2, 32) +61: { +61: T_prod_out[0, 0] = (T_prod_out[0, 0] * x[kk_1, kk_2]) +61: } +61: } +61: } +61: function test_sum (_x, _T_sum_out) +61: { +61: T_sum_out_init_0[0] = 2 +61: for (kk_3, 32) +61: { +61: for (kk_4, 32) +61: { +61: T_sum_out[0] = (T_sum_out[0] + x[kk_3, kk_4]) +61: } +61: } +61: } +61: function test_prod (_x, _T_prod_out) +61: { +61: T_prod_out_init_0[0] = 2 +61: for (kk_5, 32) +61: { +61: for (kk_6, 32) +61: { +61: T_prod_out[0] = (T_prod_out[0] * x[kk_5, kk_6]) +61: } +61: } +61: } +61: function test_sum (_x, _T_sum_out) +61: { +61: for (i, 32) +61: { +61: T_sum_out_init_1[i] = 2 +61: } +61: for (i, 32) +61: { +61: for (kk_7, 32) +61: { +61: T_sum_out[i] = (T_sum_out[i] + x[kk_7, i]) +61: } +61: } +61: } +61: function test_prod (_x, _T_prod_out) +61: { +61: for (i, 32) +61: { +61: T_prod_out_init_1[i] = 2 +61: } +61: for (i, 32) +61: { +61: for (kk_8, 32) +61: { +61: T_prod_out[i] = (T_prod_out[i] * x[kk_8, i]) +61: } +61: } +61: } +61: function test_sum (_x, _T_sum_out) +61: { +61: for (j, 32) +61: { +61: T_sum_out_init_2[0, j] = 2 +61: } +61: for (j, 32) +61: { +61: for (kk_9, 32) +61: { +61: T_sum_out[0, j] = (T_sum_out[0, j] + x[kk_9, j]) +61: } +61: } +61: } +61: function test_prod (_x, _T_prod_out) +61: { +61: for (j, 32) +61: { +61: T_prod_out_init_2[0, j] = 2 +61: } +61: for (j, 32) +61: { +61: for (kk_10, 32) +61: { +61: T_prod_out[0, j] = (T_prod_out[0, j] * x[kk_10, j]) +61: } +61: } +61: } +61: function test_sum (_x, _T_sum_out) +61: { +61: for (i, 32) +61: { +61: T_sum_out_init_3[i] = 2 +61: } +61: for (i, 32) +61: { +61: for (kk_11, 32) +61: { +61: T_sum_out[i] = (T_sum_out[i] + x[i, kk_11]) +61: } +61: } +61: } +61: function test_prod (_x, _T_prod_out) +61: { +61: for (i, 32) +61: { +61: T_prod_out_init_3[i] = 2 +61: } +61: for (i, 32) +61: { +61: for (kk_12, 32) +61: { +61: T_prod_out[i] = (T_prod_out[i] * x[i, kk_12]) +61: } +61: } +61: } +61: function test_sum (_x, _T_sum_out) +61: { +61: for (i, 32) +61: { +61: T_sum_out_init_4[i, 0] = 2 +61: } +61: for (i, 32) +61: { +61: for (kk_13, 32) +61: { +61: T_sum_out[i, 0] = (T_sum_out[i, 0] + x[i, kk_13]) +61: } +61: } +61: } +61: function test_prod (_x, _T_prod_out) +61: { +61: for (i, 32) +61: { +61: T_prod_out_init_4[i, 0] = 2 +61: } +61: for (i, 32) +61: { +61: for (kk_14, 32) +61: { +61: T_prod_out[i, 0] = (T_prod_out[i, 0] * x[i, kk_14]) +61: } +61: } +61: } +61: function test_max (_x, _T_max_out) +61: { +61: for (i, 32) +61: { +61: for (kk_15, 32) +61: { +61: T_max_out[i, 0] = cinn_max(T_max_out[i, 0], x[i, kk_15]) +61: } +61: } +61: } +61: function test_min (_x, _T_min_out) +61: { +61: for (i, 32) +61: { +61: for (kk_16, 32) +61: { +61: T_min_out[i, 0] = cinn_min(T_min_out[i, 0], x[i, kk_16]) +61: } +61: } +61: } +61: function test_max (_x, _T_max_out) +61: { +61: for (i, 32) +61: { +61: for (kk_17, 32) +61: { +61: T_max_out[i] = cinn_max(T_max_out[i], x[i, kk_17]) +61: } +61: } +61: } +61: function test_min (_x, _T_min_out) +61: { +61: for (i, 32) +61: { +61: for (kk_18, 32) +61: { +61: T_min_out[i] = cinn_min(T_min_out[i], x[i, kk_18]) +61: } +61: } +61: } +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register relu +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register relu6 +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register conv2d +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register depthwise_conv2d +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register batchnorm +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register pool1d +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register pool2d +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register pool3d +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register sigmoid +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register softmax +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register slice +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register depthwise_conv2d +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register elementwise_add +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register elementwise_mul +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register scale +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register matmul +66: I0100 00:00:00.000000 28050 registry.h:89] RAW: Register mul +66: WARNING: Logging before InitGoogleLogging() is written to STDERR +66: I0924 13:32:21.558060 28050 syntax.cc:130] Loading Paddle model from /home/wangyue50/CINN-my/CINN/build/thirds/resnet_model +66: I0924 13:32:21.558151 28050 model_parser.cc:188] model_dir is: /home/wangyue50/CINN-my/CINN/build/thirds/resnet_model +66: I0924 13:32:21.558156 28050 model_parser.cc:189] model_file is: __model__ +66: I0924 13:32:21.558158 28050 model_parser.cc:190] param_file is: +66: I0924 13:32:21.560164 28050 paddle_model_to_program.cc:26] detect model output: [save_infer_model/scale_0.tmp_0] +66: I0924 13:32:21.560267 28050 executor.cc:49] Program: +66: Program { +66: var_3, var_4 = depthwise_conv2d(resnet_input, depthwise_conv2d_0__w_0, stride=[1,1], padding=[1,1], groups=32, dilation=[1,1]) +66: var_8 = elementwise_add(var_4, depthwise_conv2d_0__b_0, axis=1) +66: var_10 = relu6(var_8) +66: var_12 = relu(var_10) +66: var_14 = scale(var_12, scale=1) +66: } +59/68 Test #60: test_cinn_pe_elementwise ............... Passed 1.64 sec +60/68 Test #64: test_cinn_op_broadcast ................. Passed 1.07 sec +61/68 Test #61: test_cinn_pe_reduction ................. Passed 1.48 sec +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register relu +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register relu6 +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register conv2d +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register depthwise_conv2d +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register batchnorm +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register pool1d +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register pool2d +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register pool3d +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register sigmoid +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register softmax +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register slice +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register depthwise_conv2d +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register elementwise_add +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register elementwise_mul +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register scale +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register matmul +65: I0100 00:00:00.000000 28049 registry.h:89] RAW: Register mul +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register relu +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register relu6 +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register conv2d +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register depthwise_conv2d +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register batchnorm +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register pool1d +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register pool2d +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register pool3d +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register sigmoid +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register softmax +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register slice +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register depthwise_conv2d +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register elementwise_add +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register elementwise_mul +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register scale +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register matmul +63: I0100 00:00:00.000000 28047 registry.h:89] RAW: Register mul +63: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 1200 +63: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 37632 +63: 2020-09-24 13:32:21,684-WARNING: func: +63: +63: function conv2d (_Var_1, _Var_2, _T_pad_out_0, _weights_dilation, _Conv2d_nchw_out) +63: { +63: for (j, 3) +63: { +63: for (k, 12) +63: { +63: for (a, 12) +63: { +63: T_pad_out_0[0, j, k, a] = select(((a < 11) and ((a >= 1) and ((k < 11) and (k >= 1)))), Var_1[0, j, (-1 + k), (-1 + a)], 0) +63: } +63: } +63: } +63: for (i, 64) +63: { +63: for (j, 3) +63: { +63: for (k, 13) +63: { +63: for (a, 13) +63: { +63: weights_dilation[i, j, k, a] = select((((a % 2) == 0) and ((k % 2) == 0)), Var_2[i, j, (k / 2), (a / 2)], 0) +63: } +63: } +63: } +63: } +63: for (j, 64) +63: { +63: for (fc, 3) +63: { +63: for (fy, 13) +63: { +63: for (fx, 13) +63: { +63: Conv2d_nchw_out[0, j, 0, 0] = (Conv2d_nchw_out[0, j, 0, 0] + (T_pad_out_0[0, fc, fy, fx] * weights_dilation[j, fc, fy, fx])) +63: } +63: } +63: } +63: } +63: } +63: +63: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 1728 +63: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 129792 +63: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 256 +63: .. +63: ---------------------------------------------------------------------- +63: Ran 2 tests in 0.212s +63: +63: OK +63: output's shape is: (1, 1, 64, 1, 1) +63: pad's shape is: [1, 3, 12, 12] +66: I0924 13:32:21.720008 28050 graph_compiler.cc:39] [Debug] C Code is: +66: #include +66: #include +66: +66: void fn_depthwise_conv2d_0(void* _args, int32_t num_args) +66: { +66: const cinn_buffer_t* _resnet_input = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +66: const cinn_buffer_t* _depthwise_conv2d_0__w_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +66: cinn_buffer_t* _T_pad_out_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +66: cinn_buffer_t* _T_depthwise_conv2d_nchw_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +66: cinn_buffer_malloc((void*)(0), _T_pad_out_0); +66: cinn_buffer_malloc((void*)(0), _T_depthwise_conv2d_nchw_out); +66: float* T_depthwise_conv2d_nchw_out = ((float*)(_T_depthwise_conv2d_nchw_out->memory)); +66: float* T_depthwise_conv2d_nchw_out_init = ((float*)(_T_depthwise_conv2d_nchw_out->memory)); +66: float* T_pad_out_0 = ((float*)(_T_pad_out_0->memory)); +66: const float* depthwise_conv2d_0__w_0 = ((const float*)(_depthwise_conv2d_0__w_0->memory)); +66: const float* resnet_input = ((const float*)(_resnet_input->memory)); +66: for (int32_t j = 0; j < 32; j += 1) { +66: for (int32_t k = 0; k < 112; k += 1) { +66: for (int32_t a = 0; a < 112; a += 1) { +66: T_depthwise_conv2d_nchw_out_init[((12544 * j) + ((112 * k) + a))] = 0; +66: }; +66: }; +66: }; +66: for (int32_t j = 0; j < 32; j += 1) { +66: for (int32_t k = 0; k < 114; k += 1) { +66: for (int32_t a = 0; a < 114; a += 1) { +66: T_pad_out_0[((12996 * j) + ((114 * k) + a))] = ((((a < 113) && ((a >= 1) && ((k < 113) && (k >= 1))))) ? resnet_input[(-113 + ((12544 * j) + ((112 * k) + a)))] : 0); +66: }; +66: }; +66: }; +66: for (int32_t j = 0; j < 32; j += 1) { +66: for (int32_t k = 0; k < 112; k += 1) { +66: for (int32_t a = 0; a < 112; a += 1) { +66: for (int32_t kh = 0; kh < 3; kh += 1) { +66: for (int32_t kw = 0; kw < 3; kw += 1) { +66: T_depthwise_conv2d_nchw_out[((12544 * j) + ((112 * k) + a))] = (T_depthwise_conv2d_nchw_out[((12544 * j) + ((112 * k) + a))] + (T_pad_out_0[(((j / 1) * 12996) + ((114 * k) + ((114 * kh) + (a + kw))))] * depthwise_conv2d_0__w_0[(((j / 1) * 9) + ((3 * kh) + kw))])); +66: }; +66: }; +66: }; +66: }; +66: }; +66: cinn_buffer_free((void*)(0), _T_pad_out_0); +66: cinn_buffer_free((void*)(0), _T_depthwise_conv2d_nchw_out); +66: } +66: +66: void fn_elementwise_add_1(void* _args, int32_t num_args) +66: { +66: const cinn_buffer_t* _var_4 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +66: const cinn_buffer_t* _depthwise_conv2d_0__b_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +66: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +66: cinn_buffer_malloc((void*)(0), _C); +66: float* C = ((float*)(_C->memory)); +66: const float* depthwise_conv2d_0__b_0 = ((const float*)(_depthwise_conv2d_0__b_0->memory)); +66: const float* var_4 = ((const float*)(_var_4->memory)); +66: for (int32_t j = 0; j < 32; j += 1) { +66: for (int32_t k = 0; k < 112; k += 1) { +66: for (int32_t a = 0; a < 112; a += 1) { +66: C[((12544 * j) + ((112 * k) + a))] = (var_4[((12544 * j) + ((112 * k) + a))] + depthwise_conv2d_0__b_0[j]); +66: }; +66: }; +66: }; +66: cinn_buffer_free((void*)(0), _C); +66: } +66: +66: void fn_relu6_2(void* _args, int32_t num_args) +66: { +66: const cinn_buffer_t* _var_8 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +66: cinn_buffer_t* _Relu6_output = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +66: cinn_buffer_malloc((void*)(0), _Relu6_output); +66: float* Relu6_output = ((float*)(_Relu6_output->memory)); +66: const float* var_8 = ((const float*)(_var_8->memory)); +66: for (int32_t j = 0; j < 32; j += 1) { +66: for (int32_t k = 0; k < 112; k += 1) { +66: for (int32_t a = 0; a < 112; a += 1) { +66: Relu6_output[((12544 * j) + ((112 * k) + a))] = cinn_min(cinn_max(var_8[((12544 * j) + ((112 * k) + a))], 0), 6); +66: }; +66: }; +66: }; +66: cinn_buffer_free((void*)(0), _Relu6_output); +66: } +66: +66: void fn_relu_3(void* _args, int32_t num_args) +66: { +66: const cinn_buffer_t* _var_10 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +66: cinn_buffer_t* _Relu_output = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +66: cinn_buffer_malloc((void*)(0), _Relu_output); +66: float* Relu_output = ((float*)(_Relu_output->memory)); +66: const float* var_10 = ((const float*)(_var_10->memory)); +66: for (int32_t j = 0; j < 32; j += 1) { +66: for (int32_t k = 0; k < 112; k += 1) { +66: for (int32_t a = 0; a < 112; a += 1) { +66: Relu_output[((12544 * j) + ((112 * k) + a))] = cinn_max(var_10[((12544 * j) + ((112 * k) + a))], 0); +66: }; +66: }; +66: }; +66: cinn_buffer_free((void*)(0), _Relu_output); +66: } +66: +66: void fn_scale_4(void* _args, int32_t num_args) +66: { +66: const cinn_buffer_t* _var_12 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +66: cinn_buffer_t* _Scale_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +66: cinn_buffer_malloc((void*)(0), _Scale_out); +66: float* Scale_out = ((float*)(_Scale_out->memory)); +66: const float* var_12 = ((const float*)(_var_12->memory)); +66: for (int32_t j = 0; j < 32; j += 1) { +66: for (int32_t k = 0; k < 112; k += 1) { +66: for (int32_t a = 0; a < 112; a += 1) { +66: Scale_out[((12544 * j) + ((112 * k) + a))] = var_12[((12544 * j) + ((112 * k) + a))]; +66: }; +66: }; +66: }; +66: cinn_buffer_free((void*)(0), _Scale_out); +66: } +66: +66: . +66: ---------------------------------------------------------------------- +66: Ran 1 test in 0.399s +66: +66: OK +66: result in test_model: +66: +62/68 Test #63: test_cinn_op_nn ........................ Passed 1.78 sec +63/68 Test #66: test_cinn_fake_resnet .................. Passed 2.08 sec +65: WARNING: Logging before InitGoogleLogging() is written to STDERR +65: I0924 13:32:21.836252 28049 graph_compiler.cc:39] [Debug] C Code is: +65: #include +65: #include +65: +65: void fn_elementwise_add_0(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _C); +65: const float* A = ((const float*)(_A->memory)); +65: const float* B = ((const float*)(_B->memory)); +65: float* C = ((float*)(_C->memory)); +65: for (int32_t j = 0; j < 24; j += 1) { +65: for (int32_t k = 0; k < 56; k += 1) { +65: for (int32_t a = 0; a < 56; a += 1) { +65: C[((3136 * j) + ((56 * k) + a))] = (A[((3136 * j) + ((56 * k) + a))] + B[((3136 * j) + ((56 * k) + a))]); +65: }; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _C); +65: } +65: +65: void fn_relu_1(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: cinn_buffer_t* _Relu_output = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_malloc((void*)(0), _Relu_output); +65: float* Relu_output = ((float*)(_Relu_output->memory)); +65: const float* var = ((const float*)(_var->memory)); +65: for (int32_t j = 0; j < 24; j += 1) { +65: for (int32_t k = 0; k < 56; k += 1) { +65: for (int32_t a = 0; a < 56; a += 1) { +65: Relu_output[((3136 * j) + ((56 * k) + a))] = cinn_max(var[((3136 * j) + ((56 * k) + a))], 0); +65: }; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Relu_output); +65: } +65: +65: void fn_conv2d_2(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _E = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _T_Identity_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_t* _weights_dilation = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +65: cinn_buffer_t* _Conv2d_nchw_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +65: cinn_buffer_malloc((void*)(0), _T_Identity_out); +65: cinn_buffer_malloc((void*)(0), _weights_dilation); +65: cinn_buffer_malloc((void*)(0), _Conv2d_nchw_out); +65: float* Conv2d_nchw_out = ((float*)(_Conv2d_nchw_out->memory)); +65: float* Conv2d_nchw_out_init = ((float*)(_Conv2d_nchw_out->memory)); +65: const float* E = ((const float*)(_E->memory)); +65: float* T_Identity_out = ((float*)(_T_Identity_out->memory)); +65: const float* var_0 = ((const float*)(_var_0->memory)); +65: float* weights_dilation = ((float*)(_weights_dilation->memory)); +65: for (int32_t j = 0; j < 144; j += 1) { +65: for (int32_t k = 0; k < 56; k += 1) { +65: for (int32_t a = 0; a < 56; a += 1) { +65: Conv2d_nchw_out_init[((3136 * j) + ((56 * k) + a))] = 0; +65: }; +65: }; +65: }; +65: for (int32_t i = 0; i < 144; i += 1) { +65: for (int32_t j = 0; j < 24; j += 1) { +65: weights_dilation[((24 * i) + j)] = (((((0 % 1) == 0) && ((0 % 1) == 0))) ? E[((24 * i) + j)] : 0); +65: }; +65: }; +65: for (int32_t j = 0; j < 24; j += 1) { +65: for (int32_t k = 0; k < 56; k += 1) { +65: for (int32_t a = 0; a < 56; a += 1) { +65: T_Identity_out[((3136 * j) + ((56 * k) + a))] = var_0[((3136 * j) + ((56 * k) + a))]; +65: }; +65: }; +65: }; +65: for (int32_t j = 0; j < 144; j += 1) { +65: for (int32_t k = 0; k < 56; k += 1) { +65: for (int32_t a = 0; a < 56; a += 1) { +65: for (int32_t fc = 0; fc < 24; fc += 1) { +65: Conv2d_nchw_out[((3136 * j) + ((56 * k) + a))] = (Conv2d_nchw_out[((3136 * j) + ((56 * k) + a))] + (T_Identity_out[((3136 * fc) + ((56 * k) + a))] * weights_dilation[((24 * j) + fc)])); +65: }; +65: }; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _T_Identity_out); +65: cinn_buffer_free((void*)(0), _weights_dilation); +65: cinn_buffer_free((void*)(0), _Conv2d_nchw_out); +65: } +65: +65: void fn_scale_3(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: cinn_buffer_t* _Scale_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_malloc((void*)(0), _Scale_out); +65: float* Scale_out = ((float*)(_Scale_out->memory)); +65: const float* var_3 = ((const float*)(_var_3->memory)); +65: for (int32_t j = 0; j < 144; j += 1) { +65: for (int32_t k = 0; k < 56; k += 1) { +65: for (int32_t a = 0; a < 56; a += 1) { +65: Scale_out[((3136 * j) + ((56 * k) + a))] = (0.5 + (2 * var_3[((3136 * j) + ((56 * k) + a))])); +65: }; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Scale_out); +65: } +65: +65: void fn_softmax_4(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_4 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: cinn_buffer_t* _softmax_temp_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _softmax_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _softmax_temp_out); +65: cinn_buffer_malloc((void*)(0), _softmax_out); +65: float* softmax_out = ((float*)(_softmax_out->memory)); +65: float* softmax_temp_out = ((float*)(_softmax_temp_out->memory)); +65: float* softmax_temp_out_init = ((float*)(_softmax_temp_out->memory)); +65: const float* var_4 = ((const float*)(_var_4->memory)); +65: for (int32_t j = 0; j < 144; j += 1) { +65: for (int32_t k = 0; k < 56; k += 1) { +65: for (int32_t a = 0; a < 56; a += 1) { +65: softmax_temp_out_init[((3136 * j) + ((56 * k) + a))] = 0; +65: }; +65: }; +65: }; +65: for (int32_t j = 0; j < 144; j += 1) { +65: for (int32_t k = 0; k < 56; k += 1) { +65: for (int32_t a = 0; a < 56; a += 1) { +65: for (int32_t axis_j = 0; axis_j < 144; axis_j += 1) { +65: softmax_temp_out[((3136 * j) + ((56 * k) + a))] = (softmax_temp_out[((3136 * j) + ((56 * k) + a))] + cinn_cpu_exp_fp32(var_4[((3136 * axis_j) + ((56 * k) + a))])); +65: }; +65: }; +65: }; +65: }; +65: for (int32_t j = 0; j < 144; j += 1) { +65: for (int32_t k = 0; k < 56; k += 1) { +65: for (int32_t a = 0; a < 56; a += 1) { +65: softmax_out[((3136 * j) + ((56 * k) + a))] = (cinn_cpu_exp_fp32(var_4[((3136 * j) + ((56 * k) + a))]) * (1 / softmax_temp_out[((3136 * j) + ((56 * k) + a))])); +65: }; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _softmax_temp_out); +65: cinn_buffer_free((void*)(0), _softmax_out); +65: } +65: +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register relu +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register relu6 +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register conv2d +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register depthwise_conv2d +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register batchnorm +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register pool1d +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register pool2d +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register pool3d +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register sigmoid +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register softmax +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register slice +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register depthwise_conv2d +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register elementwise_add +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register elementwise_mul +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register scale +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register matmul +68: I0100 00:00:00.000000 28814 registry.h:89] RAW: Register mul +68: WARNING: Logging before InitGoogleLogging() is written to STDERR +68: I0924 13:32:22.196000 28814 syntax.cc:130] Loading Paddle model from /home/wangyue50/CINN-my/CINN/build/thirds/MobileNetV2 +68: I0924 13:32:22.196074 28814 model_parser.cc:188] model_dir is: /home/wangyue50/CINN-my/CINN/build/thirds/MobileNetV2 +68: I0924 13:32:22.196079 28814 model_parser.cc:189] model_file is: __model__ +68: I0924 13:32:22.196080 28814 model_parser.cc:190] param_file is: +68: I0924 13:32:22.255563 28814 paddle_model_to_program.cc:26] detect model output: [save_infer_model/scale_0] +68: I0924 13:32:22.257247 28814 executor.cc:49] Program: +68: Program { +68: var_3, var_4, var_5 = conv2d(image, conv1_1_weights, stride=[2,2], padding=[1,1], groups=1, dilation=[1,1]) +68: var_15 = batchnorm(var_5, conv1_1_bn_scale, conv1_1_bn_offset, conv1_1_bn_mean, conv1_1_bn_variance, epsilon=1e-05) +68: var_17 = relu6(var_15) +68: var_21, var_22, var_23 = conv2d(var_17, conv2_1_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_33 = batchnorm(var_23, conv2_1_expand_bn_scale, conv2_1_expand_bn_offset, conv2_1_expand_bn_mean, conv2_1_expand_bn_variance, epsilon=1e-05) +68: var_35 = relu6(var_33) +68: var_39, var_40 = depthwise_conv2d(var_35, conv2_1_dwise_weights, stride=[1,1], padding=[1,1], groups=32, dilation=[1,1]) +68: var_50 = batchnorm(var_40, conv2_1_dwise_bn_scale, conv2_1_dwise_bn_offset, conv2_1_dwise_bn_mean, conv2_1_dwise_bn_variance, epsilon=1e-05) +68: var_52 = relu6(var_50) +68: var_56, var_57, var_58 = conv2d(var_52, conv2_1_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_68 = batchnorm(var_58, conv2_1_linear_bn_scale, conv2_1_linear_bn_offset, conv2_1_linear_bn_mean, conv2_1_linear_bn_variance, epsilon=1e-05) +68: var_72, var_73, var_74 = conv2d(var_68, conv3_1_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_84 = batchnorm(var_74, conv3_1_expand_bn_scale, conv3_1_expand_bn_offset, conv3_1_expand_bn_mean, conv3_1_expand_bn_variance, epsilon=1e-05) +68: var_86 = relu6(var_84) +68: var_90, var_91 = depthwise_conv2d(var_86, conv3_1_dwise_weights, stride=[2,2], padding=[1,1], groups=96, dilation=[1,1]) +68: var_101 = batchnorm(var_91, conv3_1_dwise_bn_scale, conv3_1_dwise_bn_offset, conv3_1_dwise_bn_mean, conv3_1_dwise_bn_variance, epsilon=1e-05) +68: var_103 = relu6(var_101) +68: var_107, var_108, var_109 = conv2d(var_103, conv3_1_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_119 = batchnorm(var_109, conv3_1_linear_bn_scale, conv3_1_linear_bn_offset, conv3_1_linear_bn_mean, conv3_1_linear_bn_variance, epsilon=1e-05) +68: var_123, var_124, var_125 = conv2d(var_119, conv3_2_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_135 = batchnorm(var_125, conv3_2_expand_bn_scale, conv3_2_expand_bn_offset, conv3_2_expand_bn_mean, conv3_2_expand_bn_variance, epsilon=1e-05) +68: var_137 = relu6(var_135) +68: var_141, var_142 = depthwise_conv2d(var_137, conv3_2_dwise_weights, stride=[1,1], padding=[1,1], groups=144, dilation=[1,1]) +68: var_152 = batchnorm(var_142, conv3_2_dwise_bn_scale, conv3_2_dwise_bn_offset, conv3_2_dwise_bn_mean, conv3_2_dwise_bn_variance, epsilon=1e-05) +68: var_154 = relu6(var_152) +68: var_158, var_159, var_160 = conv2d(var_154, conv3_2_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_170 = batchnorm(var_160, conv3_2_linear_bn_scale, conv3_2_linear_bn_offset, conv3_2_linear_bn_mean, conv3_2_linear_bn_variance, epsilon=1e-05) +68: var_172 = elementwise_add(var_119, var_170, axis=-1) +68: var_176, var_177, var_178 = conv2d(var_172, conv4_1_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_188 = batchnorm(var_178, conv4_1_expand_bn_scale, conv4_1_expand_bn_offset, conv4_1_expand_bn_mean, conv4_1_expand_bn_variance, epsilon=1e-05) +68: var_190 = relu6(var_188) +68: var_194, var_195 = depthwise_conv2d(var_190, conv4_1_dwise_weights, stride=[2,2], padding=[1,1], groups=144, dilation=[1,1]) +68: var_205 = batchnorm(var_195, conv4_1_dwise_bn_scale, conv4_1_dwise_bn_offset, conv4_1_dwise_bn_mean, conv4_1_dwise_bn_variance, epsilon=1e-05) +68: var_207 = relu6(var_205) +68: var_211, var_212, var_213 = conv2d(var_207, conv4_1_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_223 = batchnorm(var_213, conv4_1_linear_bn_scale, conv4_1_linear_bn_offset, conv4_1_linear_bn_mean, conv4_1_linear_bn_variance, epsilon=1e-05) +68: var_227, var_228, var_229 = conv2d(var_223, conv4_2_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_239 = batchnorm(var_229, conv4_2_expand_bn_scale, conv4_2_expand_bn_offset, conv4_2_expand_bn_mean, conv4_2_expand_bn_variance, epsilon=1e-05) +68: var_241 = relu6(var_239) +68: var_245, var_246 = depthwise_conv2d(var_241, conv4_2_dwise_weights, stride=[1,1], padding=[1,1], groups=192, dilation=[1,1]) +68: var_256 = batchnorm(var_246, conv4_2_dwise_bn_scale, conv4_2_dwise_bn_offset, conv4_2_dwise_bn_mean, conv4_2_dwise_bn_variance, epsilon=1e-05) +68: var_258 = relu6(var_256) +68: var_262, var_263, var_264 = conv2d(var_258, conv4_2_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_274 = batchnorm(var_264, conv4_2_linear_bn_scale, conv4_2_linear_bn_offset, conv4_2_linear_bn_mean, conv4_2_linear_bn_variance, epsilon=1e-05) +68: var_276 = elementwise_add(var_223, var_274, axis=-1) +68: var_280, var_281, var_282 = conv2d(var_276, conv4_3_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_292 = batchnorm(var_282, conv4_3_expand_bn_scale, conv4_3_expand_bn_offset, conv4_3_expand_bn_mean, conv4_3_expand_bn_variance, epsilon=1e-05) +68: var_294 = relu6(var_292) +68: var_298, var_299 = depthwise_conv2d(var_294, conv4_3_dwise_weights, stride=[1,1], padding=[1,1], groups=192, dilation=[1,1]) +68: var_309 = batchnorm(var_299, conv4_3_dwise_bn_scale, conv4_3_dwise_bn_offset, conv4_3_dwise_bn_mean, conv4_3_dwise_bn_variance, epsilon=1e-05) +68: var_311 = relu6(var_309) +68: var_315, var_316, var_317 = conv2d(var_311, conv4_3_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_327 = batchnorm(var_317, conv4_3_linear_bn_scale, conv4_3_linear_bn_offset, conv4_3_linear_bn_mean, conv4_3_linear_bn_variance, epsilon=1e-05) +68: var_329 = elementwise_add(var_276, var_327, axis=-1) +68: var_333, var_334, var_335 = conv2d(var_329, conv5_1_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_345 = batchnorm(var_335, conv5_1_expand_bn_scale, conv5_1_expand_bn_offset, conv5_1_expand_bn_mean, conv5_1_expand_bn_variance, epsilon=1e-05) +68: var_347 = relu6(var_345) +68: var_351, var_352 = depthwise_conv2d(var_347, conv5_1_dwise_weights, stride=[2,2], padding=[1,1], groups=192, dilation=[1,1]) +68: var_362 = batchnorm(var_352, conv5_1_dwise_bn_scale, conv5_1_dwise_bn_offset, conv5_1_dwise_bn_mean, conv5_1_dwise_bn_variance, epsilon=1e-05) +68: var_364 = relu6(var_362) +68: var_368, var_369, var_370 = conv2d(var_364, conv5_1_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_380 = batchnorm(var_370, conv5_1_linear_bn_scale, conv5_1_linear_bn_offset, conv5_1_linear_bn_mean, conv5_1_linear_bn_variance, epsilon=1e-05) +68: var_384, var_385, var_386 = conv2d(var_380, conv5_2_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_396 = batchnorm(var_386, conv5_2_expand_bn_scale, conv5_2_expand_bn_offset, conv5_2_expand_bn_mean, conv5_2_expand_bn_variance, epsilon=1e-05) +68: var_398 = relu6(var_396) +68: var_402, var_403 = depthwise_conv2d(var_398, conv5_2_dwise_weights, stride=[1,1], padding=[1,1], groups=384, dilation=[1,1]) +68: var_413 = batchnorm(var_403, conv5_2_dwise_bn_scale, conv5_2_dwise_bn_offset, conv5_2_dwise_bn_mean, conv5_2_dwise_bn_variance, epsilon=1e-05) +68: var_415 = relu6(var_413) +68: var_419, var_420, var_421 = conv2d(var_415, conv5_2_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_431 = batchnorm(var_421, conv5_2_linear_bn_scale, conv5_2_linear_bn_offset, conv5_2_linear_bn_mean, conv5_2_linear_bn_variance, epsilon=1e-05) +68: var_433 = elementwise_add(var_380, var_431, axis=-1) +68: var_437, var_438, var_439 = conv2d(var_433, conv5_3_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_449 = batchnorm(var_439, conv5_3_expand_bn_scale, conv5_3_expand_bn_offset, conv5_3_expand_bn_mean, conv5_3_expand_bn_variance, epsilon=1e-05) +68: var_451 = relu6(var_449) +68: var_455, var_456 = depthwise_conv2d(var_451, conv5_3_dwise_weights, stride=[1,1], padding=[1,1], groups=384, dilation=[1,1]) +68: var_466 = batchnorm(var_456, conv5_3_dwise_bn_scale, conv5_3_dwise_bn_offset, conv5_3_dwise_bn_mean, conv5_3_dwise_bn_variance, epsilon=1e-05) +68: var_468 = relu6(var_466) +68: var_472, var_473, var_474 = conv2d(var_468, conv5_3_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_484 = batchnorm(var_474, conv5_3_linear_bn_scale, conv5_3_linear_bn_offset, conv5_3_linear_bn_mean, conv5_3_linear_bn_variance, epsilon=1e-05) +68: var_486 = elementwise_add(var_433, var_484, axis=-1) +68: var_490, var_491, var_492 = conv2d(var_486, conv5_4_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_502 = batchnorm(var_492, conv5_4_expand_bn_scale, conv5_4_expand_bn_offset, conv5_4_expand_bn_mean, conv5_4_expand_bn_variance, epsilon=1e-05) +68: var_504 = relu6(var_502) +68: var_508, var_509 = depthwise_conv2d(var_504, conv5_4_dwise_weights, stride=[1,1], padding=[1,1], groups=384, dilation=[1,1]) +68: var_519 = batchnorm(var_509, conv5_4_dwise_bn_scale, conv5_4_dwise_bn_offset, conv5_4_dwise_bn_mean, conv5_4_dwise_bn_variance, epsilon=1e-05) +68: var_521 = relu6(var_519) +68: var_525, var_526, var_527 = conv2d(var_521, conv5_4_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_537 = batchnorm(var_527, conv5_4_linear_bn_scale, conv5_4_linear_bn_offset, conv5_4_linear_bn_mean, conv5_4_linear_bn_variance, epsilon=1e-05) +68: var_539 = elementwise_add(var_486, var_537, axis=-1) +68: var_543, var_544, var_545 = conv2d(var_539, conv6_1_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_555 = batchnorm(var_545, conv6_1_expand_bn_scale, conv6_1_expand_bn_offset, conv6_1_expand_bn_mean, conv6_1_expand_bn_variance, epsilon=1e-05) +68: var_557 = relu6(var_555) +68: var_561, var_562 = depthwise_conv2d(var_557, conv6_1_dwise_weights, stride=[1,1], padding=[1,1], groups=384, dilation=[1,1]) +68: var_572 = batchnorm(var_562, conv6_1_dwise_bn_scale, conv6_1_dwise_bn_offset, conv6_1_dwise_bn_mean, conv6_1_dwise_bn_variance, epsilon=1e-05) +68: var_574 = relu6(var_572) +68: var_578, var_579, var_580 = conv2d(var_574, conv6_1_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_590 = batchnorm(var_580, conv6_1_linear_bn_scale, conv6_1_linear_bn_offset, conv6_1_linear_bn_mean, conv6_1_linear_bn_variance, epsilon=1e-05) +68: var_594, var_595, var_596 = conv2d(var_590, conv6_2_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_606 = batchnorm(var_596, conv6_2_expand_bn_scale, conv6_2_expand_bn_offset, conv6_2_expand_bn_mean, conv6_2_expand_bn_variance, epsilon=1e-05) +68: var_608 = relu6(var_606) +68: var_612, var_613 = depthwise_conv2d(var_608, conv6_2_dwise_weights, stride=[1,1], padding=[1,1], groups=576, dilation=[1,1]) +68: var_623 = batchnorm(var_613, conv6_2_dwise_bn_scale, conv6_2_dwise_bn_offset, conv6_2_dwise_bn_mean, conv6_2_dwise_bn_variance, epsilon=1e-05) +68: var_625 = relu6(var_623) +68: var_629, var_630, var_631 = conv2d(var_625, conv6_2_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_641 = batchnorm(var_631, conv6_2_linear_bn_scale, conv6_2_linear_bn_offset, conv6_2_linear_bn_mean, conv6_2_linear_bn_variance, epsilon=1e-05) +68: var_643 = elementwise_add(var_590, var_641, axis=-1) +68: var_647, var_648, var_649 = conv2d(var_643, conv6_3_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_659 = batchnorm(var_649, conv6_3_expand_bn_scale, conv6_3_expand_bn_offset, conv6_3_expand_bn_mean, conv6_3_expand_bn_variance, epsilon=1e-05) +68: var_661 = relu6(var_659) +68: var_665, var_666 = depthwise_conv2d(var_661, conv6_3_dwise_weights, stride=[1,1], padding=[1,1], groups=576, dilation=[1,1]) +68: var_676 = batchnorm(var_666, conv6_3_dwise_bn_scale, conv6_3_dwise_bn_offset, conv6_3_dwise_bn_mean, conv6_3_dwise_bn_variance, epsilon=1e-05) +68: var_678 = relu6(var_676) +68: var_682, var_683, var_684 = conv2d(var_678, conv6_3_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_694 = batchnorm(var_684, conv6_3_linear_bn_scale, conv6_3_linear_bn_offset, conv6_3_linear_bn_mean, conv6_3_linear_bn_variance, epsilon=1e-05) +68: var_696 = elementwise_add(var_643, var_694, axis=-1) +68: var_700, var_701, var_702 = conv2d(var_696, conv7_1_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_712 = batchnorm(var_702, conv7_1_expand_bn_scale, conv7_1_expand_bn_offset, conv7_1_expand_bn_mean, conv7_1_expand_bn_variance, epsilon=1e-05) +68: var_714 = relu6(var_712) +68: var_718, var_719 = depthwise_conv2d(var_714, conv7_1_dwise_weights, stride=[2,2], padding=[1,1], groups=576, dilation=[1,1]) +68: var_729 = batchnorm(var_719, conv7_1_dwise_bn_scale, conv7_1_dwise_bn_offset, conv7_1_dwise_bn_mean, conv7_1_dwise_bn_variance, epsilon=1e-05) +68: var_731 = relu6(var_729) +68: var_735, var_736, var_737 = conv2d(var_731, conv7_1_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_747 = batchnorm(var_737, conv7_1_linear_bn_scale, conv7_1_linear_bn_offset, conv7_1_linear_bn_mean, conv7_1_linear_bn_variance, epsilon=1e-05) +68: var_751, var_752, var_753 = conv2d(var_747, conv7_2_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_763 = batchnorm(var_753, conv7_2_expand_bn_scale, conv7_2_expand_bn_offset, conv7_2_expand_bn_mean, conv7_2_expand_bn_variance, epsilon=1e-05) +68: var_765 = relu6(var_763) +68: var_769, var_770 = depthwise_conv2d(var_765, conv7_2_dwise_weights, stride=[1,1], padding=[1,1], groups=960, dilation=[1,1]) +68: var_780 = batchnorm(var_770, conv7_2_dwise_bn_scale, conv7_2_dwise_bn_offset, conv7_2_dwise_bn_mean, conv7_2_dwise_bn_variance, epsilon=1e-05) +68: var_782 = relu6(var_780) +68: var_786, var_787, var_788 = conv2d(var_782, conv7_2_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_798 = batchnorm(var_788, conv7_2_linear_bn_scale, conv7_2_linear_bn_offset, conv7_2_linear_bn_mean, conv7_2_linear_bn_variance, epsilon=1e-05) +68: var_800 = elementwise_add(var_747, var_798, axis=-1) +68: var_804, var_805, var_806 = conv2d(var_800, conv7_3_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_816 = batchnorm(var_806, conv7_3_expand_bn_scale, conv7_3_expand_bn_offset, conv7_3_expand_bn_mean, conv7_3_expand_bn_variance, epsilon=1e-05) +68: var_818 = relu6(var_816) +68: var_822, var_823 = depthwise_conv2d(var_818, conv7_3_dwise_weights, stride=[1,1], padding=[1,1], groups=960, dilation=[1,1]) +68: var_833 = batchnorm(var_823, conv7_3_dwise_bn_scale, conv7_3_dwise_bn_offset, conv7_3_dwise_bn_mean, conv7_3_dwise_bn_variance, epsilon=1e-05) +68: var_835 = relu6(var_833) +68: var_839, var_840, var_841 = conv2d(var_835, conv7_3_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_851 = batchnorm(var_841, conv7_3_linear_bn_scale, conv7_3_linear_bn_offset, conv7_3_linear_bn_mean, conv7_3_linear_bn_variance, epsilon=1e-05) +68: var_853 = elementwise_add(var_800, var_851, axis=-1) +68: var_857, var_858, var_859 = conv2d(var_853, conv8_1_expand_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_869 = batchnorm(var_859, conv8_1_expand_bn_scale, conv8_1_expand_bn_offset, conv8_1_expand_bn_mean, conv8_1_expand_bn_variance, epsilon=1e-05) +68: var_871 = relu6(var_869) +68: var_875, var_876 = depthwise_conv2d(var_871, conv8_1_dwise_weights, stride=[1,1], padding=[1,1], groups=960, dilation=[1,1]) +68: var_886 = batchnorm(var_876, conv8_1_dwise_bn_scale, conv8_1_dwise_bn_offset, conv8_1_dwise_bn_mean, conv8_1_dwise_bn_variance, epsilon=1e-05) +68: var_888 = relu6(var_886) +68: var_892, var_893, var_894 = conv2d(var_888, conv8_1_linear_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_904 = batchnorm(var_894, conv8_1_linear_bn_scale, conv8_1_linear_bn_offset, conv8_1_linear_bn_mean, conv8_1_linear_bn_variance, epsilon=1e-05) +68: var_908, var_909, var_910 = conv2d(var_904, conv9_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +68: var_920 = batchnorm(var_910, conv9_bn_scale, conv9_bn_offset, conv9_bn_mean, conv9_bn_variance, epsilon=1e-05) +68: var_922 = relu6(var_920) +68: var_924, var_925 = pool2d(var_922, pool_type=avg, stride_size=[1,1], exclusive=true, kernel_size=[7,7], data_format=AnyLayout, padding_size=[0,0,0,0], ceil_mode=false) +68: var_929 = mul(var_925, fc10_weights, y_num_col_dims=1, x_num_col_dims=1) +68: var_933 = elementwise_add(var_929, fc10_offset, axis=1) +68: var_935, var_936 = softmax(var_933, axis=-1) +68: var_938 = scale(var_936, scale=1) +68: } +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register relu +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register relu6 +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register conv2d +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register depthwise_conv2d +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register batchnorm +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register pool1d +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register pool2d +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register pool3d +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register sigmoid +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register softmax +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register slice +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register depthwise_conv2d +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register elementwise_add +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register elementwise_mul +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register scale +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register matmul +67: I0100 00:00:00.000000 28813 registry.h:89] RAW: Register mul +67: WARNING: Logging before InitGoogleLogging() is written to STDERR +67: I0924 13:32:22.166492 28813 syntax.cc:130] Loading Paddle model from /home/wangyue50/CINN-my/CINN/build/thirds/ResNet18 +67: I0924 13:32:22.166563 28813 model_parser.cc:188] model_dir is: /home/wangyue50/CINN-my/CINN/build/thirds/ResNet18 +67: I0924 13:32:22.166568 28813 model_parser.cc:189] model_file is: __model__ +67: I0924 13:32:22.166569 28813 model_parser.cc:190] param_file is: +67: I0924 13:32:22.216248 28813 paddle_model_to_program.cc:26] detect model output: [save_infer_model/scale_0] +67: I0924 13:32:22.217094 28813 executor.cc:49] Program: +67: Program { +67: var_3, var_4, var_5 = conv2d(image, conv1_weights, stride=[2,2], padding=[3,3], groups=1, dilation=[1,1]) +67: var_15 = batchnorm(var_5, bn_conv1_scale, bn_conv1_offset, bn_conv1_mean, bn_conv1_variance, epsilon=1e-05) +67: var_17 = relu(var_15) +67: var_19, var_20 = pool2d(var_17, pool_type=max, stride_size=[2,2], exclusive=true, kernel_size=[3,3], data_format=AnyLayout, padding_size=[1,1,1,1], ceil_mode=false) +67: var_24, var_25, var_26 = conv2d(var_20, res2a_branch2a_weights, stride=[1,1], padding=[1,1], groups=1, dilation=[1,1]) +67: var_36 = batchnorm(var_26, bn2a_branch2a_scale, bn2a_branch2a_offset, bn2a_branch2a_mean, bn2a_branch2a_variance, epsilon=1e-05) +67: var_38 = relu(var_36) +67: var_42, var_43, var_44 = conv2d(var_38, res2a_branch2b_weights, stride=[1,1], padding=[1,1], groups=1, dilation=[1,1]) +67: var_54 = batchnorm(var_44, bn2a_branch2b_scale, bn2a_branch2b_offset, bn2a_branch2b_mean, bn2a_branch2b_variance, epsilon=1e-05) +67: var_58, var_59, var_60 = conv2d(var_20, res2a_branch1_weights, stride=[1,1], padding=[0,0], groups=1, dilation=[1,1]) +67: var_70 = batchnorm(var_60, bn2a_branch1_scale, bn2a_branch1_offset, bn2a_branch1_mean, bn2a_branch1_variance, epsilon=1e-05) +67: var_72 = elementwise_add(var_70, var_54, axis=-1) +67: var_74 = relu(var_72) +67: var_78, var_79, var_80 = conv2d(var_74, res2b_branch2a_weights, stride=[1,1], padding=[1,1], groups=1, dilation=[1,1]) +67: var_90 = batchnorm(var_80, bn2b_branch2a_scale, bn2b_branch2a_offset, bn2b_branch2a_mean, bn2b_branch2a_variance, epsilon=1e-05) +67: var_92 = relu(var_90) +67: var_96, var_97, var_98 = conv2d(var_92, res2b_branch2b_weights, stride=[1,1], padding=[1,1], groups=1, dilation=[1,1]) +67: var_108 = batchnorm(var_98, bn2b_branch2b_scale, bn2b_branch2b_offset, bn2b_branch2b_mean, bn2b_branch2b_variance, epsilon=1e-05) +67: var_110 = elementwise_add(var_74, var_108, axis=-1) +67: var_112 = relu(var_110) +67: var_116, var_117, var_118 = conv2d(var_112, res3a_branch2a_weights, stride=[2,2], padding=[1,1], groups=1, dilation=[1,1]) +67: var_128 = batchnorm(var_118, bn3a_branch2a_scale, bn3a_branch2a_offset, bn3a_branch2a_mean, bn3a_branch2a_variance, epsilon=1e-05) +67: var_130 = relu(var_128) +67: var_134, var_135, var_136 = conv2d(var_130, res3a_branch2b_weights, stride=[1,1], padding=[1,1], groups=1, dilation=[1,1]) +67: var_146 = batchnorm(var_136, bn3a_branch2b_scale, bn3a_branch2b_offset, bn3a_branch2b_mean, bn3a_branch2b_variance, epsilon=1e-05) +67: var_150, var_151, var_152 = conv2d(var_112, res3a_branch1_weights, stride=[2,2], padding=[0,0], groups=1, dilation=[1,1]) +67: var_162 = batchnorm(var_152, bn3a_branch1_scale, bn3a_branch1_offset, bn3a_branch1_mean, bn3a_branch1_variance, epsilon=1e-05) +67: var_164 = elementwise_add(var_162, var_146, axis=-1) +67: var_166 = relu(var_164) +67: var_170, var_171, var_172 = conv2d(var_166, res3b_branch2a_weights, stride=[1,1], padding=[1,1], groups=1, dilation=[1,1]) +67: var_182 = batchnorm(var_172, bn3b_branch2a_scale, bn3b_branch2a_offset, bn3b_branch2a_mean, bn3b_branch2a_variance, epsilon=1e-05) +67: var_184 = relu(var_182) +67: var_188, var_189, var_190 = conv2d(var_184, res3b_branch2b_weights, stride=[1,1], padding=[1,1], groups=1, dilation=[1,1]) +67: var_200 = batchnorm(var_190, bn3b_branch2b_scale, bn3b_branch2b_offset, bn3b_branch2b_mean, bn3b_branch2b_variance, epsilon=1e-05) +67: var_202 = elementwise_add(var_166, var_200, axis=-1) +67: var_204 = relu(var_202) +67: var_208, var_209, var_210 = conv2d(var_204, res4a_branch2a_weights, stride=[2,2], padding=[1,1], groups=1, dilation=[1,1]) +67: var_220 = batchnorm(var_210, bn4a_branch2a_scale, bn4a_branch2a_offset, bn4a_branch2a_mean, bn4a_branch2a_variance, epsilon=1e-05) +67: var_222 = relu(var_220) +67: var_226, var_227, var_228 = conv2d(var_222, res4a_branch2b_weights, stride=[1,1], padding=[1,1], groups=1, dilation=[1,1]) +67: var_238 = batchnorm(var_228, bn4a_branch2b_scale, bn4a_branch2b_offset, bn4a_branch2b_mean, bn4a_branch2b_variance, epsilon=1e-05) +67: var_242, var_243, var_244 = conv2d(var_204, res4a_branch1_weights, stride=[2,2], padding=[0,0], groups=1, dilation=[1,1]) +67: var_254 = batchnorm(var_244, bn4a_branch1_scale, bn4a_branch1_offset, bn4a_branch1_mean, bn4a_branch1_variance, epsilon=1e-05) +67: var_256 = elementwise_add(var_254, var_238, axis=-1) +67: var_258 = relu(var_256) +67: var_262, var_263, var_264 = conv2d(var_258, res4b_branch2a_weights, stride=[1,1], padding=[1,1], groups=1, dilation=[1,1]) +67: var_274 = batchnorm(var_264, bn4b_branch2a_scale, bn4b_branch2a_offset, bn4b_branch2a_mean, bn4b_branch2a_variance, epsilon=1e-05) +67: var_276 = relu(var_274) +67: var_280, var_281, var_282 = conv2d(var_276, res4b_branch2b_weights, stride=[1,1], padding=[1,1], groups=1, dilation=[1,1]) +67: var_292 = batchnorm(var_282, bn4b_branch2b_scale, bn4b_branch2b_offset, bn4b_branch2b_mean, bn4b_branch2b_variance, epsilon=1e-05) +67: var_294 = elementwise_add(var_258, var_292, axis=-1) +67: var_296 = relu(var_294) +67: var_300, var_301, var_302 = conv2d(var_296, res5a_branch2a_weights, stride=[2,2], padding=[1,1], groups=1, dilation=[1,1]) +67: var_312 = batchnorm(var_302, bn5a_branch2a_scale, bn5a_branch2a_offset, bn5a_branch2a_mean, bn5a_branch2a_variance, epsilon=1e-05) +67: var_314 = relu(var_312) +67: var_318, var_319, var_320 = conv2d(var_314, res5a_branch2b_weights, stride=[1,1], padding=[1,1], groups=1, dilation=[1,1]) +67: var_330 = batchnorm(var_320, bn5a_branch2b_scale, bn5a_branch2b_offset, bn5a_branch2b_mean, bn5a_branch2b_variance, epsilon=1e-05) +67: var_334, var_335, var_336 = conv2d(var_296, res5a_branch1_weights, stride=[2,2], padding=[0,0], groups=1, dilation=[1,1]) +67: var_346 = batchnorm(var_336, bn5a_branch1_scale, bn5a_branch1_offset, bn5a_branch1_mean, bn5a_branch1_variance, epsilon=1e-05) +67: var_348 = elementwise_add(var_346, var_330, axis=-1) +67: var_350 = relu(var_348) +67: var_354, var_355, var_356 = conv2d(var_350, res5b_branch2a_weights, stride=[1,1], padding=[1,1], groups=1, dilation=[1,1]) +67: var_366 = batchnorm(var_356, bn5b_branch2a_scale, bn5b_branch2a_offset, bn5b_branch2a_mean, bn5b_branch2a_variance, epsilon=1e-05) +67: var_368 = relu(var_366) +67: var_372, var_373, var_374 = conv2d(var_368, res5b_branch2b_weights, stride=[1,1], padding=[1,1], groups=1, dilation=[1,1]) +67: var_384 = batchnorm(var_374, bn5b_branch2b_scale, bn5b_branch2b_offset, bn5b_branch2b_mean, bn5b_branch2b_variance, epsilon=1e-05) +67: var_386 = elementwise_add(var_350, var_384, axis=-1) +67: var_388 = relu(var_386) +67: var_390, var_391 = pool2d(var_388, pool_type=avg, stride_size=[1,1], exclusive=true, kernel_size=[7,7], data_format=AnyLayout, padding_size=[0,0,0,0], ceil_mode=false) +67: var_395 = mul(var_391, fc_0__w_0, y_num_col_dims=1, x_num_col_dims=1) +67: var_399 = elementwise_add(var_395, fc_0__b_0, axis=1) +67: var_401, var_402 = softmax(var_399, axis=-1) +67: var_404 = scale(var_402, scale=1) +67: } +67: I0924 13:32:22.343083 28813 nn.cc:477] kernel_size length is: 2 +67: I0924 13:32:22.343093 28813 nn.cc:478] kernel_size is: 3 +67: I0924 13:32:22.343096 28813 nn.cc:479] padding_size length is: 4 +67: I0924 13:32:22.343097 28813 nn.cc:480] padding_size is: 1 +54: WARNING: Logging before InitGoogleLogging() is written to STDERR +54: I0924 13:32:23.535948 27840 test02_matmul_case.cc:132] Testing matmul +65: .I0924 13:32:24.203958 28049 syntax.cc:130] Loading Paddle model from /home/wangyue50/CINN-my/CINN/build/thirds/naive_mul_model +65: I0924 13:32:24.204044 28049 model_parser.cc:188] model_dir is: /home/wangyue50/CINN-my/CINN/build/thirds/naive_mul_model +65: I0924 13:32:24.204051 28049 model_parser.cc:189] model_file is: __model__ +65: I0924 13:32:24.204054 28049 model_parser.cc:190] param_file is: +65: I0924 13:32:24.205775 28049 paddle_model_to_program.cc:26] detect model output: [save_infer_model/scale_0.tmp_0] +65: I0924 13:32:24.205875 28049 executor.cc:49] Program: +65: Program { +65: var_11 = mul(A, fc_0__w_0, y_num_col_dims=1, x_num_col_dims=1) +65: var_15 = elementwise_add(var_11, fc_0__b_0, axis=1) +65: var_17 = relu(var_15) +65: var_19 = scale(var_17, scale=1) +65: } +65: I0924 13:32:24.252491 28049 graph_compiler.cc:39] [Debug] C Code is: +65: #include +65: #include +65: +65: void fn_mul_0(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_0__w_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _Mul_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _Mul_out); +65: const float* A_reshape = ((const float*)(_A->memory)); +65: float* Mul_out = ((float*)(_Mul_out->memory)); +65: float* Mul_out_init = ((float*)(_Mul_out->memory)); +65: const float* fc_0__w_0_reshape = ((const float*)(_fc_0__w_0->memory)); +65: for (int32_t i = 0; i < 4; i += 1) { +65: for (int32_t j = 0; j < 30; j += 1) { +65: Mul_out_init[((30 * i) + j)] = 0; +65: }; +65: }; +65: for (int32_t i = 0; i < 4; i += 1) { +65: for (int32_t j = 0; j < 30; j += 1) { +65: for (int32_t axis_k = 0; axis_k < 30; axis_k += 1) { +65: Mul_out[((30 * i) + j)] = (Mul_out[((30 * i) + j)] + (A_reshape[((30 * i) + axis_k)] * fc_0__w_0_reshape[((30 * axis_k) + j)])); +65: }; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Mul_out); +65: } +65: +65: void fn_elementwise_add_1(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_11 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_0__b_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _C_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _C_0); +65: float* C_0 = ((float*)(_C_0->memory)); +65: const float* fc_0__b_0 = ((const float*)(_fc_0__b_0->memory)); +65: const float* var_11 = ((const float*)(_var_11->memory)); +65: for (int32_t i = 0; i < 4; i += 1) { +65: for (int32_t j = 0; j < 30; j += 1) { +65: C_0[((30 * i) + j)] = (var_11[((30 * i) + j)] + fc_0__b_0[j]); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _C_0); +65: } +65: +65: void fn_relu_2(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_15 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: cinn_buffer_t* _Relu_output_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_malloc((void*)(0), _Relu_output_0); +65: float* Relu_output_0 = ((float*)(_Relu_output_0->memory)); +65: const float* var_15 = ((const float*)(_var_15->memory)); +65: for (int32_t i = 0; i < 4; i += 1) { +65: for (int32_t j = 0; j < 30; j += 1) { +65: Relu_output_0[((30 * i) + j)] = cinn_max(var_15[((30 * i) + j)], 0); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Relu_output_0); +65: } +65: +65: void fn_scale_3(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_17 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: cinn_buffer_t* _Scale_out_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_malloc((void*)(0), _Scale_out_0); +65: float* Scale_out_0 = ((float*)(_Scale_out_0->memory)); +65: const float* var_17 = ((const float*)(_var_17->memory)); +65: for (int32_t i = 0; i < 4; i += 1) { +65: for (int32_t j = 0; j < 30; j += 1) { +65: Scale_out_0[((30 * i) + j)] = var_17[((30 * i) + j)]; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Scale_out_0); +65: } +65: +65: I0924 13:32:24.283535 28049 analysis_predictor.cc:138] Profiler is deactivated, and no profiling report will be generated. +65: I0924 13:32:24.283852 28049 analysis_predictor.cc:875] MODEL VERSION: 1.8.4 +65: I0924 13:32:24.283857 28049 analysis_predictor.cc:877] PREDICTOR VERSION: 1.8.4 +65: I0924 13:32:24.283890 28049 analysis_predictor.cc:474] ir_optim is turned off, no IR pass will be executed +65: --- Running analysis [ir_graph_build_pass] +65: --- Running analysis [ir_graph_clean_pass] +65: --- Running analysis [ir_analysis_pass] +65: --- Running analysis [ir_params_sync_among_devices_pass] +65: --- Running analysis [adjust_cudnn_workspace_size_pass] +65: --- Running analysis [inference_op_replace_pass] +65: --- Running analysis [ir_graph_to_program_pass] +65: I0924 13:32:24.284680 28049 analysis_predictor.cc:496] ======= optimize end ======= +65: .I0924 13:32:24.286247 28049 syntax.cc:130] Loading Paddle model from /home/wangyue50/CINN-my/CINN/build/thirds/multi_fc_model +65: I0924 13:32:24.286298 28049 model_parser.cc:188] model_dir is: /home/wangyue50/CINN-my/CINN/build/thirds/multi_fc_model +65: I0924 13:32:24.286303 28049 model_parser.cc:189] model_file is: __model__ +65: I0924 13:32:24.286304 28049 model_parser.cc:190] param_file is: +65: I0924 13:32:24.290424 28049 paddle_model_to_program.cc:26] detect model output: [save_infer_model/scale_0.tmp_0] +65: I0924 13:32:24.290608 28049 executor.cc:49] Program: +65: Program { +65: var_25 = mul(A, fc_0__w_0, y_num_col_dims=1, x_num_col_dims=1) +65: var_29 = elementwise_add(var_25, fc_bias, axis=1) +65: var_31 = relu(var_29) +65: var_35 = mul(var_31, fc_1__w_0, y_num_col_dims=1, x_num_col_dims=1) +65: var_37 = elementwise_add(var_35, fc_bias, axis=1) +65: var_39 = relu(var_37) +65: var_43 = mul(var_39, fc_2__w_0, y_num_col_dims=1, x_num_col_dims=1) +65: var_45 = elementwise_add(var_43, fc_bias, axis=1) +65: var_47 = relu(var_45) +65: var_51 = mul(var_47, fc_3__w_0, y_num_col_dims=1, x_num_col_dims=1) +65: var_53 = elementwise_add(var_51, fc_bias, axis=1) +65: var_55 = relu(var_53) +65: var_59 = mul(var_55, fc_4__w_0, y_num_col_dims=1, x_num_col_dims=1) +65: var_61 = elementwise_add(var_59, fc_bias, axis=1) +65: var_63 = relu(var_61) +65: var_67 = mul(var_63, fc_5__w_0, y_num_col_dims=1, x_num_col_dims=1) +65: var_69 = elementwise_add(var_67, fc_bias, axis=1) +65: var_71 = relu(var_69) +65: var_73 = scale(var_71, scale=1) +65: } +65: I0924 13:32:24.521476 28049 graph_compiler.cc:39] [Debug] C Code is: +65: #include +65: #include +65: +65: void fn_mul_0(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_0__w_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _Mul_out_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _Mul_out_0); +65: const float* A_reshape_0 = ((const float*)(_A->memory)); +65: float* Mul_out_0 = ((float*)(_Mul_out_0->memory)); +65: float* Mul_out_0_init = ((float*)(_Mul_out_0->memory)); +65: const float* fc_0__w_0_reshape_0 = ((const float*)(_fc_0__w_0->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: Mul_out_0_init[((64 * i) + j)] = 0; +65: }; +65: }; +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: for (int32_t axis_k_0 = 0; axis_k_0 < 64; axis_k_0 += 1) { +65: Mul_out_0[((64 * i) + j)] = (Mul_out_0[((64 * i) + j)] + (A_reshape_0[((64 * i) + axis_k_0)] * fc_0__w_0_reshape_0[((64 * axis_k_0) + j)])); +65: }; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Mul_out_0); +65: } +65: +65: void fn_elementwise_add_1(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_25 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_bias = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _C_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _C_1); +65: float* C_1 = ((float*)(_C_1->memory)); +65: const float* fc_bias = ((const float*)(_fc_bias->memory)); +65: const float* var_25 = ((const float*)(_var_25->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: C_1[((64 * i) + j)] = (var_25[((64 * i) + j)] + fc_bias[j]); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _C_1); +65: } +65: +65: void fn_relu_2(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_29 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: cinn_buffer_t* _Relu_output_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_malloc((void*)(0), _Relu_output_1); +65: float* Relu_output_1 = ((float*)(_Relu_output_1->memory)); +65: const float* var_29 = ((const float*)(_var_29->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: Relu_output_1[((64 * i) + j)] = cinn_max(var_29[((64 * i) + j)], 0); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Relu_output_1); +65: } +65: +65: void fn_mul_3(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_31 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_1__w_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _Mul_out_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _Mul_out_1); +65: float* Mul_out_1 = ((float*)(_Mul_out_1->memory)); +65: float* Mul_out_1_init = ((float*)(_Mul_out_1->memory)); +65: const float* fc_1__w_0_reshape = ((const float*)(_fc_1__w_0->memory)); +65: const float* var_31_reshape = ((const float*)(_var_31->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: Mul_out_1_init[((64 * i) + j)] = 0; +65: }; +65: }; +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: for (int32_t axis_k_1 = 0; axis_k_1 < 64; axis_k_1 += 1) { +65: Mul_out_1[((64 * i) + j)] = (Mul_out_1[((64 * i) + j)] + (var_31_reshape[((64 * i) + axis_k_1)] * fc_1__w_0_reshape[((64 * axis_k_1) + j)])); +65: }; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Mul_out_1); +65: } +65: +65: void fn_elementwise_add_4(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_35 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_bias = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _C_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _C_2); +65: float* C_2 = ((float*)(_C_2->memory)); +65: const float* fc_bias = ((const float*)(_fc_bias->memory)); +65: const float* var_35 = ((const float*)(_var_35->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: C_2[((64 * i) + j)] = (var_35[((64 * i) + j)] + fc_bias[j]); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _C_2); +65: } +65: +65: void fn_relu_5(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_37 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: cinn_buffer_t* _Relu_output_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_malloc((void*)(0), _Relu_output_2); +65: float* Relu_output_2 = ((float*)(_Relu_output_2->memory)); +65: const float* var_37 = ((const float*)(_var_37->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: Relu_output_2[((64 * i) + j)] = cinn_max(var_37[((64 * i) + j)], 0); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Relu_output_2); +65: } +65: +65: void fn_mul_6(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_39 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_2__w_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _Mul_out_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _Mul_out_2); +65: float* Mul_out_2 = ((float*)(_Mul_out_2->memory)); +65: float* Mul_out_2_init = ((float*)(_Mul_out_2->memory)); +65: const float* fc_2__w_0_reshape = ((const float*)(_fc_2__w_0->memory)); +65: const float* var_39_reshape = ((const float*)(_var_39->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: Mul_out_2_init[((64 * i) + j)] = 0; +65: }; +65: }; +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: for (int32_t axis_k_2 = 0; axis_k_2 < 64; axis_k_2 += 1) { +65: Mul_out_2[((64 * i) + j)] = (Mul_out_2[((64 * i) + j)] + (var_39_reshape[((64 * i) + axis_k_2)] * fc_2__w_0_reshape[((64 * axis_k_2) + j)])); +65: }; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Mul_out_2); +65: } +65: +65: void fn_elementwise_add_7(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_43 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_bias = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _C_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _C_3); +65: float* C_3 = ((float*)(_C_3->memory)); +65: const float* fc_bias = ((const float*)(_fc_bias->memory)); +65: const float* var_43 = ((const float*)(_var_43->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: C_3[((64 * i) + j)] = (var_43[((64 * i) + j)] + fc_bias[j]); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _C_3); +65: } +65: +65: void fn_relu_8(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_45 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: cinn_buffer_t* _Relu_output_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_malloc((void*)(0), _Relu_output_3); +65: float* Relu_output_3 = ((float*)(_Relu_output_3->memory)); +65: const float* var_45 = ((const float*)(_var_45->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: Relu_output_3[((64 * i) + j)] = cinn_max(var_45[((64 * i) + j)], 0); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Relu_output_3); +65: } +65: +65: void fn_mul_9(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_47 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_3__w_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _Mul_out_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _Mul_out_3); +65: float* Mul_out_3 = ((float*)(_Mul_out_3->memory)); +65: float* Mul_out_3_init = ((float*)(_Mul_out_3->memory)); +65: const float* fc_3__w_0_reshape = ((const float*)(_fc_3__w_0->memory)); +65: const float* var_47_reshape = ((const float*)(_var_47->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: Mul_out_3_init[((64 * i) + j)] = 0; +65: }; +65: }; +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: for (int32_t axis_k_3 = 0; axis_k_3 < 64; axis_k_3 += 1) { +65: Mul_out_3[((64 * i) + j)] = (Mul_out_3[((64 * i) + j)] + (var_47_reshape[((64 * i) + axis_k_3)] * fc_3__w_0_reshape[((64 * axis_k_3) + j)])); +65: }; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Mul_out_3); +65: } +65: +65: void fn_elementwise_add_10(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_51 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_bias = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _C_4 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _C_4); +65: float* C_4 = ((float*)(_C_4->memory)); +65: const float* fc_bias = ((const float*)(_fc_bias->memory)); +65: const float* var_51 = ((const float*)(_var_51->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: C_4[((64 * i) + j)] = (var_51[((64 * i) + j)] + fc_bias[j]); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _C_4); +65: } +65: +65: void fn_relu_11(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_53 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: cinn_buffer_t* _Relu_output_4 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_malloc((void*)(0), _Relu_output_4); +65: float* Relu_output_4 = ((float*)(_Relu_output_4->memory)); +65: const float* var_53 = ((const float*)(_var_53->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: Relu_output_4[((64 * i) + j)] = cinn_max(var_53[((64 * i) + j)], 0); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Relu_output_4); +65: } +65: +65: void fn_mul_12(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_55 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_4__w_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _Mul_out_4 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _Mul_out_4); +65: float* Mul_out_4 = ((float*)(_Mul_out_4->memory)); +65: float* Mul_out_4_init = ((float*)(_Mul_out_4->memory)); +65: const float* fc_4__w_0_reshape = ((const float*)(_fc_4__w_0->memory)); +65: const float* var_55_reshape = ((const float*)(_var_55->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: Mul_out_4_init[((64 * i) + j)] = 0; +65: }; +65: }; +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: for (int32_t axis_k_4 = 0; axis_k_4 < 64; axis_k_4 += 1) { +65: Mul_out_4[((64 * i) + j)] = (Mul_out_4[((64 * i) + j)] + (var_55_reshape[((64 * i) + axis_k_4)] * fc_4__w_0_reshape[((64 * axis_k_4) + j)])); +65: }; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Mul_out_4); +65: } +65: +65: void fn_elementwise_add_13(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_59 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_bias = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _C_5 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _C_5); +65: float* C_5 = ((float*)(_C_5->memory)); +65: const float* fc_bias = ((const float*)(_fc_bias->memory)); +65: const float* var_59 = ((const float*)(_var_59->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: C_5[((64 * i) + j)] = (var_59[((64 * i) + j)] + fc_bias[j]); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _C_5); +65: } +65: +65: void fn_relu_14(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_61 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: cinn_buffer_t* _Relu_output_5 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_malloc((void*)(0), _Relu_output_5); +65: float* Relu_output_5 = ((float*)(_Relu_output_5->memory)); +65: const float* var_61 = ((const float*)(_var_61->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: Relu_output_5[((64 * i) + j)] = cinn_max(var_61[((64 * i) + j)], 0); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Relu_output_5); +65: } +65: +65: void fn_mul_15(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_63 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_5__w_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _Mul_out_5 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _Mul_out_5); +65: float* Mul_out_5 = ((float*)(_Mul_out_5->memory)); +65: float* Mul_out_5_init = ((float*)(_Mul_out_5->memory)); +65: const float* fc_5__w_0_reshape = ((const float*)(_fc_5__w_0->memory)); +65: const float* var_63_reshape = ((const float*)(_var_63->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: Mul_out_5_init[((64 * i) + j)] = 0; +65: }; +65: }; +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: for (int32_t axis_k_5 = 0; axis_k_5 < 64; axis_k_5 += 1) { +65: Mul_out_5[((64 * i) + j)] = (Mul_out_5[((64 * i) + j)] + (var_63_reshape[((64 * i) + axis_k_5)] * fc_5__w_0_reshape[((64 * axis_k_5) + j)])); +65: }; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Mul_out_5); +65: } +65: +65: void fn_elementwise_add_16(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_67 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: const cinn_buffer_t* _fc_bias = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_t* _C_6 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +65: cinn_buffer_malloc((void*)(0), _C_6); +65: float* C_6 = ((float*)(_C_6->memory)); +65: const float* fc_bias = ((const float*)(_fc_bias->memory)); +65: const float* var_67 = ((const float*)(_var_67->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: C_6[((64 * i) + j)] = (var_67[((64 * i) + j)] + fc_bias[j]); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _C_6); +65: } +65: +65: void fn_relu_17(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_69 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: cinn_buffer_t* _Relu_output_6 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_malloc((void*)(0), _Relu_output_6); +65: float* Relu_output_6 = ((float*)(_Relu_output_6->memory)); +65: const float* var_69 = ((const float*)(_var_69->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: Relu_output_6[((64 * i) + j)] = cinn_max(var_69[((64 * i) + j)], 0); +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Relu_output_6); +65: } +65: +65: void fn_scale_18(void* _args, int32_t num_args) +65: { +65: const cinn_buffer_t* _var_71 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +65: cinn_buffer_t* _Scale_out_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +65: cinn_buffer_malloc((void*)(0), _Scale_out_1); +65: float* Scale_out_1 = ((float*)(_Scale_out_1->memory)); +65: const float* var_71 = ((const float*)(_var_71->memory)); +65: for (int32_t i = 0; i < 8; i += 1) { +65: for (int32_t j = 0; j < 64; j += 1) { +65: Scale_out_1[((64 * i) + j)] = var_71[((64 * i) + j)]; +65: }; +65: }; +65: cinn_buffer_free((void*)(0), _Scale_out_1); +65: } +65: +65: I0924 13:32:24.582810 28049 analysis_predictor.cc:138] Profiler is deactivated, and no profiling report will be generated. +65: I0924 13:32:24.583323 28049 analysis_predictor.cc:875] MODEL VERSION: 1.8.4 +65: I0924 13:32:24.583328 28049 analysis_predictor.cc:877] PREDICTOR VERSION: 1.8.4 +65: I0924 13:32:24.583356 28049 analysis_predictor.cc:474] ir_optim is turned off, no IR pass will be executed +65: --- Running analysis [ir_graph_build_pass] +65: --- Running analysis [ir_graph_clean_pass] +65: --- Running analysis [ir_analysis_pass] +65: --- Running analysis [ir_params_sync_among_devices_pass] +65: --- Running analysis [adjust_cudnn_workspace_size_pass] +65: --- Running analysis [inference_op_replace_pass] +65: --- Running analysis [ir_graph_to_program_pass] +65: I0924 13:32:24.585050 28049 analysis_predictor.cc:496] ======= optimize end ======= +65: . +65: ---------------------------------------------------------------------- +65: Ran 3 tests in 3.034s +65: +65: OK +65: f var_3 +65: var = elementwise_add(A, B) +65: var_0 = relu(var) +65: var_1, var_2, var_3 = conv2d(var_0, E, dilation=[1,1], padding=[0,0], stride=[1,1]) +65: var_4 = scale(var_3, scale=2, bias=0.5) +65: var_5, var_6 = softmax(var_4, axis=1) +65: result in paddle_verify: +65: +65: x_data [[0.5488281 0.71533203 0.60253906 0.5449219 0.42358398 0.6459961 +65: 0.4375 0.89160156 0.9638672 0.38354492 0.7915039 0.5288086 +65: 0.5678711 0.92578125 0.07104492 0.0871582 0.0202179 0.83251953 +65: 0.7783203 0.8701172 0.9785156 0.7993164 0.46142578 0.7807617 +65: 0.11828613 0.6401367 0.14331055 0.9448242 0.52197266 0.41455078] +65: [0.26464844 0.77441406 0.4560547 0.5683594 0.01878357 0.6176758 +65: 0.6123047 0.6166992 0.94384766 0.6816406 0.35961914 0.43701172 +65: 0.6977539 0.06021118 0.6669922 0.67041016 0.21032715 0.12890625 +65: 0.3154297 0.36376953 0.5703125 0.4387207 0.98828125 0.10205078 +65: 0.2088623 0.16125488 0.6533203 0.25317383 0.4663086 0.24438477] +65: [0.15893555 0.11035156 0.65625 0.1381836 0.1965332 0.36865234 +65: 0.8208008 0.09710693 0.8378906 0.09606934 0.9765625 0.46875 +65: 0.9765625 0.60498047 0.7392578 0.03918457 0.28271484 0.12017822 +65: 0.29614258 0.11871338 0.3178711 0.41430664 0.06414795 0.6923828 +65: 0.56640625 0.26538086 0.5234375 0.09393311 0.5761719 0.9291992 ] +65: [0.31860352 0.66748047 0.13183594 0.7163086 0.28930664 0.18322754 +65: 0.5864258 0.02011108 0.82910156 0.00469589 0.6777344 0.27001953 +65: 0.73535156 0.96240234 0.2487793 0.5761719 0.59228516 0.5722656 +65: 0.22302246 0.9526367 0.44702148 0.8461914 0.69970703 0.29736328 +65: 0.81396484 0.39648438 0.8808594 0.5810547 0.88183594 0.6923828 ]] +67: I0924 13:32:24.899466 28813 nn.cc:477] kernel_size length is: 2 +67: I0924 13:32:24.899487 28813 nn.cc:478] kernel_size is: 7 +67: I0924 13:32:24.899490 28813 nn.cc:479] padding_size length is: 4 +67: I0924 13:32:24.899492 28813 nn.cc:480] padding_size is: 0 +64/68 Test #65: test_cinn_frontend ..................... Passed 4.61 sec +67: I0924 13:32:25.657984 28813 graph_compiler.cc:39] [Debug] C Code is: +67: #include +67: #include +67: +67: void fn_conv2d_0(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _image = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: const cinn_buffer_t* _conv1_weights = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: cinn_buffer_t* _T_pad_out_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +67: cinn_buffer_t* _weights_dilation = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +67: cinn_buffer_t* _Conv2d_nchw_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +67: cinn_buffer_malloc((void*)(0), _T_pad_out_0); +67: cinn_buffer_malloc((void*)(0), _weights_dilation); +67: cinn_buffer_malloc((void*)(0), _Conv2d_nchw_out); +67: float* Conv2d_nchw_out = ((float*)(_Conv2d_nchw_out->memory)); +67: float* Conv2d_nchw_out_init = ((float*)(_Conv2d_nchw_out->memory)); +67: float* T_pad_out_0 = ((float*)(_T_pad_out_0->memory)); +67: const float* conv1_weights = ((const float*)(_conv1_weights->memory)); +67: const float* image = ((const float*)(_image->memory)); +67: float* weights_dilation = ((float*)(_weights_dilation->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 112; k += 1) { +67: for (int32_t a = 0; a < 112; a += 1) { +67: Conv2d_nchw_out_init[((12544 * j) + ((112 * k) + a))] = 0; +67: }; +67: }; +67: }; +67: for (int32_t i = 0; i < 64; i += 1) { +67: for (int32_t j = 0; j < 3; j += 1) { +67: for (int32_t k = 0; k < 7; k += 1) { +67: for (int32_t a = 0; a < 7; a += 1) { +67: weights_dilation[((147 * i) + ((49 * j) + ((7 * k) + a)))] = (((((a % 1) == 0) && ((k % 1) == 0))) ? conv1_weights[((a / 1) + (((k / 1) * 7) + ((147 * i) + (49 * j))))] : 0); +67: }; +67: }; +67: }; +67: }; +67: for (int32_t j = 0; j < 3; j += 1) { +67: for (int32_t k = 0; k < 230; k += 1) { +67: for (int32_t a = 0; a < 230; a += 1) { +67: T_pad_out_0[((52900 * j) + ((230 * k) + a))] = ((((a < 227) && ((a >= 3) && ((k < 227) && (k >= 3))))) ? image[(-675 + ((50176 * j) + ((224 * k) + a)))] : 0); +67: }; +67: }; +67: }; +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 112; k += 1) { +67: for (int32_t a = 0; a < 112; a += 1) { +67: for (int32_t fc = 0; fc < 3; fc += 1) { +67: for (int32_t fy = 0; fy < 7; fy += 1) { +67: for (int32_t fx = 0; fx < 7; fx += 1) { +67: Conv2d_nchw_out[((12544 * j) + ((112 * k) + a))] = (Conv2d_nchw_out[((12544 * j) + ((112 * k) + a))] + (T_pad_out_0[((2 * a) + ((52900 * fc) + ((230 * fy) + ((460 * k) + fx))))] * weights_dilation[((49 * fc) + ((7 * fy) + ((147 * j) + fx)))])); +67: }; +67: }; +67: }; +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _T_pad_out_0); +67: cinn_buffer_free((void*)(0), _weights_dilation); +67: cinn_buffer_free((void*)(0), _Conv2d_nchw_out); +67: } +67: +67: void fn_batchnorm_1(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_5 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: const cinn_buffer_t* _bn_conv1_scale = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: const cinn_buffer_t* _bn_conv1_offset = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +67: const cinn_buffer_t* _bn_conv1_mean = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +67: const cinn_buffer_t* _bn_conv1_variance = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +67: cinn_buffer_t* _BatchNorm_output_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[5])); +67: cinn_buffer_malloc((void*)(0), _BatchNorm_output_0); +67: float* BatchNorm_output_0 = ((float*)(_BatchNorm_output_0->memory)); +67: const float* bn_conv1_mean = ((const float*)(_bn_conv1_mean->memory)); +67: const float* bn_conv1_offset = ((const float*)(_bn_conv1_offset->memory)); +67: const float* bn_conv1_scale = ((const float*)(_bn_conv1_scale->memory)); +67: const float* bn_conv1_variance = ((const float*)(_bn_conv1_variance->memory)); +67: const float* var_5 = ((const float*)(_var_5->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 112; k += 1) { +67: for (int32_t a = 0; a < 112; a += 1) { +67: BatchNorm_output_0[((12544 * j) + ((112 * k) + a))] = (((1 / cinn_cpu_sqrt_fp32((bn_conv1_variance[j] + 1e-05))) * (bn_conv1_scale[j] * var_5[((12544 * j) + ((112 * k) + a))])) + ((-1 * ((1 / cinn_cpu_sqrt_fp32((bn_conv1_variance[j] + 1e-05))) * (bn_conv1_scale[j] * bn_conv1_mean[j]))) + bn_conv1_offset[j])); +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _BatchNorm_output_0); +67: } +67: +67: void fn_relu_2(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_15 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: cinn_buffer_t* _Relu_output = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: cinn_buffer_malloc((void*)(0), _Relu_output); +67: float* Relu_output = ((float*)(_Relu_output->memory)); +67: const float* var_15 = ((const float*)(_var_15->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 112; k += 1) { +67: for (int32_t a = 0; a < 112; a += 1) { +67: Relu_output[((12544 * j) + ((112 * k) + a))] = cinn_max(var_15[((12544 * j) + ((112 * k) + a))], 0); +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _Relu_output); +67: } +67: +67: void fn_pool2d_3(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_17 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: cinn_buffer_t* _pad_temp_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: cinn_buffer_t* _T_Pool2d_out_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +67: cinn_buffer_malloc((void*)(0), _pad_temp_0); +67: cinn_buffer_malloc((void*)(0), _T_Pool2d_out_0); +67: float* T_Pool2d_out_0 = ((float*)(_T_Pool2d_out_0->memory)); +67: float* T_Pool2d_out_0_init = ((float*)(_T_Pool2d_out_0->memory)); +67: float* pad_temp_0 = ((float*)(_pad_temp_0->memory)); +67: const float* var_17 = ((const float*)(_var_17->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: T_Pool2d_out_0_init[((3136 * j) + ((56 * k) + a))] = 0; +67: }; +67: }; +67: }; +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 114; k += 1) { +67: for (int32_t a = 0; a < 114; a += 1) { +67: pad_temp_0[((12996 * j) + ((114 * k) + a))] = ((((a < 113) && ((a >= 1) && ((k < 113) && (k >= 1))))) ? var_17[(-113 + ((12544 * j) + ((112 * k) + a)))] : -3.40282e+38); +67: }; +67: }; +67: }; +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: for (int32_t kernel_idx = 0; kernel_idx < 3; kernel_idx += 1) { +67: for (int32_t kernel_idx_0 = 0; kernel_idx_0 < 3; kernel_idx_0 += 1) { +67: T_Pool2d_out_0[((3136 * j) + ((56 * k) + a))] = cinn_max(T_Pool2d_out_0[((3136 * j) + ((56 * k) + a))], pad_temp_0[((2 * a) + ((12996 * j) + ((228 * k) + ((114 * kernel_idx) + kernel_idx_0))))]); +67: }; +67: }; +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _pad_temp_0); +67: cinn_buffer_free((void*)(0), _T_Pool2d_out_0); +67: } +67: +67: void fn_conv2d_9(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_20 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: const cinn_buffer_t* _res2a_branch1_weights = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: cinn_buffer_t* _T_Identity_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +67: cinn_buffer_t* _weights_dilation_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +67: cinn_buffer_t* _Conv2d_nchw_out_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +67: cinn_buffer_malloc((void*)(0), _T_Identity_out); +67: cinn_buffer_malloc((void*)(0), _weights_dilation_0); +67: cinn_buffer_malloc((void*)(0), _Conv2d_nchw_out_0); +67: float* Conv2d_nchw_out_0 = ((float*)(_Conv2d_nchw_out_0->memory)); +67: float* Conv2d_nchw_out_0_init = ((float*)(_Conv2d_nchw_out_0->memory)); +67: float* T_Identity_out = ((float*)(_T_Identity_out->memory)); +67: const float* res2a_branch1_weights = ((const float*)(_res2a_branch1_weights->memory)); +67: const float* var_20 = ((const float*)(_var_20->memory)); +67: float* weights_dilation_0 = ((float*)(_weights_dilation_0->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: Conv2d_nchw_out_0_init[((3136 * j) + ((56 * k) + a))] = 0; +67: }; +67: }; +67: }; +67: for (int32_t i = 0; i < 64; i += 1) { +67: for (int32_t j = 0; j < 64; j += 1) { +67: weights_dilation_0[((64 * i) + j)] = (((((0 % 1) == 0) && ((0 % 1) == 0))) ? res2a_branch1_weights[((64 * i) + j)] : 0); +67: }; +67: }; +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: T_Identity_out[((3136 * j) + ((56 * k) + a))] = var_20[((3136 * j) + ((56 * k) + a))]; +67: }; +67: }; +67: }; +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: for (int32_t fc_0 = 0; fc_0 < 64; fc_0 += 1) { +67: Conv2d_nchw_out_0[((3136 * j) + ((56 * k) + a))] = (Conv2d_nchw_out_0[((3136 * j) + ((56 * k) + a))] + (T_Identity_out[((3136 * fc_0) + ((56 * k) + a))] * weights_dilation_0[((64 * j) + fc_0)])); +67: }; +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _T_Identity_out); +67: cinn_buffer_free((void*)(0), _weights_dilation_0); +67: cinn_buffer_free((void*)(0), _Conv2d_nchw_out_0); +67: } +67: +67: void fn_conv2d_4(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_20 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: const cinn_buffer_t* _res2a_branch2a_weights = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: cinn_buffer_t* _T_pad_out_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +67: cinn_buffer_t* _weights_dilation_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +67: cinn_buffer_t* _Conv2d_nchw_out_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +67: cinn_buffer_malloc((void*)(0), _T_pad_out_1); +67: cinn_buffer_malloc((void*)(0), _weights_dilation_1); +67: cinn_buffer_malloc((void*)(0), _Conv2d_nchw_out_1); +67: float* Conv2d_nchw_out_1 = ((float*)(_Conv2d_nchw_out_1->memory)); +67: float* Conv2d_nchw_out_1_init = ((float*)(_Conv2d_nchw_out_1->memory)); +67: float* T_pad_out_1 = ((float*)(_T_pad_out_1->memory)); +67: const float* res2a_branch2a_weights = ((const float*)(_res2a_branch2a_weights->memory)); +67: const float* var_20 = ((const float*)(_var_20->memory)); +67: float* weights_dilation_1 = ((float*)(_weights_dilation_1->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: Conv2d_nchw_out_1_init[((3136 * j) + ((56 * k) + a))] = 0; +67: }; +67: }; +67: }; +67: for (int32_t i = 0; i < 64; i += 1) { +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 3; k += 1) { +67: for (int32_t a = 0; a < 3; a += 1) { +67: weights_dilation_1[((576 * i) + ((9 * j) + ((3 * k) + a)))] = (((((a % 1) == 0) && ((k % 1) == 0))) ? res2a_branch2a_weights[((a / 1) + (((k / 1) * 3) + ((576 * i) + (9 * j))))] : 0); +67: }; +67: }; +67: }; +67: }; +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 58; k += 1) { +67: for (int32_t a = 0; a < 58; a += 1) { +67: T_pad_out_1[((3364 * j) + ((58 * k) + a))] = ((((a < 57) && ((a >= 1) && ((k < 57) && (k >= 1))))) ? var_20[(-57 + ((3136 * j) + ((56 * k) + a)))] : 0); +67: }; +67: }; +67: }; +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: for (int32_t fc_1 = 0; fc_1 < 64; fc_1 += 1) { +67: for (int32_t fy_1 = 0; fy_1 < 3; fy_1 += 1) { +67: for (int32_t fx_1 = 0; fx_1 < 3; fx_1 += 1) { +67: Conv2d_nchw_out_1[((3136 * j) + ((56 * k) + a))] = (Conv2d_nchw_out_1[((3136 * j) + ((56 * k) + a))] + (T_pad_out_1[((3364 * fc_1) + ((58 * fy_1) + ((58 * k) + (a + fx_1))))] * weights_dilation_1[((9 * fc_1) + ((3 * fy_1) + ((576 * j) + fx_1)))])); +67: }; +67: }; +67: }; +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _T_pad_out_1); +67: cinn_buffer_free((void*)(0), _weights_dilation_1); +67: cinn_buffer_free((void*)(0), _Conv2d_nchw_out_1); +67: } +67: +67: void fn_batchnorm_10(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_60 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: const cinn_buffer_t* _bn2a_branch1_scale = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: const cinn_buffer_t* _bn2a_branch1_offset = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +67: const cinn_buffer_t* _bn2a_branch1_mean = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +67: const cinn_buffer_t* _bn2a_branch1_variance = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +67: cinn_buffer_t* _BatchNorm_output_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[5])); +67: cinn_buffer_malloc((void*)(0), _BatchNorm_output_1); +67: float* BatchNorm_output_1 = ((float*)(_BatchNorm_output_1->memory)); +67: const float* bn2a_branch1_mean = ((const float*)(_bn2a_branch1_mean->memory)); +67: const float* bn2a_branch1_offset = ((const float*)(_bn2a_branch1_offset->memory)); +67: const float* bn2a_branch1_scale = ((const float*)(_bn2a_branch1_scale->memory)); +67: const float* bn2a_branch1_variance = ((const float*)(_bn2a_branch1_variance->memory)); +67: const float* var_60 = ((const float*)(_var_60->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: BatchNorm_output_1[((3136 * j) + ((56 * k) + a))] = (((1 / cinn_cpu_sqrt_fp32((bn2a_branch1_variance[j] + 1e-05))) * (bn2a_branch1_scale[j] * var_60[((3136 * j) + ((56 * k) + a))])) + ((-1 * ((1 / cinn_cpu_sqrt_fp32((bn2a_branch1_variance[j] + 1e-05))) * (bn2a_branch1_scale[j] * bn2a_branch1_mean[j]))) + bn2a_branch1_offset[j])); +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _BatchNorm_output_1); +67: } +67: +67: void fn_batchnorm_5(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_26 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: const cinn_buffer_t* _bn2a_branch2a_scale = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: const cinn_buffer_t* _bn2a_branch2a_offset = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +67: const cinn_buffer_t* _bn2a_branch2a_mean = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +67: const cinn_buffer_t* _bn2a_branch2a_variance = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +67: cinn_buffer_t* _BatchNorm_output_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[5])); +67: cinn_buffer_malloc((void*)(0), _BatchNorm_output_2); +67: float* BatchNorm_output_2 = ((float*)(_BatchNorm_output_2->memory)); +67: const float* bn2a_branch2a_mean = ((const float*)(_bn2a_branch2a_mean->memory)); +67: const float* bn2a_branch2a_offset = ((const float*)(_bn2a_branch2a_offset->memory)); +67: const float* bn2a_branch2a_scale = ((const float*)(_bn2a_branch2a_scale->memory)); +67: const float* bn2a_branch2a_variance = ((const float*)(_bn2a_branch2a_variance->memory)); +67: const float* var_26 = ((const float*)(_var_26->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: BatchNorm_output_2[((3136 * j) + ((56 * k) + a))] = (((1 / cinn_cpu_sqrt_fp32((bn2a_branch2a_variance[j] + 1e-05))) * (bn2a_branch2a_scale[j] * var_26[((3136 * j) + ((56 * k) + a))])) + ((-1 * ((1 / cinn_cpu_sqrt_fp32((bn2a_branch2a_variance[j] + 1e-05))) * (bn2a_branch2a_scale[j] * bn2a_branch2a_mean[j]))) + bn2a_branch2a_offset[j])); +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _BatchNorm_output_2); +67: } +67: +67: void fn_relu_6(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_36 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: cinn_buffer_t* _Relu_output_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: cinn_buffer_malloc((void*)(0), _Relu_output_0); +67: float* Relu_output_0 = ((float*)(_Relu_output_0->memory)); +67: const float* var_36 = ((const float*)(_var_36->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: Relu_output_0[((3136 * j) + ((56 * k) + a))] = cinn_max(var_36[((3136 * j) + ((56 * k) + a))], 0); +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _Relu_output_0); +67: } +67: +67: void fn_conv2d_7(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_38 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: const cinn_buffer_t* _res2a_branch2b_weights = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: cinn_buffer_t* _T_pad_out_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +67: cinn_buffer_t* _weights_dilation_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +67: cinn_buffer_t* _Conv2d_nchw_out_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +67: cinn_buffer_malloc((void*)(0), _T_pad_out_2); +67: cinn_buffer_malloc((void*)(0), _weights_dilation_2); +67: cinn_buffer_malloc((void*)(0), _Conv2d_nchw_out_2); +67: float* Conv2d_nchw_out_2 = ((float*)(_Conv2d_nchw_out_2->memory)); +67: float* Conv2d_nchw_out_2_init = ((float*)(_Conv2d_nchw_out_2->memory)); +67: float* T_pad_out_2 = ((float*)(_T_pad_out_2->memory)); +67: const float* res2a_branch2b_weights = ((const float*)(_res2a_branch2b_weights->memory)); +67: const float* var_38 = ((const float*)(_var_38->memory)); +67: float* weights_dilation_2 = ((float*)(_weights_dilation_2->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: Conv2d_nchw_out_2_init[((3136 * j) + ((56 * k) + a))] = 0; +67: }; +67: }; +67: }; +67: for (int32_t i = 0; i < 64; i += 1) { +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 3; k += 1) { +67: for (int32_t a = 0; a < 3; a += 1) { +67: weights_dilation_2[((576 * i) + ((9 * j) + ((3 * k) + a)))] = (((((a % 1) == 0) && ((k % 1) == 0))) ? res2a_branch2b_weights[((a / 1) + (((k / 1) * 3) + ((576 * i) + (9 * j))))] : 0); +67: }; +67: }; +67: }; +67: }; +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 58; k += 1) { +67: for (int32_t a = 0; a < 58; a += 1) { +67: T_pad_out_2[((3364 * j) + ((58 * k) + a))] = ((((a < 57) && ((a >= 1) && ((k < 57) && (k >= 1))))) ? var_38[(-57 + ((3136 * j) + ((56 * k) + a)))] : 0); +67: }; +67: }; +67: }; +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: for (int32_t fc_2 = 0; fc_2 < 64; fc_2 += 1) { +67: for (int32_t fy_2 = 0; fy_2 < 3; fy_2 += 1) { +67: for (int32_t fx_2 = 0; fx_2 < 3; fx_2 += 1) { +67: Conv2d_nchw_out_2[((3136 * j) + ((56 * k) + a))] = (Conv2d_nchw_out_2[((3136 * j) + ((56 * k) + a))] + (T_pad_out_2[((3364 * fc_2) + ((58 * fy_2) + ((58 * k) + (a + fx_2))))] * weights_dilation_2[((9 * fc_2) + ((3 * fy_2) + ((576 * j) + fx_2)))])); +67: }; +67: }; +67: }; +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _T_pad_out_2); +67: cinn_buffer_free((void*)(0), _weights_dilation_2); +67: cinn_buffer_free((void*)(0), _Conv2d_nchw_out_2); +67: } +67: +67: void fn_batchnorm_8(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_44 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: const cinn_buffer_t* _bn2a_branch2b_scale = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: const cinn_buffer_t* _bn2a_branch2b_offset = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +67: const cinn_buffer_t* _bn2a_branch2b_mean = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +67: const cinn_buffer_t* _bn2a_branch2b_variance = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +67: cinn_buffer_t* _BatchNorm_output_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[5])); +67: cinn_buffer_malloc((void*)(0), _BatchNorm_output_3); +67: float* BatchNorm_output_3 = ((float*)(_BatchNorm_output_3->memory)); +67: const float* bn2a_branch2b_mean = ((const float*)(_bn2a_branch2b_mean->memory)); +67: const float* bn2a_branch2b_offset = ((const float*)(_bn2a_branch2b_offset->memory)); +67: const float* bn2a_branch2b_scale = ((const float*)(_bn2a_branch2b_scale->memory)); +67: const float* bn2a_branch2b_variance = ((const float*)(_bn2a_branch2b_variance->memory)); +67: const float* var_44 = ((const float*)(_var_44->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: BatchNorm_output_3[((3136 * j) + ((56 * k) + a))] = (((1 / cinn_cpu_sqrt_fp32((bn2a_branch2b_variance[j] + 1e-05))) * (bn2a_branch2b_scale[j] * var_44[((3136 * j) + ((56 * k) + a))])) + ((-1 * ((1 / cinn_cpu_sqrt_fp32((bn2a_branch2b_variance[j] + 1e-05))) * (bn2a_branch2b_scale[j] * bn2a_branch2b_mean[j]))) + bn2a_branch2b_offset[j])); +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _BatchNorm_output_3); +67: } +67: +67: void fn_elementwise_add_11(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_70 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: const cinn_buffer_t* _var_54 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: cinn_buffer_t* _C = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +67: cinn_buffer_malloc((void*)(0), _C); +67: float* C = ((float*)(_C->memory)); +67: const float* var_54 = ((const float*)(_var_54->memory)); +67: const float* var_70 = ((const float*)(_var_70->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: C[((3136 * j) + ((56 * k) + a))] = (var_70[((3136 * j) + ((56 * k) + a))] + var_54[((3136 * j) + ((56 * k) + a))]); +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _C); +67: } +67: +67: void fn_relu_12(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_72 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: cinn_buffer_t* _Relu_output_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: cinn_buffer_malloc((void*)(0), _Relu_output_1); +67: float* Relu_output_1 = ((float*)(_Relu_output_1->memory)); +67: const float* var_72 = ((const float*)(_var_72->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: Relu_output_1[((3136 * j) + ((56 * k) + a))] = cinn_max(var_72[((3136 * j) + ((56 * k) + a))], 0); +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _Relu_output_1); +67: } +67: +67: void fn_conv2d_13(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_74 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: const cinn_buffer_t* _res2b_branch2a_weights = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: cinn_buffer_t* _T_pad_out_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +67: cinn_buffer_t* _weights_dilation_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +67: cinn_buffer_t* _Conv2d_nchw_out_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +67: cinn_buffer_malloc((void*)(0), _T_pad_out_3); +67: cinn_buffer_malloc((void*)(0), _weights_dilation_3); +67: cinn_buffer_malloc((void*)(0), _Conv2d_nchw_out_3); +67: float* Conv2d_nchw_out_3 = ((float*)(_Conv2d_nchw_out_3->memory)); +67: float* Conv2d_nchw_out_3_init = ((float*)(_Conv2d_nchw_out_3->memory)); +67: float* T_pad_out_3 = ((float*)(_T_pad_out_3->memory)); +67: const float* res2b_branch2a_weights = ((const float*)(_res2b_branch2a_weights->memory)); +67: const float* var_74 = ((const float*)(_var_74->memory)); +67: float* weights_dilation_3 = ((float*)(_weights_dilation_3->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: Conv2d_nchw_out_3_init[((3136 * j) + ((56 * k) + a))] = 0; +67: }; +67: }; +67: }; +67: for (int32_t i = 0; i < 64; i += 1) { +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 3; k += 1) { +67: for (int32_t a = 0; a < 3; a += 1) { +67: weights_dilation_3[((576 * i) + ((9 * j) + ((3 * k) + a)))] = (((((a % 1) == 0) && ((k % 1) == 0))) ? res2b_branch2a_weights[((a / 1) + (((k / 1) * 3) + ((576 * i) + (9 * j))))] : 0); +67: }; +67: }; +67: }; +67: }; +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 58; k += 1) { +67: for (int32_t a = 0; a < 58; a += 1) { +67: T_pad_out_3[((3364 * j) + ((58 * k) + a))] = ((((a < 57) && ((a >= 1) && ((k < 57) && (k >= 1))))) ? var_74[(-57 + ((3136 * j) + ((56 * k) + a)))] : 0); +67: }; +67: }; +67: }; +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: for (int32_t fc_3 = 0; fc_3 < 64; fc_3 += 1) { +67: for (int32_t fy_3 = 0; fy_3 < 3; fy_3 += 1) { +67: for (int32_t fx_3 = 0; fx_3 < 3; fx_3 += 1) { +67: Conv2d_nchw_out_3[((3136 * j) + ((56 * k) + a))] = (Conv2d_nchw_out_3[((3136 * j) + ((56 * k) + a))] + (T_pad_out_3[((3364 * fc_3) + ((58 * fy_3) + ((58 * k) + (a + fx_3))))] * weights_dilation_3[((9 * fc_3) + ((3 * fy_3) + ((576 * j) + fx_3)))])); +67: }; +67: }; +67: }; +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _T_pad_out_3); +67: cinn_buffer_free((void*)(0), _weights_dilation_3); +67: cinn_buffer_free((void*)(0), _Conv2d_nchw_out_3); +67: } +67: +67: void fn_batchnorm_14(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_80 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: const cinn_buffer_t* _bn2b_branch2a_scale = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: const cinn_buffer_t* _bn2b_branch2a_offset = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +67: const cinn_buffer_t* _bn2b_branch2a_mean = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +67: const cinn_buffer_t* _bn2b_branch2a_variance = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +67: cinn_buffer_t* _BatchNorm_output_4 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[5])); +67: cinn_buffer_malloc((void*)(0), _BatchNorm_output_4); +67: float* BatchNorm_output_4 = ((float*)(_BatchNorm_output_4->memory)); +67: const float* bn2b_branch2a_mean = ((const float*)(_bn2b_branch2a_mean->memory)); +67: const float* bn2b_branch2a_offset = ((const float*)(_bn2b_branch2a_offset->memory)); +67: const float* bn2b_branch2a_scale = ((const float*)(_bn2b_branch2a_scale->memory)); +67: const float* bn2b_branch2a_variance = ((const float*)(_bn2b_branch2a_variance->memory)); +67: const float* var_80 = ((const float*)(_var_80->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: BatchNorm_output_4[((3136 * j) + ((56 * k) + a))] = (((1 / cinn_cpu_sqrt_fp32((bn2b_branch2a_variance[j] + 1e-05))) * (bn2b_branch2a_scale[j] * var_80[((3136 * j) + ((56 * k) + a))])) + ((-1 * ((1 / cinn_cpu_sqrt_fp32((bn2b_branch2a_variance[j] + 1e-05))) * (bn2b_branch2a_scale[j] * bn2b_branch2a_mean[j]))) + bn2b_branch2a_offset[j])); +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _BatchNorm_output_4); +67: } +67: +67: void fn_relu_15(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_90 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: cinn_buffer_t* _Relu_output_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: cinn_buffer_malloc((void*)(0), _Relu_output_2); +67: float* Relu_output_2 = ((float*)(_Relu_output_2->memory)); +67: const float* var_90 = ((const float*)(_var_90->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: Relu_output_2[((3136 * j) + ((56 * k) + a))] = cinn_max(var_90[((3136 * j) + ((56 * k) + a))], 0); +67: }; +67: }; +67: }; +67: cinn_buffer_free((void*)(0), _Relu_output_2); +67: } +67: +67: void fn_conv2d_16(void* _args, int32_t num_args) +67: { +67: const cinn_buffer_t* _var_92 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +67: const cinn_buffer_t* _res2b_branch2b_weights = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +67: cinn_buffer_t* _T_pad_out_4 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +67: cinn_buffer_t* _weights_dilation_4 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +67: cinn_buffer_t* _Conv2d_nchw_out_4 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +67: cinn_buffer_malloc((void*)(0), _T_pad_out_4); +67: cinn_buffer_malloc((void*)(0), _weights_dilation_4); +67: cinn_buffer_malloc((void*)(0), _Conv2d_nchw_out_4); +67: float* Conv2d_nchw_out_4 = ((float*)(_Conv2d_nchw_out_4->memory)); +67: float* Conv2d_nchw_out_4_init = ((float*)(_Conv2d_nchw_out_4->memory)); +67: float* T_pad_out_4 = ((float*)(_T_pad_out_4->memory)); +67: const float* res2b_branch2b_weights = ((const float*)(_res2b_branch2b_weights->memory)); +67: const float* var_92 = ((const float*)(_var_92->memory)); +67: float* weights_dilation_4 = ((float*)(_weights_dilation_4->memory)); +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 56; k += 1) { +67: for (int32_t a = 0; a < 56; a += 1) { +67: Conv2d_nchw_out_4_init[((3136 * j) + ((56 * k) + a))] = 0; +67: }; +67: }; +67: }; +67: for (int32_t i = 0; i < 64; i += 1) { +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 3; k += 1) { +67: for (int32_t a = 0; a < 3; a += 1) { +67: weights_dilation_4[((576 * i) + ((9 * j) + ((3 * k) + a)))] = (((((a % 1) == 0) && ((k % 1) == 0))) ? res2b_branch2b_weights[((a / 1) + (((k / 1) * 3) + ((576 * i) + (9 * j))))] : 0); +67: }; +67: }; +67: }; +67: }; +67: for (int32_t j = 0; j < 64; j += 1) { +67: for (int32_t k = 0; k < 58; k += 1) { +67: for (int32_t a = 0; a < 58; a += 1) { +67: T_pad_out_4[((3364 * +67: I0924 13:32:26.094007 28813 codegen_llvm.cc:344] instr: i32 49 +67: W0924 13:32:26.746232 28813 init.cc:226] Warning: PaddlePaddle catches a failure signal, it may not work properly +67: W0924 13:32:26.746279 28813 init.cc:228] You could check whether you killed PaddlePaddle thread/process accidentally or report the case to PaddlePaddle +67: W0924 13:32:26.746281 28813 init.cc:231] The detail failure signal is: +67: +67: W0924 13:32:26.746284 28813 init.cc:234] *** Aborted at 1600954346 (unix time) try "date -d @1600954346" if you are using GNU date *** +67: W0924 13:32:26.747192 28813 init.cc:234] PC: @ 0x0 (unknown) +67: W0924 13:32:26.747408 28813 init.cc:234] *** SIGSEGV (@0x7fb48a9f8e4c) received by PID 28813 (TID 0x7fb4b0ce7740) from PID 18446744071740296780; stack trace: *** +67: W0924 13:32:26.748113 28813 init.cc:234] @ 0x7fb4b071af20 (unknown) +67: W0924 13:32:26.748739 28813 init.cc:234] @ 0x7fb4b0b99661 (unknown) +67: W0924 13:32:26.750941 28813 init.cc:234] @ 0x7fb347c177d5 cinn::hlir::framework::Instruction::Run() +67: W0924 13:32:26.752528 28813 init.cc:234] @ 0x7fb347c178f2 cinn::hlir::framework::Program::Execute() +67: W0924 13:32:26.754045 28813 init.cc:234] @ 0x7fb347ea62d6 cinn::frontend::Executor::Run() +67: W0924 13:32:26.754400 28813 init.cc:234] @ 0x7fb347c216d9 _ZZN8pybind1112cpp_functionC4IvN4cinn8frontend8ExecutorEJEJNS_4nameENS_9is_methodENS_7siblingEEEEMT0_FT_DpT1_EDpRKT2_ENKUlPS4_E_clESI_ +67: W0924 13:32:26.755192 28813 init.cc:234] @ 0x7fb347c34443 _ZN8pybind116detail15argument_loaderIJPN4cinn8frontend8ExecutorEEE9call_implIvRZNS_12cpp_functionC4IvS4_JEJNS_4nameENS_9is_methodENS_7siblingEEEEMT0_FT_DpT1_EDpRKT2_EUlS5_E_JLm0EENS0_9void_typeEEESE_OSD_St16integer_sequenceImJXspT1_EEEOT2_ +67: W0924 13:32:26.755627 28813 init.cc:234] @ 0x7fb347c2deec _ZNO8pybind116detail15argument_loaderIJPN4cinn8frontend8ExecutorEEE4callIvNS0_9void_typeERZNS_12cpp_functionC4IvS4_JEJNS_4nameENS_9is_methodENS_7siblingEEEEMT0_FT_DpT1_EDpRKT2_EUlS5_E_EENSt9enable_ifIXsrSt7is_voidISF_E5valueES8_E4typeEOT1_ +67: W0924 13:32:26.756399 28813 init.cc:234] @ 0x7fb347c29ac6 _ZZN8pybind1112cpp_function10initializeIZNS0_C4IvN4cinn8frontend8ExecutorEJEJNS_4nameENS_9is_methodENS_7siblingEEEEMT0_FT_DpT1_EDpRKT2_EUlPS5_E_vJSJ_EJS6_S7_S8_EEEvOSA_PFS9_SC_ESI_ENKUlRNS_6detail13function_callEE1_clESQ_ +67: W0924 13:32:26.756747 28813 init.cc:234] @ 0x7fb347c29bf3 _ZZN8pybind1112cpp_function10initializeIZNS0_C4IvN4cinn8frontend8ExecutorEJEJNS_4nameENS_9is_methodENS_7siblingEEEEMT0_FT_DpT1_EDpRKT2_EUlPS5_E_vJSJ_EJS6_S7_S8_EEEvOSA_PFS9_SC_ESI_ENUlRNS_6detail13function_callEE1_4_FUNESQ_ +67: W0924 13:32:26.757316 28813 init.cc:234] @ 0x7fb34784e4d9 pybind11::cpp_function::dispatcher() +67: W0924 13:32:26.757386 28813 init.cc:234] @ 0x5674fc _PyCFunction_FastCallDict +67: W0924 13:32:26.757441 28813 init.cc:234] @ 0x50abb3 (unknown) +67: W0924 13:32:26.757491 28813 init.cc:234] @ 0x50c5b9 _PyEval_EvalFrameDefault +67: W0924 13:32:26.757541 28813 init.cc:234] @ 0x509d48 (unknown) +67: W0924 13:32:26.757591 28813 init.cc:234] @ 0x50aa7d (unknown) +67: W0924 13:32:26.757639 28813 init.cc:234] @ 0x50c5b9 _PyEval_EvalFrameDefault +67: W0924 13:32:26.757689 28813 init.cc:234] @ 0x508245 (unknown) +67: W0924 13:32:26.757722 28813 init.cc:234] @ 0x509642 _PyFunction_FastCallDict +67: W0924 13:32:26.757778 28813 init.cc:234] @ 0x595311 (unknown) +67: W0924 13:32:26.757822 28813 init.cc:234] @ 0x5a067e PyObject_Call +67: W0924 13:32:26.757869 28813 init.cc:234] @ 0x50d966 _PyEval_EvalFrameDefault +67: W0924 13:32:26.757920 28813 init.cc:234] @ 0x508245 (unknown) +67: W0924 13:32:26.757954 28813 init.cc:234] @ 0x509642 _PyFunction_FastCallDict +67: W0924 13:32:26.758008 28813 init.cc:234] @ 0x595311 (unknown) +67: W0924 13:32:26.758064 28813 init.cc:234] @ 0x54b1e1 (unknown) +67: W0924 13:32:26.758112 28813 init.cc:234] @ 0x5aa6ec _PyObject_FastCallKeywords +67: W0924 13:32:26.758174 28813 init.cc:234] @ 0x50abb3 (unknown) +67: W0924 13:32:26.758225 28813 init.cc:234] @ 0x50c5b9 _PyEval_EvalFrameDefault +67: W0924 13:32:26.758278 28813 init.cc:234] @ 0x508245 (unknown) +67: W0924 13:32:26.758311 28813 init.cc:234] @ 0x509642 _PyFunction_FastCallDict +67: W0924 13:32:26.758368 28813 init.cc:234] @ 0x595311 (unknown) +67: Segmentation fault +65/68 Test #67: test_cinn_real_resnet18 ................***Failed 5.96 sec +57: ./home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +57: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +57: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +57: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +57: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +68: I0924 13:32:27.612771 28814 nn.cc:477] kernel_size length is: 2 +68: I0924 13:32:27.612797 28814 nn.cc:478] kernel_size is: 7 +68: I0924 13:32:27.612800 28814 nn.cc:479] padding_size length is: 4 +68: I0924 13:32:27.612802 28814 nn.cc:480] padding_size is: 0 +68: I0924 13:32:28.877796 28814 graph_compiler.cc:39] [Debug] C Code is: +68: #include +68: #include +68: +68: void fn_conv2d_0(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _image = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: const cinn_buffer_t* _conv1_1_weights = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: cinn_buffer_t* _T_pad_out_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +68: cinn_buffer_t* _weights_dilation = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +68: cinn_buffer_t* _Conv2d_nchw_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +68: cinn_buffer_malloc((void*)(0), _T_pad_out_0); +68: cinn_buffer_malloc((void*)(0), _weights_dilation); +68: cinn_buffer_malloc((void*)(0), _Conv2d_nchw_out); +68: float* Conv2d_nchw_out = ((float*)(_Conv2d_nchw_out->memory)); +68: float* Conv2d_nchw_out_init = ((float*)(_Conv2d_nchw_out->memory)); +68: float* T_pad_out_0 = ((float*)(_T_pad_out_0->memory)); +68: const float* conv1_1_weights = ((const float*)(_conv1_1_weights->memory)); +68: const float* image = ((const float*)(_image->memory)); +68: float* weights_dilation = ((float*)(_weights_dilation->memory)); +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: Conv2d_nchw_out_init[((12544 * j) + ((112 * k) + a))] = 0; +68: }; +68: }; +68: }; +68: for (int32_t i = 0; i < 32; i += 1) { +68: for (int32_t j = 0; j < 3; j += 1) { +68: for (int32_t k = 0; k < 3; k += 1) { +68: for (int32_t a = 0; a < 3; a += 1) { +68: weights_dilation[((27 * i) + ((9 * j) + ((3 * k) + a)))] = (((((a % 1) == 0) && ((k % 1) == 0))) ? conv1_1_weights[((a / 1) + (((k / 1) * 3) + ((27 * i) + (9 * j))))] : 0); +68: }; +68: }; +68: }; +68: }; +68: for (int32_t j = 0; j < 3; j += 1) { +68: for (int32_t k = 0; k < 226; k += 1) { +68: for (int32_t a = 0; a < 226; a += 1) { +68: T_pad_out_0[((51076 * j) + ((226 * k) + a))] = ((((a < 225) && ((a >= 1) && ((k < 225) && (k >= 1))))) ? image[(-225 + ((50176 * j) + ((224 * k) + a)))] : 0); +68: }; +68: }; +68: }; +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: for (int32_t fc = 0; fc < 3; fc += 1) { +68: for (int32_t fy = 0; fy < 3; fy += 1) { +68: for (int32_t fx = 0; fx < 3; fx += 1) { +68: Conv2d_nchw_out[((12544 * j) + ((112 * k) + a))] = (Conv2d_nchw_out[((12544 * j) + ((112 * k) + a))] + (T_pad_out_0[((2 * a) + ((51076 * fc) + ((226 * fy) + ((452 * k) + fx))))] * weights_dilation[((9 * fc) + ((3 * fy) + ((27 * j) + fx)))])); +68: }; +68: }; +68: }; +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _T_pad_out_0); +68: cinn_buffer_free((void*)(0), _weights_dilation); +68: cinn_buffer_free((void*)(0), _Conv2d_nchw_out); +68: } +68: +68: void fn_batchnorm_1(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_5 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: const cinn_buffer_t* _conv1_1_bn_scale = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: const cinn_buffer_t* _conv1_1_bn_offset = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +68: const cinn_buffer_t* _conv1_1_bn_mean = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +68: const cinn_buffer_t* _conv1_1_bn_variance = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +68: cinn_buffer_t* _BatchNorm_output_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[5])); +68: cinn_buffer_malloc((void*)(0), _BatchNorm_output_0); +68: float* BatchNorm_output_0 = ((float*)(_BatchNorm_output_0->memory)); +68: const float* conv1_1_bn_mean = ((const float*)(_conv1_1_bn_mean->memory)); +68: const float* conv1_1_bn_offset = ((const float*)(_conv1_1_bn_offset->memory)); +68: const float* conv1_1_bn_scale = ((const float*)(_conv1_1_bn_scale->memory)); +68: const float* conv1_1_bn_variance = ((const float*)(_conv1_1_bn_variance->memory)); +68: const float* var_5 = ((const float*)(_var_5->memory)); +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: BatchNorm_output_0[((12544 * j) + ((112 * k) + a))] = (((1 / cinn_cpu_sqrt_fp32((conv1_1_bn_variance[j] + 1e-05))) * (conv1_1_bn_scale[j] * var_5[((12544 * j) + ((112 * k) + a))])) + ((-1 * ((1 / cinn_cpu_sqrt_fp32((conv1_1_bn_variance[j] + 1e-05))) * (conv1_1_bn_scale[j] * conv1_1_bn_mean[j]))) + conv1_1_bn_offset[j])); +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _BatchNorm_output_0); +68: } +68: +68: void fn_relu6_2(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_15 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: cinn_buffer_t* _Relu6_output = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: cinn_buffer_malloc((void*)(0), _Relu6_output); +68: float* Relu6_output = ((float*)(_Relu6_output->memory)); +68: const float* var_15 = ((const float*)(_var_15->memory)); +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: Relu6_output[((12544 * j) + ((112 * k) + a))] = cinn_min(cinn_max(var_15[((12544 * j) + ((112 * k) + a))], 0), 6); +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _Relu6_output); +68: } +68: +68: void fn_conv2d_3(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_17 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: const cinn_buffer_t* _conv2_1_expand_weights = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: cinn_buffer_t* _T_Identity_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +68: cinn_buffer_t* _weights_dilation_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +68: cinn_buffer_t* _Conv2d_nchw_out_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +68: cinn_buffer_malloc((void*)(0), _T_Identity_out); +68: cinn_buffer_malloc((void*)(0), _weights_dilation_0); +68: cinn_buffer_malloc((void*)(0), _Conv2d_nchw_out_0); +68: float* Conv2d_nchw_out_0 = ((float*)(_Conv2d_nchw_out_0->memory)); +68: float* Conv2d_nchw_out_0_init = ((float*)(_Conv2d_nchw_out_0->memory)); +68: float* T_Identity_out = ((float*)(_T_Identity_out->memory)); +68: const float* conv2_1_expand_weights = ((const float*)(_conv2_1_expand_weights->memory)); +68: const float* var_17 = ((const float*)(_var_17->memory)); +68: float* weights_dilation_0 = ((float*)(_weights_dilation_0->memory)); +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: Conv2d_nchw_out_0_init[((12544 * j) + ((112 * k) + a))] = 0; +68: }; +68: }; +68: }; +68: for (int32_t i = 0; i < 32; i += 1) { +68: for (int32_t j = 0; j < 32; j += 1) { +68: weights_dilation_0[((32 * i) + j)] = (((((0 % 1) == 0) && ((0 % 1) == 0))) ? conv2_1_expand_weights[((32 * i) + j)] : 0); +68: }; +68: }; +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: T_Identity_out[((12544 * j) + ((112 * k) + a))] = var_17[((12544 * j) + ((112 * k) + a))]; +68: }; +68: }; +68: }; +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: for (int32_t fc_0 = 0; fc_0 < 32; fc_0 += 1) { +68: Conv2d_nchw_out_0[((12544 * j) + ((112 * k) + a))] = (Conv2d_nchw_out_0[((12544 * j) + ((112 * k) + a))] + (T_Identity_out[((12544 * fc_0) + ((112 * k) + a))] * weights_dilation_0[((32 * j) + fc_0)])); +68: }; +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _T_Identity_out); +68: cinn_buffer_free((void*)(0), _weights_dilation_0); +68: cinn_buffer_free((void*)(0), _Conv2d_nchw_out_0); +68: } +68: +68: void fn_batchnorm_4(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_23 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: const cinn_buffer_t* _conv2_1_expand_bn_scale = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: const cinn_buffer_t* _conv2_1_expand_bn_offset = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +68: const cinn_buffer_t* _conv2_1_expand_bn_mean = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +68: const cinn_buffer_t* _conv2_1_expand_bn_variance = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +68: cinn_buffer_t* _BatchNorm_output_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[5])); +68: cinn_buffer_malloc((void*)(0), _BatchNorm_output_1); +68: float* BatchNorm_output_1 = ((float*)(_BatchNorm_output_1->memory)); +68: const float* conv2_1_expand_bn_mean = ((const float*)(_conv2_1_expand_bn_mean->memory)); +68: const float* conv2_1_expand_bn_offset = ((const float*)(_conv2_1_expand_bn_offset->memory)); +68: const float* conv2_1_expand_bn_scale = ((const float*)(_conv2_1_expand_bn_scale->memory)); +68: const float* conv2_1_expand_bn_variance = ((const float*)(_conv2_1_expand_bn_variance->memory)); +68: const float* var_23 = ((const float*)(_var_23->memory)); +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: BatchNorm_output_1[((12544 * j) + ((112 * k) + a))] = (((1 / cinn_cpu_sqrt_fp32((conv2_1_expand_bn_variance[j] + 1e-05))) * (conv2_1_expand_bn_scale[j] * var_23[((12544 * j) + ((112 * k) + a))])) + ((-1 * ((1 / cinn_cpu_sqrt_fp32((conv2_1_expand_bn_variance[j] + 1e-05))) * (conv2_1_expand_bn_scale[j] * conv2_1_expand_bn_mean[j]))) + conv2_1_expand_bn_offset[j])); +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _BatchNorm_output_1); +68: } +68: +68: void fn_relu6_5(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_33 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: cinn_buffer_t* _Relu6_output_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: cinn_buffer_malloc((void*)(0), _Relu6_output_0); +68: float* Relu6_output_0 = ((float*)(_Relu6_output_0->memory)); +68: const float* var_33 = ((const float*)(_var_33->memory)); +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: Relu6_output_0[((12544 * j) + ((112 * k) + a))] = cinn_min(cinn_max(var_33[((12544 * j) + ((112 * k) + a))], 0), 6); +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _Relu6_output_0); +68: } +68: +68: void fn_depthwise_conv2d_6(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_35 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: const cinn_buffer_t* _conv2_1_dwise_weights = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: cinn_buffer_t* _T_pad_out_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +68: cinn_buffer_t* _T_depthwise_conv2d_nchw_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +68: cinn_buffer_malloc((void*)(0), _T_pad_out_1); +68: cinn_buffer_malloc((void*)(0), _T_depthwise_conv2d_nchw_out); +68: float* T_depthwise_conv2d_nchw_out = ((float*)(_T_depthwise_conv2d_nchw_out->memory)); +68: float* T_depthwise_conv2d_nchw_out_init = ((float*)(_T_depthwise_conv2d_nchw_out->memory)); +68: float* T_pad_out_1 = ((float*)(_T_pad_out_1->memory)); +68: const float* conv2_1_dwise_weights = ((const float*)(_conv2_1_dwise_weights->memory)); +68: const float* var_35 = ((const float*)(_var_35->memory)); +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: T_depthwise_conv2d_nchw_out_init[((12544 * j) + ((112 * k) + a))] = 0; +68: }; +68: }; +68: }; +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 114; k += 1) { +68: for (int32_t a = 0; a < 114; a += 1) { +68: T_pad_out_1[((12996 * j) + ((114 * k) + a))] = ((((a < 113) && ((a >= 1) && ((k < 113) && (k >= 1))))) ? var_35[(-113 + ((12544 * j) + ((112 * k) + a)))] : 0); +68: }; +68: }; +68: }; +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: for (int32_t kh = 0; kh < 3; kh += 1) { +68: for (int32_t kw = 0; kw < 3; kw += 1) { +68: T_depthwise_conv2d_nchw_out[((12544 * j) + ((112 * k) + a))] = (T_depthwise_conv2d_nchw_out[((12544 * j) + ((112 * k) + a))] + (T_pad_out_1[(((j / 1) * 12996) + ((114 * k) + ((114 * kh) + (a + kw))))] * conv2_1_dwise_weights[(((j / 1) * 9) + ((3 * kh) + kw))])); +68: }; +68: }; +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _T_pad_out_1); +68: cinn_buffer_free((void*)(0), _T_depthwise_conv2d_nchw_out); +68: } +68: +68: void fn_batchnorm_7(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_40 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: const cinn_buffer_t* _conv2_1_dwise_bn_scale = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: const cinn_buffer_t* _conv2_1_dwise_bn_offset = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +68: const cinn_buffer_t* _conv2_1_dwise_bn_mean = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +68: const cinn_buffer_t* _conv2_1_dwise_bn_variance = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +68: cinn_buffer_t* _BatchNorm_output_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[5])); +68: cinn_buffer_malloc((void*)(0), _BatchNorm_output_2); +68: float* BatchNorm_output_2 = ((float*)(_BatchNorm_output_2->memory)); +68: const float* conv2_1_dwise_bn_mean = ((const float*)(_conv2_1_dwise_bn_mean->memory)); +68: const float* conv2_1_dwise_bn_offset = ((const float*)(_conv2_1_dwise_bn_offset->memory)); +68: const float* conv2_1_dwise_bn_scale = ((const float*)(_conv2_1_dwise_bn_scale->memory)); +68: const float* conv2_1_dwise_bn_variance = ((const float*)(_conv2_1_dwise_bn_variance->memory)); +68: const float* var_40 = ((const float*)(_var_40->memory)); +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: BatchNorm_output_2[((12544 * j) + ((112 * k) + a))] = (((1 / cinn_cpu_sqrt_fp32((conv2_1_dwise_bn_variance[j] + 1e-05))) * (conv2_1_dwise_bn_scale[j] * var_40[((12544 * j) + ((112 * k) + a))])) + ((-1 * ((1 / cinn_cpu_sqrt_fp32((conv2_1_dwise_bn_variance[j] + 1e-05))) * (conv2_1_dwise_bn_scale[j] * conv2_1_dwise_bn_mean[j]))) + conv2_1_dwise_bn_offset[j])); +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _BatchNorm_output_2); +68: } +68: +68: void fn_relu6_8(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_50 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: cinn_buffer_t* _Relu6_output_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: cinn_buffer_malloc((void*)(0), _Relu6_output_1); +68: float* Relu6_output_1 = ((float*)(_Relu6_output_1->memory)); +68: const float* var_50 = ((const float*)(_var_50->memory)); +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: Relu6_output_1[((12544 * j) + ((112 * k) + a))] = cinn_min(cinn_max(var_50[((12544 * j) + ((112 * k) + a))], 0), 6); +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _Relu6_output_1); +68: } +68: +68: void fn_conv2d_9(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_52 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: const cinn_buffer_t* _conv2_1_linear_weights = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: cinn_buffer_t* _T_Identity_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +68: cinn_buffer_t* _weights_dilation_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +68: cinn_buffer_t* _Conv2d_nchw_out_1 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +68: cinn_buffer_malloc((void*)(0), _T_Identity_out); +68: cinn_buffer_malloc((void*)(0), _weights_dilation_1); +68: cinn_buffer_malloc((void*)(0), _Conv2d_nchw_out_1); +68: float* Conv2d_nchw_out_1 = ((float*)(_Conv2d_nchw_out_1->memory)); +68: float* Conv2d_nchw_out_1_init = ((float*)(_Conv2d_nchw_out_1->memory)); +68: float* T_Identity_out = ((float*)(_T_Identity_out->memory)); +68: const float* conv2_1_linear_weights = ((const float*)(_conv2_1_linear_weights->memory)); +68: const float* var_52 = ((const float*)(_var_52->memory)); +68: float* weights_dilation_1 = ((float*)(_weights_dilation_1->memory)); +68: for (int32_t j = 0; j < 16; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: Conv2d_nchw_out_1_init[((12544 * j) + ((112 * k) + a))] = 0; +68: }; +68: }; +68: }; +68: for (int32_t i = 0; i < 16; i += 1) { +68: for (int32_t j = 0; j < 32; j += 1) { +68: weights_dilation_1[((32 * i) + j)] = (((((0 % 1) == 0) && ((0 % 1) == 0))) ? conv2_1_linear_weights[((32 * i) + j)] : 0); +68: }; +68: }; +68: for (int32_t j = 0; j < 32; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: T_Identity_out[((12544 * j) + ((112 * k) + a))] = var_52[((12544 * j) + ((112 * k) + a))]; +68: }; +68: }; +68: }; +68: for (int32_t j = 0; j < 16; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: for (int32_t fc_1 = 0; fc_1 < 32; fc_1 += 1) { +68: Conv2d_nchw_out_1[((12544 * j) + ((112 * k) + a))] = (Conv2d_nchw_out_1[((12544 * j) + ((112 * k) + a))] + (T_Identity_out[((12544 * fc_1) + ((112 * k) + a))] * weights_dilation_1[((32 * j) + fc_1)])); +68: }; +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _T_Identity_out); +68: cinn_buffer_free((void*)(0), _weights_dilation_1); +68: cinn_buffer_free((void*)(0), _Conv2d_nchw_out_1); +68: } +68: +68: void fn_batchnorm_10(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_58 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: const cinn_buffer_t* _conv2_1_linear_bn_scale = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: const cinn_buffer_t* _conv2_1_linear_bn_offset = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +68: const cinn_buffer_t* _conv2_1_linear_bn_mean = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +68: const cinn_buffer_t* _conv2_1_linear_bn_variance = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +68: cinn_buffer_t* _BatchNorm_output_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[5])); +68: cinn_buffer_malloc((void*)(0), _BatchNorm_output_3); +68: float* BatchNorm_output_3 = ((float*)(_BatchNorm_output_3->memory)); +68: const float* conv2_1_linear_bn_mean = ((const float*)(_conv2_1_linear_bn_mean->memory)); +68: const float* conv2_1_linear_bn_offset = ((const float*)(_conv2_1_linear_bn_offset->memory)); +68: const float* conv2_1_linear_bn_scale = ((const float*)(_conv2_1_linear_bn_scale->memory)); +68: const float* conv2_1_linear_bn_variance = ((const float*)(_conv2_1_linear_bn_variance->memory)); +68: const float* var_58 = ((const float*)(_var_58->memory)); +68: for (int32_t j = 0; j < 16; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: BatchNorm_output_3[((12544 * j) + ((112 * k) + a))] = (((1 / cinn_cpu_sqrt_fp32((conv2_1_linear_bn_variance[j] + 1e-05))) * (conv2_1_linear_bn_scale[j] * var_58[((12544 * j) + ((112 * k) + a))])) + ((-1 * ((1 / cinn_cpu_sqrt_fp32((conv2_1_linear_bn_variance[j] + 1e-05))) * (conv2_1_linear_bn_scale[j] * conv2_1_linear_bn_mean[j]))) + conv2_1_linear_bn_offset[j])); +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _BatchNorm_output_3); +68: } +68: +68: void fn_conv2d_11(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_68 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: const cinn_buffer_t* _conv3_1_expand_weights = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: cinn_buffer_t* _T_Identity_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +68: cinn_buffer_t* _weights_dilation_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +68: cinn_buffer_t* _Conv2d_nchw_out_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +68: cinn_buffer_malloc((void*)(0), _T_Identity_out); +68: cinn_buffer_malloc((void*)(0), _weights_dilation_2); +68: cinn_buffer_malloc((void*)(0), _Conv2d_nchw_out_2); +68: float* Conv2d_nchw_out_2 = ((float*)(_Conv2d_nchw_out_2->memory)); +68: float* Conv2d_nchw_out_2_init = ((float*)(_Conv2d_nchw_out_2->memory)); +68: float* T_Identity_out = ((float*)(_T_Identity_out->memory)); +68: const float* conv3_1_expand_weights = ((const float*)(_conv3_1_expand_weights->memory)); +68: const float* var_68 = ((const float*)(_var_68->memory)); +68: float* weights_dilation_2 = ((float*)(_weights_dilation_2->memory)); +68: for (int32_t j = 0; j < 96; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: Conv2d_nchw_out_2_init[((12544 * j) + ((112 * k) + a))] = 0; +68: }; +68: }; +68: }; +68: for (int32_t i = 0; i < 96; i += 1) { +68: for (int32_t j = 0; j < 16; j += 1) { +68: weights_dilation_2[((16 * i) + j)] = (((((0 % 1) == 0) && ((0 % 1) == 0))) ? conv3_1_expand_weights[((16 * i) + j)] : 0); +68: }; +68: }; +68: for (int32_t j = 0; j < 16; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: T_Identity_out[((12544 * j) + ((112 * k) + a))] = var_68[((12544 * j) + ((112 * k) + a))]; +68: }; +68: }; +68: }; +68: for (int32_t j = 0; j < 96; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: for (int32_t fc_2 = 0; fc_2 < 16; fc_2 += 1) { +68: Conv2d_nchw_out_2[((12544 * j) + ((112 * k) + a))] = (Conv2d_nchw_out_2[((12544 * j) + ((112 * k) + a))] + (T_Identity_out[((12544 * fc_2) + ((112 * k) + a))] * weights_dilation_2[((16 * j) + fc_2)])); +68: }; +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _T_Identity_out); +68: cinn_buffer_free((void*)(0), _weights_dilation_2); +68: cinn_buffer_free((void*)(0), _Conv2d_nchw_out_2); +68: } +68: +68: void fn_batchnorm_12(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_74 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: const cinn_buffer_t* _conv3_1_expand_bn_scale = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: const cinn_buffer_t* _conv3_1_expand_bn_offset = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +68: const cinn_buffer_t* _conv3_1_expand_bn_mean = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +68: const cinn_buffer_t* _conv3_1_expand_bn_variance = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +68: cinn_buffer_t* _BatchNorm_output_4 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[5])); +68: cinn_buffer_malloc((void*)(0), _BatchNorm_output_4); +68: float* BatchNorm_output_4 = ((float*)(_BatchNorm_output_4->memory)); +68: const float* conv3_1_expand_bn_mean = ((const float*)(_conv3_1_expand_bn_mean->memory)); +68: const float* conv3_1_expand_bn_offset = ((const float*)(_conv3_1_expand_bn_offset->memory)); +68: const float* conv3_1_expand_bn_scale = ((const float*)(_conv3_1_expand_bn_scale->memory)); +68: const float* conv3_1_expand_bn_variance = ((const float*)(_conv3_1_expand_bn_variance->memory)); +68: const float* var_74 = ((const float*)(_var_74->memory)); +68: for (int32_t j = 0; j < 96; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: BatchNorm_output_4[((12544 * j) + ((112 * k) + a))] = (((1 / cinn_cpu_sqrt_fp32((conv3_1_expand_bn_variance[j] + 1e-05))) * (conv3_1_expand_bn_scale[j] * var_74[((12544 * j) + ((112 * k) + a))])) + ((-1 * ((1 / cinn_cpu_sqrt_fp32((conv3_1_expand_bn_variance[j] + 1e-05))) * (conv3_1_expand_bn_scale[j] * conv3_1_expand_bn_mean[j]))) + conv3_1_expand_bn_offset[j])); +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _BatchNorm_output_4); +68: } +68: +68: void fn_relu6_13(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_84 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: cinn_buffer_t* _Relu6_output_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: cinn_buffer_malloc((void*)(0), _Relu6_output_2); +68: float* Relu6_output_2 = ((float*)(_Relu6_output_2->memory)); +68: const float* var_84 = ((const float*)(_var_84->memory)); +68: for (int32_t j = 0; j < 96; j += 1) { +68: for (int32_t k = 0; k < 112; k += 1) { +68: for (int32_t a = 0; a < 112; a += 1) { +68: Relu6_output_2[((12544 * j) + ((112 * k) + a))] = cinn_min(cinn_max(var_84[((12544 * j) + ((112 * k) + a))], 0), 6); +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _Relu6_output_2); +68: } +68: +68: void fn_depthwise_conv2d_14(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_86 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: const cinn_buffer_t* _conv3_1_dwise_weights = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: cinn_buffer_t* _T_pad_out_2 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +68: cinn_buffer_t* _T_depthwise_conv2d_nchw_out_0 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +68: cinn_buffer_malloc((void*)(0), _T_pad_out_2); +68: cinn_buffer_malloc((void*)(0), _T_depthwise_conv2d_nchw_out_0); +68: float* T_depthwise_conv2d_nchw_out_0 = ((float*)(_T_depthwise_conv2d_nchw_out_0->memory)); +68: float* T_depthwise_conv2d_nchw_out_0_init = ((float*)(_T_depthwise_conv2d_nchw_out_0->memory)); +68: float* T_pad_out_2 = ((float*)(_T_pad_out_2->memory)); +68: const float* conv3_1_dwise_weights = ((const float*)(_conv3_1_dwise_weights->memory)); +68: const float* var_86 = ((const float*)(_var_86->memory)); +68: for (int32_t j = 0; j < 96; j += 1) { +68: for (int32_t k = 0; k < 56; k += 1) { +68: for (int32_t a = 0; a < 56; a += 1) { +68: T_depthwise_conv2d_nchw_out_0_init[((3136 * j) + ((56 * k) + a))] = 0; +68: }; +68: }; +68: }; +68: for (int32_t j = 0; j < 96; j += 1) { +68: for (int32_t k = 0; k < 114; k += 1) { +68: for (int32_t a = 0; a < 114; a += 1) { +68: T_pad_out_2[((12996 * j) + ((114 * k) + a))] = ((((a < 113) && ((a >= 1) && ((k < 113) && (k >= 1))))) ? var_86[(-113 + ((12544 * j) + ((112 * k) + a)))] : 0); +68: }; +68: }; +68: }; +68: for (int32_t j = 0; j < 96; j += 1) { +68: for (int32_t k = 0; k < 56; k += 1) { +68: for (int32_t a = 0; a < 56; a += 1) { +68: for (int32_t kh = 0; kh < 3; kh += 1) { +68: for (int32_t kw = 0; kw < 3; kw += 1) { +68: T_depthwise_conv2d_nchw_out_0[((3136 * j) + ((56 * k) + a))] = (T_depthwise_conv2d_nchw_out_0[((3136 * j) + ((56 * k) + a))] + (T_pad_out_2[(((j / 1) * 12996) + ((2 * a) + ((228 * k) + ((114 * kh) + kw))))] * conv3_1_dwise_weights[(((j / 1) * 9) + ((3 * kh) + kw))])); +68: }; +68: }; +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _T_pad_out_2); +68: cinn_buffer_free((void*)(0), _T_depthwise_conv2d_nchw_out_0); +68: } +68: +68: void fn_batchnorm_15(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_91 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: const cinn_buffer_t* _conv3_1_dwise_bn_scale = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: const cinn_buffer_t* _conv3_1_dwise_bn_offset = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +68: const cinn_buffer_t* _conv3_1_dwise_bn_mean = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +68: const cinn_buffer_t* _conv3_1_dwise_bn_variance = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +68: cinn_buffer_t* _BatchNorm_output_5 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[5])); +68: cinn_buffer_malloc((void*)(0), _BatchNorm_output_5); +68: float* BatchNorm_output_5 = ((float*)(_BatchNorm_output_5->memory)); +68: const float* conv3_1_dwise_bn_mean = ((const float*)(_conv3_1_dwise_bn_mean->memory)); +68: const float* conv3_1_dwise_bn_offset = ((const float*)(_conv3_1_dwise_bn_offset->memory)); +68: const float* conv3_1_dwise_bn_scale = ((const float*)(_conv3_1_dwise_bn_scale->memory)); +68: const float* conv3_1_dwise_bn_variance = ((const float*)(_conv3_1_dwise_bn_variance->memory)); +68: const float* var_91 = ((const float*)(_var_91->memory)); +68: for (int32_t j = 0; j < 96; j += 1) { +68: for (int32_t k = 0; k < 56; k += 1) { +68: for (int32_t a = 0; a < 56; a += 1) { +68: BatchNorm_output_5[((3136 * j) + ((56 * k) + a))] = (((1 / cinn_cpu_sqrt_fp32((conv3_1_dwise_bn_variance[j] + 1e-05))) * (conv3_1_dwise_bn_scale[j] * var_91[((3136 * j) + ((56 * k) + a))])) + ((-1 * ((1 / cinn_cpu_sqrt_fp32((conv3_1_dwise_bn_variance[j] + 1e-05))) * (conv3_1_dwise_bn_scale[j] * conv3_1_dwise_bn_mean[j]))) + conv3_1_dwise_bn_offset[j])); +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _BatchNorm_output_5); +68: } +68: +68: void fn_relu6_16(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_101 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: cinn_buffer_t* _Relu6_output_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: cinn_buffer_malloc((void*)(0), _Relu6_output_3); +68: float* Relu6_output_3 = ((float*)(_Relu6_output_3->memory)); +68: const float* var_101 = ((const float*)(_var_101->memory)); +68: for (int32_t j = 0; j < 96; j += 1) { +68: for (int32_t k = 0; k < 56; k += 1) { +68: for (int32_t a = 0; a < 56; a += 1) { +68: Relu6_output_3[((3136 * j) + ((56 * k) + a))] = cinn_min(cinn_max(var_101[((3136 * j) + ((56 * k) + a))], 0), 6); +68: }; +68: }; +68: }; +68: cinn_buffer_free((void*)(0), _Relu6_output_3); +68: } +68: +68: void fn_conv2d_17(void* _args, int32_t num_args) +68: { +68: const cinn_buffer_t* _var_103 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +68: const cinn_buffer_t* _conv3_1_linear_weights = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +68: cinn_buffer_t* _T_Identity_out = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +68: cinn_buffer_t* _weights_dilation_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[3])); +68: cinn_buffer_t* _Conv2d_nchw_out_3 = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[4])); +68: cinn_buffer_malloc((void*)(0), _T_Identity_out); +68: cinn_buffer_malloc((void*)(0), _weights_dilation_3); +68: cinn_buffer_malloc((void*)(0), _Conv2d_nchw_out_3); +68: float* Conv2d_nchw_out_3 = ((float*)(_Conv2d_nchw_out_3->memory)); +68: float* Conv2d_nchw_out_ +68: I0924 13:32:29.892637 28814 codegen_llvm.cc:344] instr: i32 49 +54: I0924 13:32:30.845150 27840 test02_matmul_case.cc:132] 3654.5 +54: I0924 13:32:30.863699 27840 test02_matmul_case.cc:134] Testing matmul_tile +68: malloc_consolidate(): invalid chunk size +57: . +57: ---------------------------------------------------------------------- +57: Ran 2 tests in 14.638s +57: +57: OK +57: func function matmul (_A, _B, _c) +57: { +57: for (i, 1024) +57: { +57: for (j, 1024) +57: { +57: c_init[i, j] = 0 +57: } +57: } +57: for (i, 1024) +57: { +57: for (j, 1024) +57: { +57: for (k1, 1024) +57: { +57: c[i, j] = (c[i, j] + (A[i, k1] * B[k1, j])) +57: } +57: } +57: } +57: } +57: func function matmul_tile (_A, _B, _c) +57: { +57: for (i, 1024) +57: { +57: for (j, 1024) +57: { +57: c_init[i, j] = 0 +57: } +57: } +57: for (i_outer, 256) +57: { +57: for (i_inner, 4) +57: { +57: for (j_outer, 256) +57: { +57: for (j_inner, 4) +57: { +57: for (k1, 1024) +57: { +57: c[((4 * i_outer) + i_inner), ((4 * j_outer) + j_inner)] = (c[((4 * i_outer) + i_inner), ((4 * j_outer) + j_inner)] + (A[((4 * i_outer) + i_inner), k1] * B[k1, ((4 * j_outer) + j_inner)])) +57: } +57: } +57: } +57: } +57: } +57: } +57: module: +57: #include +57: #include +57: +57: void matmul_tile(void* _args, int32_t num_args) +57: { +57: const cinn_buffer_t* _A = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[0])); +57: const cinn_buffer_t* _B = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[1])); +57: cinn_buffer_t* _c = cinn_pod_value_to_buffer_p(&(((cinn_pod_value_t*)(_args))[2])); +57: cinn_buffer_malloc((void*)(0), _c); +57: const float* A = ((const float*)(_A->memory)); +57: const float* B = ((const float*)(_B->memory)); +57: float* c = ((float*)(_c->memory)); +57: float* c_init = ((float*)(_c->memory)); +57: for (int32_t i = 0; i < 1024; i += 1) { +57: for (int32_t j = 0; j < 1024; j += 1) { +57: c_init[((1024 * i) + j)] = 0; +57: }; +57: }; +57: for (int32_t i_outer = 0; i_outer < 256; i_outer += 1) { +57: for (int32_t i_inner = 0; i_inner < 4; i_inner += 1) { +57: for (int32_t j_outer = 0; j_outer < 256; j_outer += 1) { +57: for (int32_t j_inner = 0; j_inner < 4; j_inner += 1) { +57: for (int32_t k1 = 0; k1 < 1024; k1 += 1) { +57: c[((1024 * i_inner) + ((4096 * i_outer) + ((4 * j_outer) + j_inner)))] = (c[((1024 * i_inner) + ((4096 * i_outer) + ((4 * j_outer) + j_inner)))] + (A[((1024 * i_inner) + ((4096 * i_outer) + k1))] * B[((4 * j_outer) + ((1024 * k1) + j_inner))])); +57: }; +57: }; +57: }; +57: }; +57: }; +57: cinn_buffer_free((void*)(0), _c); +57: } +57: +57: +66/68 Test #57: test_cinn_python_api ................... Passed 15.10 sec +54: I0924 13:32:38.111878 27840 test02_matmul_case.cc:134] 3624 +54: I0924 13:32:38.129998 27840 test02_matmul_case.cc:136] Testing matmul_split +54: I0924 13:32:45.469558 27840 test02_matmul_case.cc:136] 3669.5 +54: I0924 13:32:45.488063 27840 test02_matmul_case.cc:138] Testing matmul_block +54: I0924 13:32:46.234335 27840 test02_matmul_case.cc:138] 373 +54: I0924 13:32:46.252069 27840 test02_matmul_case.cc:140] Testing matmul_vectorize +54: I0924 13:32:46.931332 27840 test02_matmul_case.cc:140] 339.5 +54: I0924 13:32:46.949447 27840 test02_matmul_case.cc:142] Testing matmul_loop_permutation +54: I0924 13:32:47.270104 27840 test02_matmul_case.cc:142] 160 +54: I0924 13:32:47.287595 27840 test02_matmul_case.cc:144] Testing matmul_array_packing +54: I0924 13:32:47.568820 27840 test02_matmul_case.cc:144] 140.5 +54: I0924 13:32:47.586277 27840 test02_matmul_case.cc:146] Testing matmul_dynamic_shape +54: I0924 13:32:54.837365 27840 test02_matmul_case.cc:146] 3625.5 +54: I0924 13:32:54.854187 27840 test02_matmul_case.cc:148] Testing matmul_dynamic_shape_tile +54: I0924 13:33:02.101543 27840 test02_matmul_case.cc:148] 3623.5 +54: I0924 13:33:02.119555 27840 test02_matmul_case.cc:150] Testing matmul_array_packing_dynamic_shape +54: /home/wangyue50/CINN-my/CINN/cinn/runtime/cinn_x86_device_impl.cc:21:cinn_x86_malloc(): buf.memory size is 4194304 +54: I0924 13:33:02.403213 27840 test02_matmul_case.cc:150] 141.5 +54: I0924 13:33:02.463613 27840 test02_matmul_case.cc:178] Testing matmul_basic +54: I0924 13:33:15.219161 27840 test02_matmul_case.cc:178] 6377.5 +54: I0924 13:33:15.293460 27840 test02_matmul_case.cc:179] Testing matmul_tile +54: I0924 13:33:30.818352 27840 test02_matmul_case.cc:179] 7762 +54: I0924 13:33:30.905640 27840 test02_matmul_case.cc:180] Testing matmul_block +54: I0924 13:33:40.564250 27840 test02_matmul_case.cc:180] 4829 +54: I0924 13:33:40.663898 27840 test02_matmul_case.cc:181] Testing matmul_vectorize +54: I0924 13:33:41.966856 27840 test02_matmul_case.cc:181] 651 +54: I0924 13:33:42.108276 27840 test02_matmul_case.cc:182] Testing matmul_loop_permutation +54: I0924 13:33:42.751489 27840 test02_matmul_case.cc:182] 321.5 +54: I0924 13:33:42.865382 27840 test02_matmul_case.cc:183] Testing matmul_array_packing +54: I0924 13:33:44.057888 27840 test02_matmul_case.cc:183] 596 +54: warning: :0:0: loop not unrolled: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering +54: I0924 13:33:44.756733 27840 test02_matmul_case.cc:189] Testing matmul_fn +54: I0924 13:33:53.072249 27840 test02_matmul_case.cc:189] 4157.5 +54: [ OK ] test02.basic (93555 ms) +54: [----------] 1 test from test02 (93555 ms total) +54: +54: [----------] Global test environment tear-down +54: [==========] 1 test from 1 test case ran. (93555 ms total) +54: [ PASSED ] 1 test. +67/68 Test #54: test02_matmul_case ..................... Passed 93.58 sec diff --git a/Testing/Temporary/CTestCostData.txt b/Testing/Temporary/CTestCostData.txt new file mode 100644 index 0000000000..ed97d539c0 --- /dev/null +++ b/Testing/Temporary/CTestCostData.txt @@ -0,0 +1 @@ +--- diff --git a/Testing/Temporary/LastTest.log b/Testing/Temporary/LastTest.log new file mode 100644 index 0000000000..cd3e6b3b6b --- /dev/null +++ b/Testing/Temporary/LastTest.log @@ -0,0 +1,3 @@ +Start testing: Sep 25 08:24 UTC +---------------------------------------------------------- +End testing: Sep 25 08:24 UTC diff --git a/cinn/hlir/pe/nn.cc b/cinn/hlir/pe/nn.cc index 31ae8f5da5..531f94f766 100644 --- a/cinn/hlir/pe/nn.cc +++ b/cinn/hlir/pe/nn.cc @@ -37,6 +37,55 @@ Tensor PRelu(const Tensor &A, const Tensor &slope, const int axis, const std::st output_name); } +// std::vector Conv2d_NCHW(const ir::Tensor &input, +// const ir::Tensor &weights, +// int pad_h, +// int pad_w, +// int stride_h, +// int stride_w, +// int dilation_h, +// int dilation_w, +// const std::vector> &output_shapes, +// const std::string &output_name) { +// CHECK_EQ(4, input->shape.size()) << "Input's dimension of Conv2d op is not 4! Please check."; +// CHECK_EQ(4, weights->shape.size()) << "Weight's dimension of Conv2d op is not 4! Please check."; +// CHECK_EQ(3, output_shapes.size()) << "The size of output_shapes of Conv2d op is not 3! Please check."; +// CHECK_EQ(4, output_shapes[0].size()) << "The size of output_shapes[0] of Conv2d op is not 4! Please check."; +// CHECK_EQ(4, output_shapes[1].size()) << "The size of output_shapes[1] of Conv2d op is not 4! Please check."; +// CHECK_EQ(4, output_shapes[2].size()) << "The size of output_shapes[2] of Conv2d op is not 4! Please check."; +// std::vector output_shape{ +// Expr(output_shapes[2][0]), Expr(output_shapes[2][1]), Expr(output_shapes[2][2]), Expr(output_shapes[2][3])}; +// auto input_pad = Compute( +// {Expr(output_shapes[0][0]), Expr(output_shapes[0][1]), Expr(output_shapes[0][2]), Expr(output_shapes[0][3])}, +// [=](Expr nn, Expr cc, Expr yy, Expr xx) { +// auto cond = +// ir::logic_and({yy >= pad_h, yy - pad_h < input->shape[2], xx >= pad_w, xx - pad_w < input->shape[3]}); +// return ir::Select::Make(cond, input(nn, cc, yy - pad_h, xx - pad_w), Expr(0.f)); +// }, +// UniqName("input_pad")); +// auto weights_dilation = Compute( +// {Expr(output_shapes[1][0]), Expr(output_shapes[1][1]), Expr(output_shapes[1][2]), Expr(output_shapes[1][3])}, +// [=](Expr nn, Expr cc, Expr yy, Expr xx) { +// auto cond = ir::logic_and({(xx) % dilation_h == 0, yy % dilation_w == 0}); +// return ir::Select::Make(cond, weights(nn, cc, yy / dilation_h, xx / dilation_w), Expr(0.f)); +// }, +// UniqName("weights_dilation")); + +// Var rc(input_pad->shape[1], UniqName("rc")); +// Var ry(weights_dilation->shape[2], UniqName("ry")); +// Var rx(weights_dilation->shape[3], UniqName("rx")); + +// auto res = Compute(output_shape, +// [=](Expr nn, Expr ff, Expr yy, Expr xx) { +// return ir::ReduceSum( +// input_pad(nn, rc, yy * stride_h + ry, xx * stride_w + rx) * weights_dilation(ff, rc, ry, +// rx), Expr(0.f)); +// }, +// output_name, +// {ry, rx, rc}); +// return {input_pad, weights_dilation, res}; +// } + std::vector Conv2d_NCHW(const ir::Tensor &input, const ir::Tensor &weights, int pad_h, @@ -51,6 +100,7 @@ std::vector Conv2d_NCHW(const ir::Tensor &input, CHECK_EQ(4, weights->shape.size()) << "Weight's dimension of Conv2d_NCHW op is not 4! Please check."; std::vector output_shape; std::vector new_weights_shape; + std::vector input_pad_shape; if (output_shapes.size() == 3) { // already computed by infer_shape CHECK_EQ(4, output_shapes[0].size()) << "The size of output_shapes[0] of Conv2d op is not 4! Please check."; @@ -60,6 +110,8 @@ std::vector Conv2d_NCHW(const ir::Tensor &input, Expr(output_shapes[2][0]), Expr(output_shapes[2][1]), Expr(output_shapes[2][2]), Expr(output_shapes[2][3])}; new_weights_shape = { Expr(output_shapes[1][0]), Expr(output_shapes[1][1]), Expr(output_shapes[1][2]), Expr(output_shapes[1][3])}; + input_pad_shape = { + Expr(output_shapes[0][0]), Expr(output_shapes[0][1]), Expr(output_shapes[0][2]), Expr(output_shapes[0][3])}; } else { output_shape = { input->shape[0], // B @@ -71,10 +123,16 @@ std::vector Conv2d_NCHW(const ir::Tensor &input, weights->shape[1], dilation_h * (weights->shape[2] - 1) + 1, dilation_w * (weights->shape[3] - 1) + 1}; + input_pad_shape = {input->shape[0], input->shape[1], input->shape[2] + 2 * pad_h, input->shape[3] + 2 * pad_w}; } - auto input_pad = - (pad_h == 0 && pad_w == 0) ? Identity(input) : Pad(input, {Expr(0), Expr(0), Expr(pad_h), Expr(pad_w)}); - + auto input_pad = Compute( + input_pad_shape, + [=](Expr nn, Expr cc, Expr yy, Expr xx) { + auto cond = + ir::logic_and({yy >= pad_h, yy - pad_h < input->shape[2], xx >= pad_w, xx - pad_w < input->shape[3]}); + return ir::Select::Make(cond, input(nn, cc, yy - pad_h, xx - pad_w), ir::Zero(input->type())); + }, + UniqName("input_pad")); auto weights_dilation = Compute( new_weights_shape, [=](Expr nn, Expr cc, Expr yy, Expr xx) { @@ -120,6 +178,7 @@ std::vector Conv2d_NHWC(const ir::Tensor &input, CHECK_EQ(4, weights->shape.size()) << "Weight's dimension of Conv2d_NHWC op is not 4! Please check."; std::vector output_shape; std::vector new_weights_shape; + std::vector input_pad_shape; if (output_shapes.size() == 3) { // already computed by infer_shape CHECK_EQ(4, output_shapes[0].size()) << "The size of output_shapes[0] of Conv2d op is not 4! Please check."; @@ -129,6 +188,8 @@ std::vector Conv2d_NHWC(const ir::Tensor &input, Expr(output_shapes[2][0]), Expr(output_shapes[2][1]), Expr(output_shapes[2][2]), Expr(output_shapes[2][3])}; new_weights_shape = { Expr(output_shapes[1][0]), Expr(output_shapes[1][1]), Expr(output_shapes[1][2]), Expr(output_shapes[1][3])}; + input_pad_shape = { + Expr(output_shapes[0][0]), Expr(output_shapes[0][1]), Expr(output_shapes[0][2]), Expr(output_shapes[0][3])}; } else { output_shape = { input->shape[0], // B @@ -140,9 +201,16 @@ std::vector Conv2d_NHWC(const ir::Tensor &input, weights->shape[1], dilation_h * (weights->shape[2] - 1) + 1, dilation_w * (weights->shape[3] - 1) + 1}; + input_pad_shape = {input->shape[0], input->shape[1] + 2 * pad_h, input->shape[2] + 2 * pad_w, input->shape[3]}; } - auto input_pad = - (pad_h == 0 && pad_w == 0) ? Identity(input) : Pad(input, {Expr(0), Expr(pad_h), Expr(pad_w), Expr(0)}); + auto input_pad = Compute( + input_pad_shape, + [=](Expr nn, Expr yy, Expr xx, Expr cc) { + auto cond = + ir::logic_and({yy >= pad_h, yy - pad_h < input->shape[1], xx >= pad_w, xx - pad_w < input->shape[2]}); + return ir::Select::Make(cond, input(nn, yy - pad_h, xx - pad_w, cc), ir::Zero(input->type())); + }, + UniqName("input_pad")); auto weights_dilation = Compute( new_weights_shape, diff --git a/python/tests/conv2d_utils.py b/python/tests/conv2d_utils.py index 636968aef6..02c9f880ae 100644 --- a/python/tests/conv2d_utils.py +++ b/python/tests/conv2d_utils.py @@ -73,10 +73,10 @@ def conv2d_native(inputs_data, input_shape, filter_size, attrs, is_depthwise): res_shape = output.shape[1:] pad_shape = list(input_shape) - dialtion_shape = list(filter_size_new) + dilation_shape = list(filter_size_new) assert len(padding) == 2 assert len(pad_shape) == 4 - assert len(dialtion_shape) == 4 + assert len(dilation_shape) == 4 if data_format == "NCHW": h_index = 2 w_index = 3 @@ -86,11 +86,12 @@ def conv2d_native(inputs_data, input_shape, filter_size, attrs, is_depthwise): pad_shape[h_index] += 2 * padding[0] pad_shape[w_index] += 2 * padding[1] - dialtion_shape[h_index] = (filter_size_new[h_index] - 1) * dilation[0] + 1 - dialtion_shape[w_index] = (filter_size_new[w_index] - 1) * dilation[1] + 1 + dilation_shape[2] = (filter_size_new[2] - 1) * dilation[0] + 1 + dilation_shape[3] = (filter_size_new[3] - 1) * dilation[1] + 1 print("pad's shape is:", pad_shape) + print("dilation's shape is:", dilation_shape) if is_depthwise: return output, [pad_shape, res_shape] else: - return output, [pad_shape, dialtion_shape, res_shape] + return output, [pad_shape, dilation_shape, res_shape] diff --git a/python/tests/test_op_nn.py b/python/tests/test_op_nn.py index f1632ef9bd..57c8fa2c9d 100644 --- a/python/tests/test_op_nn.py +++ b/python/tests/test_op_nn.py @@ -46,7 +46,7 @@ def init_testcase(self): self.groups = 1 assert np.mod(self.input_size[1], self.groups) == 0 f_c = self.input_size[1] // self.groups - self.filter_size = [64, f_c, 7, 7] + self.filter_size = [2, f_c, 2, 2] assert np.mod(self.filter_size[0], self.groups) == 0 self.data_format = "NCHW" self.attrs = framework.NodeAttr() @@ -170,9 +170,9 @@ def init_testcase(self): self.filter_size = [16, f_c, 7, 7] self.data_format = "NHWC" self.attrs = framework.NodeAttr() - self.padding = [1, 1] + self.padding = [2, 2] self.stride = [2, 2] - self.dilation = [1, 1] + self.dilation = [2, 2] self.attrs.attr_store = { "stride": self.stride, "padding": self.padding, diff --git a/python/tests/test_pe_reduction.py b/python/tests/test_pe_reduction.py index d803eb6071..51c0cadd93 100644 --- a/python/tests/test_pe_reduction.py +++ b/python/tests/test_pe_reduction.py @@ -99,7 +99,7 @@ def reduction_tester(self, fn_name, cinn_fn, np_fn, axes, keep_dims, stages = create_stages([x.to_tensor()]) if initial: y = cinn_fn(x.to_tensor(), stages, axes_expr, keep_dims, - ir.Expr(initial)) + ir.Expr(initial)) func = lang.lower(func_name, stages, [x.to_tensor(), y]) else: y = cinn_fn(x.to_tensor(), stages, axes_expr, keep_dims)