diff --git a/CMakeLists.txt b/CMakeLists.txt
index d67ead44e..9e969442e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,6 +22,9 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
 set(PROJECT_NAME "torch-xpu-ops")
 set(PROJECT_VERSION "2.3.0")
 
+# Avoid SYCL compiler error
+string(APPEND CMAKE_CXX_FLAGS " -Wno-error=comment")
+
 cmake_policy(SET CMP0048 NEW)
 project(${PROJECT_NAME} VERSION "${PROJECT_VERSION}" LANGUAGES C CXX)
diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py
index e1f3e66f9..ae980a7a8 100644
--- a/test/xpu/skip_list_common.py
+++ b/test/xpu/skip_list_common.py
@@ -2688,16 +2688,8 @@
     "nn/test_pruning_xpu.py": None,
     "test_foreach_xpu.py": (
-        # CPU fallback fails. Implementation difference between CPU and CUDA. Expect success on CPU and expect fail on CUDA. When we use CPU fallback and align expected fail list with CUDA, these cases fail.
-        "test_binary_op_with_scalar_self_support__foreach_pow_is_fastpath_True_xpu_bool",
-        # AssertionError: RuntimeError not raised
-        # https://github.com/intel/torch-xpu-ops/issues/784
-        "test_0dim_tensor_overload_exception_xpu",
         # RuntimeError: Tried to instantiate dummy base class CUDAGraph
-        "test_big_num_tensors__foreach_max_use_cuda_graph_True_xpu_float32",
-        "test_big_num_tensors__foreach_max_use_cuda_graph_True_xpu_float64",
-        "test_big_num_tensors__foreach_norm_use_cuda_graph_True_xpu_float32",
-        "test_big_num_tensors__foreach_norm_use_cuda_graph_True_xpu_float64",
+        "use_cuda_graph_True",
     ),
     "nn/test_convolution_xpu.py": (
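
The skip-list change above only makes sense if the skip machinery matches entries as substrings of test names, so that the single `"use_cuda_graph_True"` entry covers all four explicit `test_big_num_tensors__foreach_*` names it replaces. Below is a minimal sketch under that assumption; the `skip_dict` and `should_skip` names are hypothetical illustrations, not the actual API of `skip_list_common.py`.

```python
# Hypothetical sketch of substring-based skip matching (an assumption,
# not the real skip_list_common.py consumer code).
skip_dict = {
    "test_foreach_xpu.py": (
        # One substring entry covers every use_cuda_graph_True variant.
        "use_cuda_graph_True",
    ),
}

def should_skip(test_file: str, test_name: str) -> bool:
    """Return True if any skip entry is a substring of the test name."""
    entries = skip_dict.get(test_file) or ()
    return any(entry in test_name for entry in entries)

# All four previously listed cases are still skipped:
for name in (
    "test_big_num_tensors__foreach_max_use_cuda_graph_True_xpu_float32",
    "test_big_num_tensors__foreach_max_use_cuda_graph_True_xpu_float64",
    "test_big_num_tensors__foreach_norm_use_cuda_graph_True_xpu_float32",
    "test_big_num_tensors__foreach_norm_use_cuda_graph_True_xpu_float64",
):
    assert should_skip("test_foreach_xpu.py", name)
```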