From 1a013f7a0000695c8223eb87f14a2153ed62b5fd Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 11:32:09 +0800 Subject: [PATCH 01/22] Move sync_diff_inspector into tiflow repo --- .github/workflows/dm_binlog_999999.yaml | 6 +- Makefile | 15 +- dm/tests/README.md | 3 +- .../download-compatibility-test-binaries.sh | 3 - .../download-integration-test-binaries.sh | 1 - dm/tests/mariadb_master_down_and_up/case.sh | 1 - dm/tests/mariadb_master_down_and_up/lib.sh | 6 - dm/tests/tiup/lib.sh | 6 - dm/tests/tiup/upgrade-from-v1.sh | 2 - dm/tests/tiup/upgrade-from-v2.sh | 2 - dm/tests/tiup/upgrade-tidb.sh | 2 - dm/tests/upstream_switch/case.sh | 1 - dm/tests/upstream_switch/lib.sh | 6 - go.mod | 1 + go.sum | 2 + scripts/download-sync-diff.sh | 23 - sync_diff_inspector/README.md | 23 + .../checkpoints/checkpoints.go | 243 ++++ .../checkpoints/checkpoints_test.go | 120 ++ sync_diff_inspector/chunk/chunk.go | 464 ++++++++ sync_diff_inspector/chunk/chunk_test.go | 648 ++++++++++ sync_diff_inspector/config/config.go | 642 ++++++++++ sync_diff_inspector/config/config.toml | 73 ++ .../config/config_conflict.toml | 72 ++ sync_diff_inspector/config/config_dm.toml | 31 + .../config/config_sharding.toml | 99 ++ sync_diff_inspector/config/config_test.go | 112 ++ sync_diff_inspector/config/dm.go | 264 ++++ sync_diff_inspector/config/dm_test.go | 72 ++ sync_diff_inspector/config/template.go | 124 ++ sync_diff_inspector/diff/diff.go | 844 +++++++++++++ sync_diff_inspector/main.go | 150 +++ sync_diff_inspector/progress/progress.go | 480 ++++++++ sync_diff_inspector/progress/progress_test.go | 108 ++ sync_diff_inspector/report/report.go | 419 +++++++ sync_diff_inspector/report/report_test.go | 526 ++++++++ sync_diff_inspector/source/chunks_iter.go | 189 +++ .../source/common/common_test.go | 64 + sync_diff_inspector/source/common/conn.go | 74 ++ .../source/common/conn_test.go | 48 + sync_diff_inspector/source/common/rows.go | 101 ++ .../source/common/table_diff.go | 83 ++ sync_diff_inspector/source/mysql_shard.go | 390 ++++++ sync_diff_inspector/source/source.go | 429 +++++++ sync_diff_inspector/source/source_test.go | 955 +++++++++++++++ sync_diff_inspector/source/tidb.go | 285 +++++ sync_diff_inspector/splitter/bucket.go | 365 ++++++ sync_diff_inspector/splitter/index_fields.go | 111 ++ .../splitter/index_fields_test.go | 106 ++ sync_diff_inspector/splitter/limit.go | 259 ++++ sync_diff_inspector/splitter/random.go | 248 ++++ sync_diff_inspector/splitter/splitter.go | 87 ++ sync_diff_inspector/splitter/splitter_test.go | 936 +++++++++++++++ sync_diff_inspector/utils/pd.go | 288 +++++ sync_diff_inspector/utils/table.go | 187 +++ sync_diff_inspector/utils/utils.go | 1059 +++++++++++++++++ sync_diff_inspector/utils/utils_test.go | 688 +++++++++++ 57 files changed, 12479 insertions(+), 67 deletions(-) delete mode 100755 scripts/download-sync-diff.sh create mode 100644 sync_diff_inspector/README.md create mode 100644 sync_diff_inspector/checkpoints/checkpoints.go create mode 100644 sync_diff_inspector/checkpoints/checkpoints_test.go create mode 100644 sync_diff_inspector/chunk/chunk.go create mode 100644 sync_diff_inspector/chunk/chunk_test.go create mode 100644 sync_diff_inspector/config/config.go create mode 100644 sync_diff_inspector/config/config.toml create mode 100644 sync_diff_inspector/config/config_conflict.toml create mode 100644 sync_diff_inspector/config/config_dm.toml create mode 100644 sync_diff_inspector/config/config_sharding.toml create mode 100644 
sync_diff_inspector/config/config_test.go create mode 100644 sync_diff_inspector/config/dm.go create mode 100644 sync_diff_inspector/config/dm_test.go create mode 100644 sync_diff_inspector/config/template.go create mode 100644 sync_diff_inspector/diff/diff.go create mode 100644 sync_diff_inspector/main.go create mode 100644 sync_diff_inspector/progress/progress.go create mode 100644 sync_diff_inspector/progress/progress_test.go create mode 100644 sync_diff_inspector/report/report.go create mode 100644 sync_diff_inspector/report/report_test.go create mode 100644 sync_diff_inspector/source/chunks_iter.go create mode 100644 sync_diff_inspector/source/common/common_test.go create mode 100755 sync_diff_inspector/source/common/conn.go create mode 100644 sync_diff_inspector/source/common/conn_test.go create mode 100644 sync_diff_inspector/source/common/rows.go create mode 100644 sync_diff_inspector/source/common/table_diff.go create mode 100644 sync_diff_inspector/source/mysql_shard.go create mode 100644 sync_diff_inspector/source/source.go create mode 100644 sync_diff_inspector/source/source_test.go create mode 100644 sync_diff_inspector/source/tidb.go create mode 100644 sync_diff_inspector/splitter/bucket.go create mode 100644 sync_diff_inspector/splitter/index_fields.go create mode 100644 sync_diff_inspector/splitter/index_fields_test.go create mode 100644 sync_diff_inspector/splitter/limit.go create mode 100644 sync_diff_inspector/splitter/random.go create mode 100644 sync_diff_inspector/splitter/splitter.go create mode 100644 sync_diff_inspector/splitter/splitter_test.go create mode 100644 sync_diff_inspector/utils/pd.go create mode 100644 sync_diff_inspector/utils/table.go create mode 100644 sync_diff_inspector/utils/utils.go create mode 100644 sync_diff_inspector/utils/utils_test.go diff --git a/.github/workflows/dm_binlog_999999.yaml b/.github/workflows/dm_binlog_999999.yaml index 39ed09558fd..6371b773cab 100644 --- a/.github/workflows/dm_binlog_999999.yaml +++ b/.github/workflows/dm_binlog_999999.yaml @@ -44,13 +44,13 @@ jobs: key: ${{ runner.os }}-ticdc-tools-${{ hashFiles('tools/check/go.sum') }} - name: Build DM binary - run: make dm_integration_test_build + run: | + make dm_integration_test_build + make sync_diff_inspector - name: Setup CI environment run: | docker-compose -f ./dm/tests/binlog_999999/docker-compose.yml up -d - curl http://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz - mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ curl http://download.pingcap.org/tidb-nightly-linux-amd64.tar.gz | tar xz mv tidb-nightly-linux-amd64/bin/tidb-server bin/ curl -O https://dl.min.io/server/minio/release/linux-amd64/minio diff --git a/Makefile b/Makefile index cb0cc65532d..bd71b9d90f9 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ ### Makefile for tiflow -.PHONY: build test check clean fmt cdc kafka_consumer storage_consumer coverage \ +.PHONY: build test check clean fmt sync-diff-inspector cdc kafka_consumer storage_consumer coverage \ integration_test_build integration_test integration_test_mysql integration_test_kafka bank \ kafka_docker_integration_test kafka_docker_integration_test_with_build \ clean_integration_test_containers \ @@ -13,7 +13,7 @@ .DEFAULT_GOAL := default # Adapted from https://www.thapaliya.com/en/writings/well-documented-makefiles/ -help: ## Display this help and any documented user-facing targets. Other undocumented targets may be present in the Makefile. 
+help: ## Display this help and any documented user-facing targets. Other undocumented targets may be present in the Makefile.
help:
@awk 'BEGIN {FS = ": ##"; printf "Usage:\n make <target>\n\nTargets:\n"} /^[a-zA-Z0-9_\.\-\/%]+: ##/ { printf " %-45s %s\n", $$1, $$2 }' $(MAKEFILE_LIST)
@@ -136,7 +136,7 @@ dev: check test
test: unit_test dm_unit_test engine_unit_test
-build: cdc dm engine
+build: cdc dm engine sync_diff_inspector
check-makefiles: ## Check the makefiles format. Please run this target after the changes are committed.
check-makefiles: format-makefiles
@@ -158,6 +158,9 @@ build-cdc-with-failpoint: ## Build cdc with failpoint enabled.
cdc:
$(GOBUILD) -ldflags '$(LDFLAGS)' -o bin/cdc ./cmd/cdc
+sync_diff_inspector:
+ $(GOBUILD) -ldflags '$(LDFLAGS)' -o bin/sync_diff_inspector ./sync_diff_inspector/main.go
+
kafka_consumer:
$(CONSUMER_GOBUILD) -ldflags '$(LDFLAGS)' -o bin/cdc_kafka_consumer ./cmd/kafka-consumer
@@ -218,7 +221,6 @@ check_third_party_binary:
@which bin/pd-server
@which bin/tiflash
@which bin/pd-ctl
- @which bin/sync_diff_inspector
@which bin/go-ycsb
@which bin/etcdctl
@which bin/jq
@@ -496,7 +498,6 @@ install_test_python_dep:
check_third_party_binary_for_dm:
@which bin/tidb-server
- @which bin/sync_diff_inspector
@which mysql
@which bin/minio
@@ -565,7 +566,6 @@ check_third_party_binary_for_engine:
@which mysql || (echo "mysql not found in ${PATH}"; exit 1)
@which jq || (echo "jq not found in ${PATH}"; exit 1)
@which mc || (echo "mc not found in ${PATH}, you can use 'make bin/mc' and move bin/mc to ${PATH}"; exit 1)
- @which bin/sync_diff_inspector || (echo "run 'make bin/sync_diff_inspector' to download it if you need")
check_engine_integration_test:
./engine/test/utils/check_case.sh
@@ -580,9 +580,6 @@ check_cdc_integration_test:
bin/mc:
./scripts/download-mc.sh
-bin/sync_diff_inspector:
- ./scripts/download-sync-diff.sh
-
define run_engine_unit_test
@echo "running unit test for packages:" $(1)
mkdir -p $(ENGINE_TEST_DIR)
diff --git a/dm/tests/README.md b/dm/tests/README.md
index f72fe191fee..9d3e4432ad5 100644
--- a/dm/tests/README.md
+++ b/dm/tests/README.md
@@ -4,7 +4,6 @@
1. The following executables must be copied or generated or linked into these locations.
* `bin/tidb-server` can be downloaded from [tidb-master-linux-amd64](https://download.pingcap.org/tidb-master-linux-amd64.tar.gz) or installed by [tiup](https://github.com/pingcap/tiup), you can use the command `find ~/.tiup -name tidb-server` to locate `tidb-server` binary file and copy it
- * `bin/sync_diff_inspector` # can be downloaded from [tidb-enterprise-tools-latest-linux-amd64](http://download.pingcap.org/tidb-enterprise-tools-latest-linux-amd64.tar.gz) or build from [source code](https://github.com/pingcap/tidb-tools)
* `bin/minio` can be build from (https://github.com/minio/minio)
* `bin/dm-master.test` # generated by `make dm_integration_test_build`
* `bin/dm-worker.test` # generated by `make dm_integration_test_build`
@@ -32,7 +31,7 @@
### Integration Test
-1. Run `make dm_integration_test_build` to generate DM related binary for integration test
+1. Run `make dm_integration_test_build` and `make sync_diff_inspector` to generate DM related binaries for integration test.
 2.
Setup two MySQL servers (the first one: 5.6 ~ 5.7; the second one: 8.0.21, suggest you are same as [CI](https://github.com/PingCAP-QE/ci/blob/main/jenkins/pipelines/ci/dm/dm_ghpr_new_test.groovy#L164-L172)) with [binlog enabled first](https://dev.mysql.com/doc/refman/5.7/en/replication-howto-masterbaseconfig.html) and [set `GTID_MODE=ON`](https://dev.mysql.com/doc/refman/5.7/en/replication-mode-change-online-enable-gtids.html), You need set the mysql port and root password according to the following table. diff --git a/dm/tests/download-compatibility-test-binaries.sh b/dm/tests/download-compatibility-test-binaries.sh index cceb8c4432d..8cab339dd39 100755 --- a/dm/tests/download-compatibility-test-binaries.sh +++ b/dm/tests/download-compatibility-test-binaries.sh @@ -70,9 +70,6 @@ color-green "Download binaries..." download "$tidb_download_url" "tidb-server.tar.gz" "tmp/tidb-server.tar.gz" tar -xz -C third_bin bin/tidb-server -f tmp/tidb-server.tar.gz && mv third_bin/bin/tidb-server third_bin/ -download "$sync_diff_inspector_download_url" "tidb-enterprise-tools-nightly-linux-amd64.tar.gz" "tmp/tidb-enterprise-tools-nightly-linux-amd64.tar.gz" -tar -xz -C third_bin tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector -f tmp/tidb-enterprise-tools-nightly-linux-amd64.tar.gz -mv third_bin/tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector third_bin/ && rm -rf third_bin/tidb-enterprise-tools-nightly-linux-amd64 download "$mydumper_download_url" "tidb-enterprise-tools-latest-linux-amd64.tar.gz" "tmp/tidb-enterprise-tools-latest-linux-amd64.tar.gz" tar -xz -C third_bin tidb-enterprise-tools-latest-linux-amd64/bin/mydumper -f tmp/tidb-enterprise-tools-latest-linux-amd64.tar.gz mv third_bin/tidb-enterprise-tools-latest-linux-amd64/bin/mydumper third_bin/ && rm -rf third_bin/tidb-enterprise-tools-latest-linux-amd64 diff --git a/dm/tests/download-integration-test-binaries.sh b/dm/tests/download-integration-test-binaries.sh index 34dab604e85..6086b1c3367 100755 --- a/dm/tests/download-integration-test-binaries.sh +++ b/dm/tests/download-integration-test-binaries.sh @@ -91,7 +91,6 @@ tar -xz -C third_bin 'bin/*' -f tmp/pd-server.tar.gz && mv third_bin/bin/* third download "$tikv_download_url" "tikv-server.tar.gz" "tmp/tikv-server.tar.gz" tar -xz -C third_bin bin/tikv-server -f tmp/tikv-server.tar.gz && mv third_bin/bin/tikv-server third_bin/ download "$tidb_tools_download_url" "tidb-tools.tar.gz" "tmp/tidb-tools.tar.gz" -tar -xz -C third_bin 'bin/sync_diff_inspector' -f tmp/tidb-tools.tar.gz && mv third_bin/bin/sync_diff_inspector third_bin/ download "$minio_download_url" "minio.tar.gz" "tmp/minio.tar.gz" tar -xz -C third_bin -f tmp/minio.tar.gz download "$gh_os_download_url" "gh-ost-binary-linux-20200828140552.tar.gz" "tmp/gh-ost-binary-linux-20200828140552.tar.gz" diff --git a/dm/tests/mariadb_master_down_and_up/case.sh b/dm/tests/mariadb_master_down_and_up/case.sh index e678d3bcfd2..dc59fe3e1b2 100644 --- a/dm/tests/mariadb_master_down_and_up/case.sh +++ b/dm/tests/mariadb_master_down_and_up/case.sh @@ -107,7 +107,6 @@ function clean_task() { function test_master_down_and_up() { cleanup_process clean_data - install_sync_diff setup_replica gen_full_data run_dm_components_and_create_source $1 diff --git a/dm/tests/mariadb_master_down_and_up/lib.sh b/dm/tests/mariadb_master_down_and_up/lib.sh index 3d38de273e7..4a548c73425 100644 --- a/dm/tests/mariadb_master_down_and_up/lib.sh +++ b/dm/tests/mariadb_master_down_and_up/lib.sh @@ -27,12 +27,6 @@ function exec_tidb() { echo $2 
| mysql -uroot -h127.0.0.1 -P$1 } -function install_sync_diff() { - curl https://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz - mkdir -p bin - mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ -} - function get_master_status() { arr=$(echo "show master status;" | MYSQL_PWD=123456 mysql -uroot -h127.0.0.1 -P3306 | awk 'NR==2') echo $arr diff --git a/dm/tests/tiup/lib.sh b/dm/tests/tiup/lib.sh index 8b57d9355e7..441fd2da753 100755 --- a/dm/tests/tiup/lib.sh +++ b/dm/tests/tiup/lib.sh @@ -56,12 +56,6 @@ function run_sql_tidb_with_retry() { fi } -function install_sync_diff() { - curl https://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz - mkdir -p bin - mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ -} - function exec_full_stage() { # drop previous data exec_sql mysql1 3306 "DROP DATABASE IF EXISTS $DB1;" diff --git a/dm/tests/tiup/upgrade-from-v1.sh b/dm/tests/tiup/upgrade-from-v1.sh index 3520dd0f7b9..75b4244efb0 100755 --- a/dm/tests/tiup/upgrade-from-v1.sh +++ b/dm/tests/tiup/upgrade-from-v1.sh @@ -122,8 +122,6 @@ function destroy_v2_by_tiup() { } function test() { - install_sync_diff - deploy_v1_by_ansible migrate_in_v1 diff --git a/dm/tests/tiup/upgrade-from-v2.sh b/dm/tests/tiup/upgrade-from-v2.sh index f5781c3002c..1a1252e94b2 100755 --- a/dm/tests/tiup/upgrade-from-v2.sh +++ b/dm/tests/tiup/upgrade-from-v2.sh @@ -170,8 +170,6 @@ function destroy_v2_by_tiup() { } function test() { - install_sync_diff - deploy_previous_v2 migrate_in_previous_v2 diff --git a/dm/tests/tiup/upgrade-tidb.sh b/dm/tests/tiup/upgrade-tidb.sh index 434c74cc7a9..1207e512f27 100755 --- a/dm/tests/tiup/upgrade-tidb.sh +++ b/dm/tests/tiup/upgrade-tidb.sh @@ -52,8 +52,6 @@ function destroy_v2_by_tiup() { # run this before upgrade TiDB. 
function before_upgrade() { - install_sync_diff - deploy_dm migrate_before_upgrade diff --git a/dm/tests/upstream_switch/case.sh b/dm/tests/upstream_switch/case.sh index 012b4df8ff3..185ebdbd878 100644 --- a/dm/tests/upstream_switch/case.sh +++ b/dm/tests/upstream_switch/case.sh @@ -208,7 +208,6 @@ function check_master() { function test_relay() { cleanup_process check_master - install_sync_diff clean_data prepare_binlogs setup_replica diff --git a/dm/tests/upstream_switch/lib.sh b/dm/tests/upstream_switch/lib.sh index 65064fb4cb6..b11537d988f 100644 --- a/dm/tests/upstream_switch/lib.sh +++ b/dm/tests/upstream_switch/lib.sh @@ -30,12 +30,6 @@ function exec_tidb() { echo $2 | mysql -uroot -h$1 -P4000 } -function install_sync_diff() { - curl https://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz - mkdir -p bin - mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ -} - function prepare_more_binlogs() { exec_sql $1 "create database db1 collate latin1_bin;" exec_sql $1 "flush logs;" diff --git a/go.mod b/go.mod index 3757a1ef1dd..cc9ee391f34 100644 --- a/go.mod +++ b/go.mod @@ -62,6 +62,7 @@ require ( github.com/mailru/easyjson v0.7.7 github.com/mattn/go-shellwords v1.0.12 github.com/modern-go/reflect2 v1.0.2 + github.com/olekukonko/tablewriter v0.0.5 github.com/phayes/freeport v0.0.0-20180830031419-95f893ade6f2 github.com/pierrec/lz4/v4 v4.1.18 github.com/pingcap/check v0.0.0-20211026125417-57bd13f7b5f0 diff --git a/go.sum b/go.sum index dd74ef9a83f..3e54b7d9b40 100644 --- a/go.sum +++ b/go.sum @@ -826,6 +826,8 @@ github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLA github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= +github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.13.0/go.mod h1:+REjRxOmWfHCjfv9TTWB1jD1Frx4XydAD3zm1lskyM0= diff --git a/scripts/download-sync-diff.sh b/scripts/download-sync-diff.sh deleted file mode 100755 index 3ee26c6e505..00000000000 --- a/scripts/download-sync-diff.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2022 PingCAP, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-set -eu
-
-echo "will download tidb-tools v6.1.0 to get sync_diff_inspector"
-curl -C - --retry 3 -o /tmp/tidb-tools.tar.gz https://download.pingcap.org/tidb-community-toolkit-v6.1.0-linux-amd64.tar.gz
-mkdir -p /tmp/tidb-tools
-tar -zxf /tmp/tidb-tools.tar.gz -C /tmp/tidb-tools
-mv /tmp/tidb-tools/tidb-community-toolkit-v6.1.0-linux-amd64/sync_diff_inspector ./bin/sync_diff_inspector
-rm -r /tmp/tidb-tools
-rm /tmp/tidb-tools.tar.gz
diff --git a/sync_diff_inspector/README.md b/sync_diff_inspector/README.md
new file mode 100644
index 00000000000..87dc46a10c3
--- /dev/null
+++ b/sync_diff_inspector/README.md
@@ -0,0 +1,23 @@
+# sync-diff-inspector
+
+sync-diff-inspector is a tool for comparing two databases' data.
+
+## How to use
+
+```shell
+Usage of diff:
+ -V, --version print version of sync_diff_inspector
+ -L, --log-level string log level: debug, info, warn, error, fatal (default "info")
+ -C, --config string Config file
+ -T, --template string export a template config file
+ --dm-addr string the address of DM
+ --dm-task string identifier of dm task
+ --check-thread-count int how many goroutines are created to check data (default 4)
+ --export-fix-sql set true if want to compare rows or set to false will only compare checksum (default true)
+```
+
+For more details you can read the [config.toml](./config/config.toml), [config_sharding.toml](./config/config_sharding.toml) and [config_dm.toml](./config/config_dm.toml).
+
+## Documents
+- `zh`: [Overview in Chinese](https://docs.pingcap.com/zh/tidb/stable/sync-diff-inspector-overview)
+- `en`: [Overview in English](https://docs.pingcap.com/tidb/stable/sync-diff-inspector-overview)
diff --git a/sync_diff_inspector/checkpoints/checkpoints.go b/sync_diff_inspector/checkpoints/checkpoints.go
new file mode 100644
index 00000000000..010fdd2a9c2
--- /dev/null
+++ b/sync_diff_inspector/checkpoints/checkpoints.go
@@ -0,0 +1,243 @@
+// Copyright 2021 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package checkpoints
+
+import (
+ "container/heap"
+ "context"
+ "encoding/json"
+ "os"
+ "sync"
+
+ "github.com/pingcap/tiflow/sync_diff_inspector/config"
+ "github.com/pingcap/tiflow/sync_diff_inspector/report"
+
+ "github.com/pingcap/tiflow/sync_diff_inspector/chunk"
+ "github.com/siddontang/go/ioutil2"
+
+ "github.com/pingcap/errors"
+ "github.com/pingcap/log"
+ "go.uber.org/zap"
+)
+
+const (
+ // SuccessState
+ // for chunk: means this chunk's data is equal
+ // for table: means all chunks in this table are equal (except ignored chunks)
+ SuccessState = "success"
+
+ // FailedState
+ // for chunk: means this chunk's data is not equal
+ // for table: means some chunks' data is not equal or some chunk check failed in this table
+ FailedState = "failed"
+
+ // IgnoreState
+ // for chunk: this chunk is ignored (e.g. an Empty chunk is skipped)
+ // for table: tables do not have this state
+ IgnoreState = "ignore"
+)
+
+type Node struct {
+ State string `json:"state"` // indicates the state ("success" or "failed") of the chunk
+
+ ChunkRange *chunk.Range `json:"chunk-range"`
+ IndexID int64 `json:"index-id"`
+}
+
+func (n *Node) GetID() *chunk.ChunkID { return n.ChunkRange.Index }
+
+func (n *Node) GetState() string { return n.State }
+
+func (n *Node) GetTableIndex() int { return n.ChunkRange.Index.TableIndex }
+
+func (n *Node) GetBucketIndexLeft() int { return n.ChunkRange.Index.BucketIndexLeft }
+
+func (n *Node) GetBucketIndexRight() int { return n.ChunkRange.Index.BucketIndexRight }
+
+func (n *Node) GetChunkIndex() int { return n.ChunkRange.Index.ChunkIndex }
+
+// IsAdjacent reports whether the next node is the adjacent node.
+// It is the key logic for checkpoint updates:
+// we need to save nodes to the checkpoint in global order.
+func (n *Node) IsAdjacent(next *Node) bool {
+ if n.GetTableIndex() == next.GetTableIndex()-1 {
+ if n.ChunkRange.IsLastChunkForTable() && next.ChunkRange.IsFirstChunkForTable() {
+ return true
+ }
+ return false
+ }
+ if n.GetTableIndex() == next.GetTableIndex() {
+ // same table
+ if n.GetBucketIndexRight() == next.GetBucketIndexLeft()-1 {
+ if n.ChunkRange.IsLastChunkForBucket() && next.ChunkRange.IsFirstChunkForBucket() {
+ return true
+ }
+ return false
+ }
+ if n.GetBucketIndexLeft() == next.GetBucketIndexLeft() {
+ return n.GetChunkIndex() == next.GetChunkIndex()-1
+ }
+ return false
+ }
+ return false
+}
+
+// IsLess reports whether the current node is less than the next node.
+func (n *Node) IsLess(next *Node) bool {
+ if n.GetTableIndex() < next.GetTableIndex() {
+ return true
+ }
+ if n.GetTableIndex() == next.GetTableIndex() {
+ if n.GetBucketIndexLeft() <= next.GetBucketIndexLeft()-1 {
+ return true
+ }
+ if n.GetBucketIndexLeft() == next.GetBucketIndexLeft() {
+ return n.GetChunkIndex() < next.GetChunkIndex()
+ }
+ return false
+ }
+ return false
+}
+
+// nodeHeap maintains a min heap, which can be accessed by multiple goroutines and is protected by a mutex.
+type nodeHeap struct {
+ Nodes []*Node
+ CurrentSavedNode *Node // CurrentSavedNode saves the minimum checked chunk, updated by the `GetChunkSnapshot` method
+ mu *sync.Mutex // protects the critical section
+}
+
+// Checkpoint provides the ability to restart the sync-diff process from the
+// latest previous exit point (due to error or intention).
+type Checkpoint struct {
+ hp *nodeHeap
+}
+
+// SavedState contains the information of the latest checked chunk and the state of `report`.
+// When sync-diff starts from a checkpoint, it loads this information and continues running.
+type SavedState struct {
+ Chunk *Node `json:"chunk-info"`
+ Report *report.Report `json:"report-info"`
+}
+
+// InitCurrentSavedID is only used during initialization and does not take the lock, so be cautious.
+func (cp *Checkpoint) InitCurrentSavedID(n *Node) {
+ cp.hp.CurrentSavedNode = n
+}
+
+func (cp *Checkpoint) GetCurrentSavedID() *Node {
+ cp.hp.mu.Lock()
+ defer cp.hp.mu.Unlock()
+ return cp.hp.CurrentSavedNode
+}
+
+func (cp *Checkpoint) Insert(node *Node) {
+ cp.hp.mu.Lock()
+ heap.Push(cp.hp, node)
+ cp.hp.mu.Unlock()
+}
+
+// Len - gets the length of the heap
+func (hp *nodeHeap) Len() int { return len(hp.Nodes) }
+
+// Less - determines which node has higher priority than the other
+func (hp *nodeHeap) Less(i, j int) bool {
+ return hp.Nodes[i].IsLess(hp.Nodes[j])
+}
+
+// Swap - implementation of swap for the heap interface
+func (hp *nodeHeap) Swap(i, j int) {
+ hp.Nodes[i], hp.Nodes[j] = hp.Nodes[j], hp.Nodes[i]
+}
+
+// Push - implementation of push for the heap interface
+func (hp *nodeHeap) Push(x interface{}) {
+ hp.Nodes = append(hp.Nodes, x.(*Node))
+}
+
+// Pop - implementation of pop for the heap interface
+func (hp *nodeHeap) Pop() (item interface{}) {
+ if len(hp.Nodes) == 0 {
+ return
+ }
+
+ hp.Nodes, item = hp.Nodes[:len(hp.Nodes)-1], hp.Nodes[len(hp.Nodes)-1]
+ return
+}
+
+func (cp *Checkpoint) Init() {
+ hp := &nodeHeap{
+ mu: &sync.Mutex{},
+ Nodes: make([]*Node, 0),
+ CurrentSavedNode: &Node{
+ ChunkRange: &chunk.Range{
+ Index: chunk.GetInitChunkID(),
+ IsFirst: true,
+ IsLast: true,
+ },
+ },
+ }
+ heap.Init(hp)
+ cp.hp = hp
+}
+
+// GetChunkSnapshot gets the snapshot of the minimum continuous checked chunk
+func (cp *Checkpoint) GetChunkSnapshot() (cur *Node) {
+ cp.hp.mu.Lock()
+ defer cp.hp.mu.Unlock()
+ for cp.hp.Len() != 0 && cp.hp.CurrentSavedNode.IsAdjacent(cp.hp.Nodes[0]) {
+ cp.hp.CurrentSavedNode = heap.Pop(cp.hp).(*Node)
+ cur = cp.hp.CurrentSavedNode
+ }
+ // wait for next 10s to check
+ return cur
+}
+
+// SaveChunk saves the chunk to file.
+func (cp *Checkpoint) SaveChunk(ctx context.Context, fileName string, cur *Node, reportInfo *report.Report) (*chunk.ChunkID, error) { + if cur == nil { + return nil, nil + } + + savedState := &SavedState{ + Chunk: cur, + Report: reportInfo, + } + checkpointData, err := json.Marshal(savedState) + if err != nil { + log.Warn("fail to save the chunk to the file", zap.Any("chunk index", cur.GetID()), zap.Error(err)) + return nil, errors.Trace(err) + } + + if err = ioutil2.WriteFileAtomic(fileName, checkpointData, config.LocalFilePerm); err != nil { + return nil, err + } + log.Info("save checkpoint", + zap.Any("chunk", cur), + zap.String("state", cur.GetState())) + return cur.GetID(), nil +} + +// LoadChunk loads chunk info from file `chunk` +func (cp *Checkpoint) LoadChunk(fileName string) (*Node, *report.Report, error) { + bytes, err := os.ReadFile(fileName) + if err != nil { + return nil, nil, errors.Trace(err) + } + n := &SavedState{} + err = json.Unmarshal(bytes, n) + if err != nil { + return nil, nil, errors.Trace(err) + } + return n.Chunk, n.Report, nil +} diff --git a/sync_diff_inspector/checkpoints/checkpoints_test.go b/sync_diff_inspector/checkpoints/checkpoints_test.go new file mode 100644 index 00000000000..29b1a76a586 --- /dev/null +++ b/sync_diff_inspector/checkpoints/checkpoints_test.go @@ -0,0 +1,120 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package checkpoints + +import ( + "context" + "math/rand" + "os" + "strconv" + "sync" + "testing" + "time" + + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/stretchr/testify/require" +) + +func TestSaveChunk(t *testing.T) { + checker := new(Checkpoint) + checker.Init() + ctx := context.Background() + cur := checker.GetChunkSnapshot() + id, err := checker.SaveChunk(ctx, "TestSaveChunk", cur, nil) + require.NoError(t, err) + require.Nil(t, id) + wg := &sync.WaitGroup{} + rounds := 100 + for i := 0; i < rounds; i++ { + wg.Add(1) + go func(i int) { + node := &Node{ + ChunkRange: &chunk.Range{ + Index: &chunk.ChunkID{ + TableIndex: 0, + BucketIndexLeft: i / 10, + BucketIndexRight: i / 10, + ChunkIndex: i % 10, + ChunkCnt: 10, + }, + Bounds: []*chunk.Bound{ + { + HasLower: i != 0, + Lower: strconv.Itoa(i + 1000), + Upper: strconv.Itoa(i + 1000 + 1), + HasUpper: i != rounds, + }, + }, + }, + + State: SuccessState, + } + if rand.Intn(4) == 0 { + time.Sleep(time.Duration(rand.Intn(3)) * time.Second) + } + checker.Insert(node) + wg.Done() + }(i) + } + wg.Wait() + defer os.Remove("TestSaveChunk") + + cur = checker.GetChunkSnapshot() + require.NotNil(t, cur) + id, err = checker.SaveChunk(ctx, "TestSaveChunk", cur, nil) + require.NoError(t, err) + require.Equal(t, id.Compare(&chunk.ChunkID{TableIndex: 0, BucketIndexLeft: 9, BucketIndexRight: 9, ChunkIndex: 9}), 0) +} + +func TestLoadChunk(t *testing.T) { + checker := new(Checkpoint) + checker.Init() + ctx := context.Background() + rounds := 100 + wg := &sync.WaitGroup{} + for i := 0; i < rounds; i++ { + wg.Add(1) + go func(i int) { + node := &Node{ + ChunkRange: &chunk.Range{ + Bounds: []*chunk.Bound{ + { + HasLower: i != 0, + Lower: strconv.Itoa(i + 1000), + Upper: strconv.Itoa(i + 1000 + 1), + HasUpper: i != rounds, + }, + }, + Index: &chunk.ChunkID{ + TableIndex: 0, + BucketIndexLeft: i / 10, + BucketIndexRight: i / 10, + ChunkIndex: i % 10, + ChunkCnt: 10, + }, + }, + } + checker.Insert(node) + wg.Done() + }(i) + } + wg.Wait() + defer os.Remove("TestLoadChunk") + cur := checker.GetChunkSnapshot() + id, err := checker.SaveChunk(ctx, "TestLoadChunk", cur, nil) + require.NoError(t, err) + node, _, err := checker.LoadChunk("TestLoadChunk") + require.NoError(t, err) + require.Equal(t, node.GetID().Compare(id), 0) +} diff --git a/sync_diff_inspector/chunk/chunk.go b/sync_diff_inspector/chunk/chunk.go new file mode 100644 index 00000000000..6943f413d96 --- /dev/null +++ b/sync_diff_inspector/chunk/chunk.go @@ -0,0 +1,464 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package chunk + +import ( + "encoding/json" + "fmt" + "strconv" + "strings" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/util/dbutil" + "go.uber.org/zap" +) + +const ( + lt = "<" + lte = "<=" + gt = ">" +) + +type ChunkType int + +const ( + Bucket ChunkType = iota + 1 + Random + Limit + Others + Empty +) + +// Bound represents a bound for a column +type Bound struct { + Column string `json:"column"` + Lower string `json:"lower"` + Upper string `json:"upper"` + + HasLower bool `json:"has-lower"` + HasUpper bool `json:"has-upper"` +} + +// ChunkID is to identify the sequence of chunks +type ChunkID struct { + TableIndex int `json:"table-index"` + // we especially treat random split has only one bucket + // which is the whole table + // range is [left, right] + BucketIndexLeft int `json:"bucket-index-left"` + BucketIndexRight int `json:"bucket-index-right"` + ChunkIndex int `json:"chunk-index"` + // `ChunkCnt` is the number of chunks in this bucket + // We can compare `ChunkIndex` and `ChunkCnt` to know + // whether this chunk is the last one + ChunkCnt int `json:"chunk-count"` +} + +func GetInitChunkID() *ChunkID { + return &ChunkID{ + TableIndex: -1, + BucketIndexLeft: -1, + BucketIndexRight: -1, + ChunkIndex: -1, + ChunkCnt: 0, + } +} + +func (c *ChunkID) Compare(o *ChunkID) int { + if c.TableIndex < o.TableIndex { + return -1 + } + if c.TableIndex > o.TableIndex { + return 1 + } + + // c.TableIndex == o.TableIndex + if c.BucketIndexLeft < o.BucketIndexLeft { + return -1 + } + if c.BucketIndexLeft > o.BucketIndexLeft { + return 1 + } + // c.BucketIndexLeft == o.BucketIndexLeft + if c.ChunkIndex < o.ChunkIndex { + return -1 + } + if c.ChunkIndex == o.ChunkIndex { + return 0 + } + return 1 +} + +func (c *ChunkID) Copy() *ChunkID { + cp := *c + return &cp +} + +func (c *ChunkID) ToString() string { + return fmt.Sprintf("%d:%d-%d:%d:%d", c.TableIndex, c.BucketIndexLeft, c.BucketIndexRight, c.ChunkIndex, c.ChunkCnt) +} + +func (c *ChunkID) FromString(s string) error { + ids := strings.Split(s, ":") + tableIndex, err := strconv.Atoi(ids[0]) + if err != nil { + return errors.Trace(err) + } + + bucketIndex := strings.Split(ids[1], "-") + bucketIndexLeft, err := strconv.Atoi(bucketIndex[0]) + if err != nil { + return errors.Trace(err) + } + bucketIndexRight, err := strconv.Atoi(bucketIndex[1]) + if err != nil { + return errors.Trace(err) + } + + chunkIndex, err := strconv.Atoi(ids[2]) + if err != nil { + return errors.Trace(err) + } + chunkCnt, err := strconv.Atoi(ids[3]) + if err != nil { + return errors.Trace(err) + } + c.TableIndex, c.BucketIndexLeft, c.BucketIndexRight, c.ChunkIndex, c.ChunkCnt = tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, chunkCnt + return nil +} + +// Range represents chunk range +type Range struct { + Index *ChunkID `json:"index"` + Type ChunkType `json:"type"` + Bounds []*Bound `json:"bounds"` + IsFirst bool `json:"is-first"` + IsLast bool `json:"is-last"` + + Where string `json:"where"` + Args []interface{} `json:"args"` + + columnOffset map[string]int +} + +func (r *Range) IsFirstChunkForBucket() bool { + return r.Index.ChunkIndex == 0 +} + +func (r *Range) IsLastChunkForBucket() bool { + return r.Index.ChunkIndex == r.Index.ChunkCnt-1 +} + +// NewChunkRange return a Range. 
+func NewChunkRange() *Range { + return &Range{ + Bounds: make([]*Bound, 0, 2), + columnOffset: make(map[string]int), + Index: &ChunkID{}, + } +} + +// NewChunkRangeOffset return a Range in sequence +func NewChunkRangeOffset(columnOffset map[string]int) *Range { + bounds := make([]*Bound, len(columnOffset)) + for column, offset := range columnOffset { + bounds[offset] = &Bound{ + Column: column, + HasLower: false, + HasUpper: false, + } + } + return &Range{ + Bounds: bounds, + columnOffset: columnOffset, + } +} + +func (c *Range) IsLastChunkForTable() bool { + if c.IsLast { + return true + } + // calculate from bounds + for _, b := range c.Bounds { + if b.HasUpper { + return false + } + } + return true +} + +func (c *Range) IsFirstChunkForTable() bool { + if c.IsFirst { + return true + } + // calculate from bounds + for _, b := range c.Bounds { + if b.HasLower { + return false + } + } + return true +} + +// String returns the string of Range, used for log. +func (c *Range) String() string { + chunkBytes, err := json.Marshal(c) + if err != nil { + log.Warn("fail to encode chunk into string", zap.Error(err)) + return "" + } + + return string(chunkBytes) +} + +func (c *Range) ToString(collation string) (string, []interface{}) { + if collation != "" { + collation = fmt.Sprintf(" COLLATE '%s'", collation) + } + + /* for example: + there is a bucket in TiDB, and the lowerbound and upperbound are (A, B1, C1), (A, B2, C2), and the columns are `a`, `b` and `c`, + this bucket's data range is (a = A) AND (b > B1 or (b == B1 and c > C1)) AND (b < B2 or (b == B2 and c <= C2)) + */ + + sameCondition := make([]string, 0, 1) + lowerCondition := make([]string, 0, 1) + upperCondition := make([]string, 0, 1) + sameArgs := make([]interface{}, 0, 1) + lowerArgs := make([]interface{}, 0, 1) + upperArgs := make([]interface{}, 0, 1) + + preConditionForLower := make([]string, 0, 1) + preConditionForUpper := make([]string, 0, 1) + preConditionArgsForLower := make([]interface{}, 0, 1) + preConditionArgsForUpper := make([]interface{}, 0, 1) + + i := 0 + for ; i < len(c.Bounds); i++ { + bound := c.Bounds[i] + if !(bound.HasLower && bound.HasUpper) { + break + } + + if bound.Lower != bound.Upper { + break + } + + sameCondition = append(sameCondition, fmt.Sprintf("%s%s = ?", dbutil.ColumnName(bound.Column), collation)) + sameArgs = append(sameArgs, bound.Lower) + } + + if i == len(c.Bounds) && i > 0 { + // All the columns are equal in bounds, should return FALSE! 
+ return "FALSE", nil + } + + for ; i < len(c.Bounds); i++ { + bound := c.Bounds[i] + lowerSymbol := gt + upperSymbol := lt + if i == len(c.Bounds)-1 { + upperSymbol = lte + } + + if bound.HasLower { + if len(preConditionForLower) > 0 { + lowerCondition = append(lowerCondition, fmt.Sprintf("(%s AND %s%s %s ?)", strings.Join(preConditionForLower, " AND "), dbutil.ColumnName(bound.Column), collation, lowerSymbol)) + lowerArgs = append(append(lowerArgs, preConditionArgsForLower...), bound.Lower) + } else { + lowerCondition = append(lowerCondition, fmt.Sprintf("(%s%s %s ?)", dbutil.ColumnName(bound.Column), collation, lowerSymbol)) + lowerArgs = append(lowerArgs, bound.Lower) + } + preConditionForLower = append(preConditionForLower, fmt.Sprintf("%s%s = ?", dbutil.ColumnName(bound.Column), collation)) + preConditionArgsForLower = append(preConditionArgsForLower, bound.Lower) + } + + if bound.HasUpper { + if len(preConditionForUpper) > 0 { + upperCondition = append(upperCondition, fmt.Sprintf("(%s AND %s%s %s ?)", strings.Join(preConditionForUpper, " AND "), dbutil.ColumnName(bound.Column), collation, upperSymbol)) + upperArgs = append(append(upperArgs, preConditionArgsForUpper...), bound.Upper) + } else { + upperCondition = append(upperCondition, fmt.Sprintf("(%s%s %s ?)", dbutil.ColumnName(bound.Column), collation, upperSymbol)) + upperArgs = append(upperArgs, bound.Upper) + } + preConditionForUpper = append(preConditionForUpper, fmt.Sprintf("%s%s = ?", dbutil.ColumnName(bound.Column), collation)) + preConditionArgsForUpper = append(preConditionArgsForUpper, bound.Upper) + } + } + + if len(sameCondition) == 0 { + if len(upperCondition) == 0 && len(lowerCondition) == 0 { + return "TRUE", nil + } + + if len(upperCondition) == 0 { + return strings.Join(lowerCondition, " OR "), lowerArgs + } + + if len(lowerCondition) == 0 { + return strings.Join(upperCondition, " OR "), upperArgs + } + + return fmt.Sprintf("(%s) AND (%s)", strings.Join(lowerCondition, " OR "), strings.Join(upperCondition, " OR ")), append(lowerArgs, upperArgs...) + } else { + if len(upperCondition) == 0 && len(lowerCondition) == 0 { + return strings.Join(sameCondition, " AND "), sameArgs + } + + if len(upperCondition) == 0 { + return fmt.Sprintf("(%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(lowerCondition, " OR ")), append(sameArgs, lowerArgs...) + } + + if len(lowerCondition) == 0 { + return fmt.Sprintf("(%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(upperCondition, " OR ")), append(sameArgs, upperArgs...) + } + + return fmt.Sprintf("(%s) AND (%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(lowerCondition, " OR "), strings.Join(upperCondition, " OR ")), append(append(sameArgs, lowerArgs...), upperArgs...) 
+ } +} + +func (c *Range) ToMeta() string { + lowerCondition := make([]string, 0, 1) + upperCondition := make([]string, 0, 1) + columnName := make([]string, 0, 1) + for _, bound := range c.Bounds { + columnName = append(columnName, bound.Column) + if bound.HasLower { + lowerCondition = append(lowerCondition, bound.Lower) + } + if bound.HasUpper { + upperCondition = append(upperCondition, bound.Upper) + } + } + if len(upperCondition) == 0 && len(lowerCondition) == 0 { + return "range in sequence: Full" + } + if len(upperCondition) == 0 { + return fmt.Sprintf("range in sequence: (%s) < (%s)", strings.Join(lowerCondition, ","), strings.Join(columnName, ",")) + } + if len(lowerCondition) == 0 { + return fmt.Sprintf("range in sequence: (%s) <= (%s)", strings.Join(columnName, ","), strings.Join(upperCondition, ",")) + } + return fmt.Sprintf("range in sequence: (%s) < (%s) <= (%s)", strings.Join(lowerCondition, ","), strings.Join(columnName, ","), strings.Join(upperCondition, ",")) +} + +func (c *Range) addBound(bound *Bound) { + c.Bounds = append(c.Bounds, bound) + c.columnOffset[bound.Column] = len(c.Bounds) - 1 +} + +func (c *Range) Update(column, lower, upper string, updateLower, updateUpper bool) { + if offset, ok := c.columnOffset[column]; ok { + // update the bound + if updateLower { + c.Bounds[offset].Lower = lower + c.Bounds[offset].HasLower = true + } + if updateUpper { + c.Bounds[offset].Upper = upper + c.Bounds[offset].HasUpper = true + } + + return + } + + // add a new bound + c.addBound(&Bound{ + Column: column, + Lower: lower, + Upper: upper, + HasLower: updateLower, + HasUpper: updateUpper, + }) +} + +func (c *Range) Copy() *Range { + newChunk := NewChunkRange() + for _, bound := range c.Bounds { + newChunk.addBound(&Bound{ + Column: bound.Column, + Lower: bound.Lower, + Upper: bound.Upper, + HasLower: bound.HasLower, + HasUpper: bound.HasUpper, + }) + } + + return newChunk +} + +func (c *Range) Clone() *Range { + newChunk := NewChunkRange() + for _, bound := range c.Bounds { + newChunk.addBound(&Bound{ + Column: bound.Column, + Lower: bound.Lower, + Upper: bound.Upper, + HasLower: bound.HasLower, + HasUpper: bound.HasUpper, + }) + } + newChunk.Type = c.Type + newChunk.Where = c.Where + newChunk.Args = c.Args + for i, v := range c.columnOffset { + newChunk.columnOffset[i] = v + } + newChunk.Index = c.Index.Copy() + newChunk.IsFirst = c.IsFirst + newChunk.IsLast = c.IsLast + return newChunk +} + +func (c *Range) CopyAndUpdate(column, lower, upper string, updateLower, updateUpper bool) *Range { + newChunk := c.Copy() + newChunk.Update(column, lower, upper, updateLower, updateUpper) + return newChunk +} + +// Notice: chunk may contain not only one bucket, which can be expressed as a range [3, 5], +// +// And `lastBucketID` means the `5` and `firstBucketID` means the `3`. 
+func InitChunks(chunks []*Range, t ChunkType, firstBucketID, lastBucketID int, index int, collation, limits string, chunkCnt int) { + if chunks == nil { + return + } + for _, chunk := range chunks { + conditions, args := chunk.ToString(collation) + chunk.Where = fmt.Sprintf("((%s) AND (%s))", conditions, limits) + chunk.Args = args + chunk.Index = &ChunkID{ + BucketIndexLeft: firstBucketID, + BucketIndexRight: lastBucketID, + ChunkIndex: index, + ChunkCnt: chunkCnt, + } + chunk.Type = t + index++ + } +} + +func InitChunk(chunk *Range, t ChunkType, firstBucketID, lastBucketID int, collation, limits string) { + conditions, args := chunk.ToString(collation) + chunk.Where = fmt.Sprintf("((%s) AND (%s))", conditions, limits) + chunk.Args = args + chunk.Index = &ChunkID{ + BucketIndexLeft: firstBucketID, + BucketIndexRight: lastBucketID, + ChunkIndex: 0, + ChunkCnt: 1, + } + chunk.Type = t +} diff --git a/sync_diff_inspector/chunk/chunk_test.go b/sync_diff_inspector/chunk/chunk_test.go new file mode 100644 index 00000000000..b5d62dd9449 --- /dev/null +++ b/sync_diff_inspector/chunk/chunk_test.go @@ -0,0 +1,648 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package chunk + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestChunkUpdate(t *testing.T) { + chunk := &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "2", + HasLower: true, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: true, + HasUpper: true, + }, + }, + } + + testCases := []struct { + boundArgs []string + expectStr string + expectArgs []interface{} + }{ + { + []string{"a", "5", "6"}, + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"5", "5", "3", "6", "6", "4"}, + }, { + []string{"b", "5", "6"}, + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"1", "1", "5", "2", "2", "6"}, + }, { + []string{"c", "7", "8"}, + "((`a` > ?) OR (`a` = ? AND `b` > ?) OR (`a` = ? AND `b` = ? AND `c` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` < ?) OR (`a` = ? AND `b` = ? AND `c` <= ?))", + []interface{}{"1", "1", "3", "1", "3", "7", "2", "2", "4", "2", "4", "8"}, + }, + } + + for _, cs := range testCases { + newChunk := chunk.CopyAndUpdate(cs.boundArgs[0], cs.boundArgs[1], cs.boundArgs[2], true, true) + conditions, args := newChunk.ToString("") + require.Equal(t, conditions, cs.expectStr) + require.Equal(t, args, cs.expectArgs) + } + + // the origin chunk is not changed + conditions, args := chunk.ToString("") + require.Equal(t, conditions, "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? 
AND `b` <= ?))") + expectArgs := []interface{}{"1", "1", "3", "2", "2", "4"} + require.Equal(t, args, expectArgs) + + // test chunk update build by offset + columnOffset := map[string]int{ + "a": 1, + "b": 0, + } + chunkRange := NewChunkRangeOffset(columnOffset) + chunkRange.Update("a", "1", "2", true, true) + chunkRange.Update("b", "3", "4", true, true) + require.Equal(t, chunkRange.ToMeta(), "range in sequence: (3,1) < (b,a) <= (4,2)") +} + +func TestChunkToString(t *testing.T) { + // lower & upper + chunk := &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "2", + HasLower: true, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: true, + HasUpper: true, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: true, + HasUpper: true, + }, + }, + } + + conditions, args := chunk.ToString("") + require.Equal(t, conditions, "((`a` > ?) OR (`a` = ? AND `b` > ?) OR (`a` = ? AND `b` = ? AND `c` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` < ?) OR (`a` = ? AND `b` = ? AND `c` <= ?))") + expectArgs := []string{"1", "1", "3", "1", "3", "5", "2", "2", "4", "2", "4", "6"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + conditions, args = chunk.ToString("latin1") + require.Equal(t, conditions, "((`a` COLLATE 'latin1' > ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' > ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' = ? AND `c` COLLATE 'latin1' > ?)) AND ((`a` COLLATE 'latin1' < ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' < ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' = ? AND `c` COLLATE 'latin1' <= ?))") + expectArgs = []string{"1", "1", "3", "1", "3", "5", "2", "2", "4", "2", "4", "6"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"2","has-lower":true,"has-upper":true},{"column":"b","lower":"3","upper":"4","has-lower":true,"has-upper":true},{"column":"c","lower":"5","upper":"6","has-lower":true,"has-upper":true}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (1,3,5) < (a,b,c) <= (2,4,6)") + + // upper + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "2", + HasLower: false, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: false, + HasUpper: true, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: false, + HasUpper: true, + }, + }, + } + + conditions, args = chunk.ToString("latin1") + require.Equal(t, conditions, "(`a` COLLATE 'latin1' < ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' < ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' = ? 
AND `c` COLLATE 'latin1' <= ?)") + expectArgs = []string{"2", "2", "4", "2", "4", "6"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"2","has-lower":false,"has-upper":true},{"column":"b","lower":"3","upper":"4","has-lower":false,"has-upper":true},{"column":"c","lower":"5","upper":"6","has-lower":false,"has-upper":true}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (a,b,c) <= (2,4,6)") + + // lower + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "2", + HasLower: true, + HasUpper: false, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: true, + HasUpper: false, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: true, + HasUpper: false, + }, + }, + } + + conditions, args = chunk.ToString("") + require.Equal(t, conditions, "(`a` > ?) OR (`a` = ? AND `b` > ?) OR (`a` = ? AND `b` = ? AND `c` > ?)") + expectArgs = []string{"1", "1", "3", "1", "3", "5"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + conditions, args = chunk.ToString("latin1") + require.Equal(t, conditions, "(`a` COLLATE 'latin1' > ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' > ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' = ? AND `c` COLLATE 'latin1' > ?)") + expectArgs = []string{"1", "1", "3", "1", "3", "5"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"2","has-lower":true,"has-upper":false},{"column":"b","lower":"3","upper":"4","has-lower":true,"has-upper":false},{"column":"c","lower":"5","upper":"6","has-lower":true,"has-upper":false}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (1,3,5) < (a,b,c)") + + // none + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "2", + HasLower: false, + HasUpper: false, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: false, + HasUpper: false, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: false, + HasUpper: false, + }, + }, + } + conditions, args = chunk.ToString("") + require.Equal(t, conditions, "TRUE") + expectArgs = []string{} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"2","has-lower":false,"has-upper":false},{"column":"b","lower":"3","upper":"4","has-lower":false,"has-upper":false},{"column":"c","lower":"5","upper":"6","has-lower":false,"has-upper":false}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: Full") + + // same & lower & upper + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "1", + HasLower: true, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: true, + HasUpper: true, + }, { + Column: "c", + Lower: "5", + Upper: "5", + HasLower: true, + HasUpper: true, + }, + }, + } + + conditions, args = chunk.ToString("") + require.Equal(t, conditions, "(`a` = ?) AND ((`b` > ?) OR (`b` = ? AND `c` > ?)) AND ((`b` < ?) OR (`b` = ? 
AND `c` <= ?))") + expectArgs = []string{"1", "3", "3", "5", "4", "4", "5"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + conditions, args = chunk.ToString("latin1") + require.Equal(t, conditions, "(`a` COLLATE 'latin1' = ?) AND ((`b` COLLATE 'latin1' > ?) OR (`b` COLLATE 'latin1' = ? AND `c` COLLATE 'latin1' > ?)) AND ((`b` COLLATE 'latin1' < ?) OR (`b` COLLATE 'latin1' = ? AND `c` COLLATE 'latin1' <= ?))") + expectArgs = []string{"1", "3", "3", "5", "4", "4", "5"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"1","has-lower":true,"has-upper":true},{"column":"b","lower":"3","upper":"4","has-lower":true,"has-upper":true},{"column":"c","lower":"5","upper":"5","has-lower":true,"has-upper":true}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (1,3,5) < (a,b,c) <= (1,4,5)") + + // same & upper + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "2", + Upper: "2", + HasLower: false, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: false, + HasUpper: true, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: false, + HasUpper: true, + }, + }, + } + + conditions, args = chunk.ToString("latin1") + require.Equal(t, conditions, "(`a` COLLATE 'latin1' < ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' < ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' = ? AND `c` COLLATE 'latin1' <= ?)") + expectArgs = []string{"2", "2", "4", "2", "4", "6"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"2","upper":"2","has-lower":false,"has-upper":true},{"column":"b","lower":"3","upper":"4","has-lower":false,"has-upper":true},{"column":"c","lower":"5","upper":"6","has-lower":false,"has-upper":true}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (a,b,c) <= (2,4,6)") + + // same & lower + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "1", + HasLower: true, + HasUpper: false, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: true, + HasUpper: false, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: true, + HasUpper: false, + }, + }, + } + + conditions, args = chunk.ToString("") + require.Equal(t, conditions, "(`a` > ?) OR (`a` = ? AND `b` > ?) OR (`a` = ? AND `b` = ? AND `c` > ?)") + expectArgs = []string{"1", "1", "3", "1", "3", "5"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + conditions, args = chunk.ToString("latin1") + require.Equal(t, conditions, "(`a` COLLATE 'latin1' > ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' > ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' = ? 
AND `c` COLLATE 'latin1' > ?)") + expectArgs = []string{"1", "1", "3", "1", "3", "5"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"1","has-lower":true,"has-upper":false},{"column":"b","lower":"3","upper":"4","has-lower":true,"has-upper":false},{"column":"c","lower":"5","upper":"6","has-lower":true,"has-upper":false}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (1,3,5) < (a,b,c)") + + // same & none + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "1", + HasLower: false, + HasUpper: false, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: false, + HasUpper: false, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: false, + HasUpper: false, + }, + }, + } + conditions, args = chunk.ToString("") + require.Equal(t, conditions, "TRUE") + expectArgs = []string{} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"1","has-lower":false,"has-upper":false},{"column":"b","lower":"3","upper":"4","has-lower":false,"has-upper":false},{"column":"c","lower":"5","upper":"6","has-lower":false,"has-upper":false}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: Full") + + // all equal + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "1", + HasLower: true, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "3", + HasLower: true, + HasUpper: true, + }, { + Column: "c", + Lower: "6", + Upper: "6", + HasLower: true, + HasUpper: true, + }, + }, + } + conditions, args = chunk.ToString("") + require.Equal(t, conditions, "FALSE") + expectArgs = []string{} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"1","has-lower":true,"has-upper":true},{"column":"b","lower":"3","upper":"3","has-lower":true,"has-upper":true},{"column":"c","lower":"6","upper":"6","has-lower":true,"has-upper":true}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (1,3,6) < (a,b,c) <= (1,3,6)") + +} + +func TestChunkInit(t *testing.T) { + chunks := []*Range{ + { + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "2", + HasLower: true, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: true, + HasUpper: true, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: true, + HasUpper: true, + }, + }, + }, { + Bounds: []*Bound{ + { + Column: "a", + Lower: "2", + Upper: "3", + HasLower: true, + HasUpper: true, + }, { + Column: "b", + Lower: "4", + Upper: "5", + HasLower: true, + HasUpper: true, + }, { + Column: "c", + Lower: "6", + Upper: "7", + HasLower: true, + HasUpper: true, + }, + }, + }, + } + + InitChunks(chunks, Others, 1, 1, 0, "[123]", "[sdfds fsd fd gd]", 1) + require.Equal(t, chunks[0].Where, "((((`a` COLLATE '[123]' > ?) OR (`a` COLLATE '[123]' = ? AND `b` COLLATE '[123]' > ?) OR (`a` COLLATE '[123]' = ? AND `b` COLLATE '[123]' = ? AND `c` COLLATE '[123]' > ?)) AND ((`a` COLLATE '[123]' < ?) OR (`a` COLLATE '[123]' = ? AND `b` COLLATE '[123]' < ?) OR (`a` COLLATE '[123]' = ? AND `b` COLLATE '[123]' = ? 
AND `c` COLLATE '[123]' <= ?))) AND ([sdfds fsd fd gd]))") + require.Equal(t, chunks[0].Args, []interface{}{"1", "1", "3", "1", "3", "5", "2", "2", "4", "2", "4", "6"}) + require.Equal(t, chunks[0].Type, Others) + InitChunk(chunks[1], Others, 2, 2, "[456]", "[dsfsdf]") + require.Equal(t, chunks[1].Where, "((((`a` COLLATE '[456]' > ?) OR (`a` COLLATE '[456]' = ? AND `b` COLLATE '[456]' > ?) OR (`a` COLLATE '[456]' = ? AND `b` COLLATE '[456]' = ? AND `c` COLLATE '[456]' > ?)) AND ((`a` COLLATE '[456]' < ?) OR (`a` COLLATE '[456]' = ? AND `b` COLLATE '[456]' < ?) OR (`a` COLLATE '[456]' = ? AND `b` COLLATE '[456]' = ? AND `c` COLLATE '[456]' <= ?))) AND ([dsfsdf]))") + require.Equal(t, chunks[1].Args, []interface{}{"2", "2", "4", "2", "4", "6", "3", "3", "5", "3", "5", "7"}) + require.Equal(t, chunks[1].Type, Others) +} + +func TestChunkCopyAndUpdate(t *testing.T) { + chunk := NewChunkRange() + chunk.Update("a", "1", "2", true, true) + chunk.Update("a", "2", "3", true, true) + chunk.Update("a", "324", "5435", false, false) + chunk.Update("b", "4", "5", true, false) + chunk.Update("b", "8", "9", false, true) + chunk.Update("c", "6", "7", false, true) + chunk.Update("c", "10", "11", true, false) + + conditions, args := chunk.ToString("") + require.Equal(t, conditions, "((`a` > ?) OR (`a` = ? AND `b` > ?) OR (`a` = ? AND `b` = ? AND `c` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` < ?) OR (`a` = ? AND `b` = ? AND `c` <= ?))") + require.Equal(t, args, []interface{}{"2", "2", "4", "2", "4", "10", "3", "3", "9", "3", "9", "7"}) + + chunk2 := chunk.CopyAndUpdate("a", "4", "6", true, true) + conditions, args = chunk2.ToString("") + require.Equal(t, conditions, "((`a` > ?) OR (`a` = ? AND `b` > ?) OR (`a` = ? AND `b` = ? AND `c` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` < ?) OR (`a` = ? AND `b` = ? AND `c` <= ?))") + require.Equal(t, args, []interface{}{"4", "4", "4", "4", "4", "10", "6", "6", "9", "6", "9", "7"}) + _, args = chunk.ToString("") + // `Copy` use the same []string + require.Equal(t, args, []interface{}{"2", "2", "4", "2", "4", "10", "3", "3", "9", "3", "9", "7"}) + + InitChunk(chunk, Others, 2, 2, "[324]", "[543]") + chunk3 := chunk.Clone() + chunk3.Update("a", "2", "3", true, true) + require.Equal(t, chunk3.Where, "((((`a` COLLATE '[324]' > ?) OR (`a` COLLATE '[324]' = ? AND `b` COLLATE '[324]' > ?) OR (`a` COLLATE '[324]' = ? AND `b` COLLATE '[324]' = ? AND `c` COLLATE '[324]' > ?)) AND ((`a` COLLATE '[324]' < ?) OR (`a` COLLATE '[324]' = ? AND `b` COLLATE '[324]' < ?) OR (`a` COLLATE '[324]' = ? AND `b` COLLATE '[324]' = ? 
AND `c` COLLATE '[324]' <= ?))) AND ([543]))") + require.Equal(t, chunk3.Args, []interface{}{"2", "2", "4", "2", "4", "10", "3", "3", "9", "3", "9", "7"}) + require.Equal(t, chunk3.Type, Others) +} + +func TestChunkID(t *testing.T) { + chunkIDBase := &ChunkID{ + TableIndex: 2, + BucketIndexLeft: 2, + BucketIndexRight: 2, + ChunkIndex: 2, + ChunkCnt: 4, + } + + str := chunkIDBase.ToString() + require.Equal(t, str, "2:2-2:2:4") + chunkIDtmp := &ChunkID{} + chunkIDtmp.FromString(str) + require.Equal(t, chunkIDBase.Compare(chunkIDtmp), 0) + + chunkIDSmalls := []*ChunkID{ + { + TableIndex: 1, + BucketIndexLeft: 3, + BucketIndexRight: 3, + ChunkIndex: 4, + ChunkCnt: 5, + }, { + TableIndex: 2, + BucketIndexLeft: 1, + BucketIndexRight: 1, + ChunkIndex: 3, + ChunkCnt: 5, + }, { + TableIndex: 2, + BucketIndexLeft: 2, + BucketIndexRight: 2, + ChunkIndex: 1, + ChunkCnt: 4, + }, + } + + stringRes := []string{ + "1:3-3:4:5", + "2:1-1:3:5", + "2:2-2:1:4", + } + + for i, chunkIDSmall := range chunkIDSmalls { + require.Equal(t, chunkIDBase.Compare(chunkIDSmall), 1) + str = chunkIDSmall.ToString() + require.Equal(t, str, stringRes[i]) + chunkIDtmp = &ChunkID{} + chunkIDtmp.FromString(str) + require.Equal(t, chunkIDSmall.Compare(chunkIDtmp), 0) + } + + chunkIDLarges := []*ChunkID{ + { + TableIndex: 3, + BucketIndexLeft: 1, + BucketIndexRight: 1, + ChunkIndex: 2, + ChunkCnt: 3, + }, { + TableIndex: 2, + BucketIndexLeft: 3, + BucketIndexRight: 3, + ChunkIndex: 1, + ChunkCnt: 3, + }, { + TableIndex: 2, + BucketIndexLeft: 2, + BucketIndexRight: 2, + ChunkIndex: 3, + ChunkCnt: 4, + }, + } + + stringRes = []string{ + "3:1-1:2:3", + "2:3-3:1:3", + "2:2-2:3:4", + } + + for i, chunkIDLarge := range chunkIDLarges { + require.Equal(t, chunkIDBase.Compare(chunkIDLarge), -1) + str = chunkIDLarge.ToString() + require.Equal(t, str, stringRes[i]) + chunkIDtmp = &ChunkID{} + chunkIDtmp.FromString(str) + require.Equal(t, chunkIDLarge.Compare(chunkIDtmp), 0) + } + +} + +func TestChunkIndex(t *testing.T) { + chunkRange := NewChunkRange() + chunkRange.Index.ChunkIndex = 0 + chunkRange.Index.ChunkCnt = 3 + require.True(t, chunkRange.IsFirstChunkForBucket()) + require.False(t, chunkRange.IsLastChunkForBucket()) + chunkRange.Index.ChunkIndex = 2 + require.False(t, chunkRange.IsFirstChunkForBucket()) + require.True(t, chunkRange.IsLastChunkForBucket()) + + chunkRange.Bounds = []*Bound{ + { + Lower: "1", + HasLower: true, + }, { + Lower: "2", + HasLower: true, + }, + } + require.True(t, chunkRange.IsLastChunkForTable()) + require.False(t, chunkRange.IsFirstChunkForTable()) + chunkRange.Bounds = []*Bound{ + { + Upper: "1", + HasUpper: true, + }, { + Upper: "2", + HasUpper: true, + }, + } + require.False(t, chunkRange.IsLastChunkForTable()) + require.True(t, chunkRange.IsFirstChunkForTable()) + chunkRange.Bounds = []*Bound{ + { + Upper: "1", + HasUpper: true, + }, { + Lower: "2", + HasLower: true, + }, + } + require.False(t, chunkRange.IsLastChunkForTable()) + require.False(t, chunkRange.IsFirstChunkForTable()) +} diff --git a/sync_diff_inspector/config/config.go b/sync_diff_inspector/config/config.go new file mode 100644 index 00000000000..3ab749bc890 --- /dev/null +++ b/sync_diff_inspector/config/config.go @@ -0,0 +1,642 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "crypto/sha256" + "database/sql" + "encoding/json" + "fmt" + "net" + "net/url" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" + "time" + + "github.com/BurntSushi/toml" + "github.com/go-sql-driver/mysql" + "github.com/google/uuid" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + tidbutil "github.com/pingcap/tidb/pkg/util" + "github.com/pingcap/tidb/pkg/util/dbutil" + filter "github.com/pingcap/tidb/pkg/util/table-filter" + router "github.com/pingcap/tidb/pkg/util/table-router" + "github.com/pingcap/tiflow/dm/config/security" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + flag "github.com/spf13/pflag" + "go.uber.org/zap" +) + +const ( + LocalDirPerm os.FileMode = 0o755 + LocalFilePerm os.FileMode = 0o644 + + LogFileName = "sync_diff.log" + + baseSplitThreadCount = 3 + + UnifiedTimeZone string = "+0:00" +) + +// TableConfig is the config of table. +type TableConfig struct { + // table's filter to tell us which table should adapt to this config. + TargetTables []string `toml:"target-tables" json:"target-tables"` + // Internally used to indicate which specified table in target is using this config. + Schema string + Table string + // Internally used to distinguish different config. + ConfigIndex int + // Internally used to valid config. + HasMatched bool + + // columns be ignored, will not check this column's data + IgnoreColumns []string `toml:"ignore-columns"` + // field should be the primary key, unique key or field with index + Fields []string `toml:"index-fields"` + // select range, for example: "age > 10 AND age < 20" + Range string `toml:"range"` + + TargetTableInfo *model.TableInfo + + // collation config in mysql/tidb + Collation string `toml:"collation"` + + // specify the chunksize for the table + ChunkSize int64 `toml:"chunk-size" json:"chunk-size"` +} + +// Valid returns true if table's config is valide. +func (t *TableConfig) Valid() bool { + if len(t.TargetTables) == 0 { + log.Error("target tables can't be empty in TableConfig") + return false + } + + return true +} + +// TLS Security wrapper +type Security struct { + TLSName string `json:"tls-name"` + + CAPath string `toml:"ca-path" json:"ca-path"` + CertPath string `toml:"cert-path" json:"cert-path"` + KeyPath string `toml:"key-path" json:"key-path"` + + // raw content + CABytes string `toml:"ca-bytes" json:"ca-bytes"` + CertBytes string `toml:"cert-bytes" json:"cert-bytes"` + KeyBytes string `toml:"key-bytes" json:"key-bytes"` +} + +// DataSource represents the Source Config. 
+type DataSource struct { + Host string `toml:"host" json:"host"` + Port int `toml:"port" json:"port"` + User string `toml:"user" json:"user"` + Password utils.SecretString `toml:"password" json:"password"` + SqlMode string `toml:"sql-mode" json:"sql-mode"` + Snapshot string `toml:"snapshot" json:"snapshot"` + + Security *Security `toml:"security" json:"security"` + + RouteRules []string `toml:"route-rules" json:"route-rules"` + Router *router.Table + RouteTargetSet map[string]struct{} `json:"-"` + + Conn *sql.DB +} + +// IsAutoSnapshot returns true if the tidb_snapshot is expected to automatically +// be set from the syncpoint from the target TiDB instance. +func (d *DataSource) IsAutoSnapshot() bool { + return strings.EqualFold(d.Snapshot, "auto") +} + +// SetSnapshot changes the snapshot in configuration. This is typically +// used with the auto-snapshot feature. +func (d *DataSource) SetSnapshot(newSnapshot string) { + d.Snapshot = newSnapshot +} + +func (d *DataSource) ToDBConfig() *dbutil.DBConfig { + return &dbutil.DBConfig{ + Host: d.Host, + Port: d.Port, + User: d.User, + Password: d.Password.Plain(), + Snapshot: d.Snapshot, + } +} + +// register TLS config for driver +func (d *DataSource) RegisterTLS() error { + if d.Security == nil { + return nil + } + sec := d.Security + log.Info("try to register tls config") + tlsConfig, err := tidbutil.NewTLSConfig( + tidbutil.WithCAPath(sec.CAPath), + tidbutil.WithCertAndKeyPath(sec.CertPath, sec.KeyPath), + tidbutil.WithCAContent([]byte(sec.CABytes)), + tidbutil.WithCertAndKeyContent([]byte(sec.CertBytes), []byte(sec.KeyBytes)), + ) + if err != nil { + return errors.Trace(err) + } + + if tlsConfig == nil { + return nil + } + + log.Info("success to parse tls config") + sec.TLSName = "sync-diff-inspector-" + uuid.NewString() + err = mysql.RegisterTLSConfig(sec.TLSName, tlsConfig) + return errors.Trace(err) +} + +func (d *DataSource) ToDriverConfig() *mysql.Config { + cfg := mysql.NewConfig() + cfg.Params = make(map[string]string) + + cfg.User = d.User + cfg.Passwd = d.Password.Plain() + cfg.Net = "tcp" + cfg.Addr = net.JoinHostPort(d.Host, strconv.Itoa(d.Port)) + cfg.Params["charset"] = "utf8mb4" + cfg.InterpolateParams = true + cfg.Params["time_zone"] = fmt.Sprintf("'%s'", UnifiedTimeZone) + if len(d.Snapshot) > 0 && !d.IsAutoSnapshot() { + log.Info("create connection with snapshot", zap.String("snapshot", d.Snapshot)) + cfg.Params["tidb_snapshot"] = d.Snapshot + } + if d.Security != nil && len(d.Security.TLSName) > 0 { + cfg.TLSConfig = d.Security.TLSName + } + + return cfg +} + +type TaskConfig struct { + Source []string `toml:"source-instances" json:"source-instances"` + Routes []string `toml:"source-routes" json:"source-routes"` + Target string `toml:"target-instance" json:"target-instance"` + CheckTables []string `toml:"target-check-tables" json:"target-check-tables"` + TableConfigs []string `toml:"target-configs" json:"target-configs"` + // OutputDir include these + // 1. checkpoint Dir + // 2. fix-target-sql Dir + // 3. summary file + // 4. sync diff log file + // 5. 
fix + OutputDir string `toml:"output-dir" json:"output-dir"` + + SourceInstances []*DataSource + TargetInstance *DataSource + TargetTableConfigs []*TableConfig + TargetCheckTables filter.Filter + + FixDir string + CheckpointDir string + HashFile string +} + +func (t *TaskConfig) Init( + dataSources map[string]*DataSource, + tableConfigs map[string]*TableConfig, +) (err error) { + // Parse Source/Target + dataSourceList := make([]*DataSource, 0, len(t.Source)) + for _, si := range t.Source { + ds, ok := dataSources[si] + if !ok { + log.Error("not found source instance, please correct the config", zap.String("instance", si)) + return errors.Errorf("not found source instance, please correct the config. instance is `%s`", si) + } + // try to register tls + if err := ds.RegisterTLS(); err != nil { + return errors.Trace(err) + } + dataSourceList = append(dataSourceList, ds) + } + t.SourceInstances = dataSourceList + + ts, ok := dataSources[t.Target] + if !ok { + log.Error("not found target instance, please correct the config", zap.String("instance", t.Target)) + return errors.Errorf("not found target instance, please correct the config. instance is `%s`", t.Target) + } + // try to register tls + if err := ts.RegisterTLS(); err != nil { + return errors.Trace(err) + } + t.TargetInstance = ts + + t.TargetCheckTables, err = filter.Parse(t.CheckTables) + if err != nil { + log.Error("parse check tables failed", zap.Error(err)) + return errors.Annotate(err, "parse check tables failed") + } + + targetConfigs := t.TableConfigs + if targetConfigs != nil { + // table config can be nil + tableConfigsList := make([]*TableConfig, 0, len(targetConfigs)) + for configIndex, c := range targetConfigs { + tc, ok := tableConfigs[c] + if !ok { + log.Error("not found table config", zap.String("config", c)) + return errors.Errorf("not found table config. config is `%s`", c) + } + tc.ConfigIndex = configIndex + tableConfigsList = append(tableConfigsList, tc) + } + t.TargetTableConfigs = tableConfigsList + } + + hash, err := t.ComputeConfigHash() + if err != nil { + return errors.Trace(err) + } + + ok, err = pathExists(t.OutputDir) + if err != nil { + return errors.Trace(err) + } + if !ok { + if err = mkdirAll(t.OutputDir); err != nil { + return errors.Trace(err) + } + } + // outputDir exists, we need to check the config hash for checkpoint. + t.CheckpointDir = filepath.Join(t.OutputDir, "checkpoint") + ok, err = pathExists(t.CheckpointDir) + if err != nil { + return errors.Trace(err) + } + if !ok { + // no checkpoint, we can use this outputDir directly. + if err = mkdirAll(t.CheckpointDir); err != nil { + return errors.Trace(err) + } + // create config hash in checkpointDir. + err = os.WriteFile(filepath.Join(t.CheckpointDir, hash), []byte{}, LocalFilePerm) + if err != nil { + return errors.Trace(err) + } + } else { + // checkpoint exists, we need compare the config hash. + ok, err = pathExists(filepath.Join(t.CheckpointDir, hash)) + if err != nil { + return errors.Trace(err) + } + if !ok { + // not match, raise error + return errors.Errorf("config changes breaking the checkpoint, please use another outputDir and start over again!") + } + } + + t.FixDir = filepath.Join(t.OutputDir, fmt.Sprintf("fix-on-%s", t.Target)) + if err = mkdirAll(t.FixDir); err != nil { + return errors.Trace(err) + } + + return nil +} + +// ComputeConfigHash compute the hash according to the task +// if ConfigHash is as same as checkpoint.hash +// we think the second sync diff can use the checkpoint. 
+func (t *TaskConfig) ComputeConfigHash() (string, error) { + hash := make([]byte, 0) + // compute sources + for _, c := range t.SourceInstances { + configBytes, err := json.Marshal(c) + if err != nil { + return "", errors.Trace(err) + } + hash = append(hash, configBytes...) + } + // compute target + configBytes, err := json.Marshal(t.TargetInstance) + if err != nil { + return "", errors.Trace(err) + } + hash = append(hash, configBytes...) + // compute check-tables and table config + for _, c := range t.TargetTableConfigs { + configBytes, err = json.Marshal(c) + if err != nil { + return "", errors.Trace(err) + } + hash = append(hash, configBytes...) + } + targetCheckTables := t.CheckTables + for _, c := range targetCheckTables { + hash = append(hash, []byte(c)...) + } + + return fmt.Sprintf("%x", sha256.Sum256(hash)), nil +} + +// Config is the configuration. +type Config struct { + *flag.FlagSet `json:"-"` + + // log level + LogLevel string `toml:"-" json:"-"` + // how many goroutines are created to check data + CheckThreadCount int `toml:"check-thread-count" json:"check-thread-count"` + // how many goroutines are created to split chunk. A goroutine splits one table at a time. + SplitThreadCount int `toml:"-" json:"split-thread-count"` + // set true if want to compare rows + // set false won't compare rows. + ExportFixSQL bool `toml:"export-fix-sql" json:"export-fix-sql"` + // only check table struct without table data. + CheckStructOnly bool `toml:"check-struct-only" json:"check-struct-only"` + // experimental feature: only check table data without table struct + CheckDataOnly bool `toml:"check-data-only" json:"-"` + // skip validation for tables that don't exist upstream or downstream + SkipNonExistingTable bool `toml:"skip-non-existing-table" json:"-"` + // DMAddr is dm-master's address, the format should like "http://127.0.0.1:8261" + DMAddr string `toml:"dm-addr" json:"dm-addr"` + // DMTask string `toml:"dm-task" json:"dm-task"` + DMTask string `toml:"dm-task" json:"dm-task"` + + DataSources map[string]*DataSource `toml:"data-sources" json:"data-sources"` + + Routes map[string]*router.TableRule `toml:"routes" json:"routes"` + + TableConfigs map[string]*TableConfig `toml:"table-configs" json:"table-configs"` + + Task TaskConfig `toml:"task" json:"task"` + // config file + ConfigFile string + + // export a template config file + Template string `toml:"-" json:"-"` + + // print version if set true + PrintVersion bool +} + +// NewConfig creates a new config. 
+func NewConfig() *Config { + cfg := &Config{} + cfg.FlagSet = flag.NewFlagSet("diff", flag.ContinueOnError) + fs := cfg.FlagSet + + fs.BoolVarP(&cfg.PrintVersion, "version", "V", false, "print version of sync_diff_inspector") + fs.StringVarP(&cfg.LogLevel, "log-level", "L", "info", "log level: debug, info, warn, error, fatal") + fs.StringVarP(&cfg.ConfigFile, "config", "C", "", "Config file") + fs.StringVarP(&cfg.Template, "template", "T", "", " export a template config file") + fs.StringVar(&cfg.DMAddr, "dm-addr", "", "the address of DM") + fs.StringVar(&cfg.DMTask, "dm-task", "", "identifier of dm task") + fs.IntVar(&cfg.CheckThreadCount, "check-thread-count", 4, "how many goroutines are created to check data") + fs.BoolVar(&cfg.ExportFixSQL, "export-fix-sql", true, "set true if want to compare rows or set to false will only compare checksum") + fs.BoolVar(&cfg.CheckStructOnly, "check-struct-only", false, "ignore check table's data") + fs.BoolVar(&cfg.SkipNonExistingTable, "skip-non-existing-table", false, "skip validation for tables that don't exist upstream or downstream") + fs.BoolVar(&cfg.CheckDataOnly, "check-data-only", false, "ignore check table's struct") + + _ = fs.MarkHidden("check-data-only") + + fs.SortFlags = false + return cfg +} + +// Parse parses flag definitions from the argument list. +func (c *Config) Parse(arguments []string) error { + // Parse first to get config file. + err := c.FlagSet.Parse(arguments) + if err != nil { + return errors.Trace(err) + } + + if c.PrintVersion { + return nil + } + + if c.Template != "" { + return nil + } + + // Load config file if specified. + if c.ConfigFile == "" { + return errors.Errorf("argument --config is required") + } + err = c.configFromFile(c.ConfigFile) + if err != nil { + return errors.Trace(err) + } + + // Parse again to replace with command line options. + err = c.FlagSet.Parse(arguments) + if err != nil { + return errors.Trace(err) + } + + if len(c.FlagSet.Args()) != 0 { + return errors.Errorf("'%s' is an invalid flag", c.FlagSet.Arg(0)) + } + + // Set default value when output is empty + if c.Task.OutputDir == "" { + c.Task.OutputDir = timestampOutputDir() + if err := os.RemoveAll(c.Task.OutputDir); err != nil && !os.IsNotExist(err) { + log.Fatal("fail to remove the temp directory", zap.String("path", c.Task.OutputDir), zap.String("error", err.Error())) + } + } + + c.SplitThreadCount = baseSplitThreadCount + c.CheckThreadCount/2 + + return nil +} + +func (c *Config) String() string { + cfg, err := json.Marshal(c) + if err != nil { + return err.Error() + } + return string(cfg) +} + +// configFromFile loads config from file. 
+func (c *Config) configFromFile(path string) error { + meta, err := toml.DecodeFile(path, c) + if err != nil { + return errors.Trace(err) + } + if len(meta.Undecoded()) > 0 { + return errors.Errorf("unknown keys in config file %s: %v", path, meta.Undecoded()) + } + return nil +} + +func parseTLSFromDMConfig(config *security.Security) *Security { + if config == nil { + return nil + } + return &Security{ + CAPath: config.SSLCA, + CertPath: config.SSLCert, + KeyPath: config.SSLKey, + + CABytes: string(config.SSLCABytes), + CertBytes: string(config.SSLCertBytes), + KeyBytes: string(config.SSLKeyBytes), + } +} + +func (c *Config) adjustConfigByDMSubTasks() (err error) { + // DM's subtask config + subTaskCfgs, err := getDMTaskCfg(c.DMAddr, c.DMTask) + if err != nil { + log.Warn("failed to get config from DM tasks") + return errors.Trace(err) + } + sqlMode := "" + if subTaskCfgs[0].EnableANSIQuotes { + sqlMode = "ANSI_QUOTES" + } + dataSources := make(map[string]*DataSource) + dataSources["target"] = &DataSource{ + Host: subTaskCfgs[0].To.Host, + Port: subTaskCfgs[0].To.Port, + User: subTaskCfgs[0].To.User, + Password: utils.SecretString(subTaskCfgs[0].To.Password), + SqlMode: sqlMode, + Security: parseTLSFromDMConfig(subTaskCfgs[0].To.Security), + } + for _, subTaskCfg := range subTaskCfgs { + tableRouter, err := router.NewTableRouter(subTaskCfg.CaseSensitive, []*router.TableRule{}) + routeTargetSet := make(map[string]struct{}) + if err != nil { + return errors.Trace(err) + } + for _, rule := range subTaskCfg.RouteRules { + err := tableRouter.AddRule(rule) + if err != nil { + return errors.Trace(err) + } + routeTargetSet[dbutil.TableName(rule.TargetSchema, rule.TargetTable)] = struct{}{} + } + dataSources[subTaskCfg.SourceID] = &DataSource{ + Host: subTaskCfg.From.Host, + Port: subTaskCfg.From.Port, + User: subTaskCfg.From.User, + Password: utils.SecretString(subTaskCfg.From.Password), + SqlMode: sqlMode, + Security: parseTLSFromDMConfig(subTaskCfg.From.Security), + Router: tableRouter, + + RouteTargetSet: routeTargetSet, + } + } + c.DataSources = dataSources + c.Task.Target = "target" + for id := range dataSources { + if id == "target" { + continue + } + c.Task.Source = append(c.Task.Source, id) + } + return nil +} + +func (c *Config) Init() (err error) { + if len(c.DMAddr) > 0 { + err := c.adjustConfigByDMSubTasks() + if err != nil { + return errors.Annotate(err, "failed to init Task") + } + err = c.Task.Init(c.DataSources, c.TableConfigs) + if err != nil { + return errors.Annotate(err, "failed to init Task") + } + return nil + } + for _, d := range c.DataSources { + routeRuleList := make([]*router.TableRule, 0, len(c.Routes)) + d.RouteTargetSet = make(map[string]struct{}) + // if we had rules + for _, r := range d.RouteRules { + rr, ok := c.Routes[r] + if !ok { + return errors.Errorf("not found source routes for rule %s, please correct the config", r) + } + d.RouteTargetSet[dbutil.TableName(rr.TargetSchema, rr.TargetTable)] = struct{}{} + routeRuleList = append(routeRuleList, rr) + } + // t.SourceRoute can be nil, the caller should check it. 
+ d.Router, err = router.NewTableRouter(false, routeRuleList) + if err != nil { + return errors.Annotate(err, "failed to build route config") + } + } + + err = c.Task.Init(c.DataSources, c.TableConfigs) + if err != nil { + return errors.Annotate(err, "failed to init Task") + } + return nil +} + +func (c *Config) CheckConfig() bool { + if c.CheckThreadCount <= 0 { + log.Error("check-thread-count must greater than 0!") + return false + } + if len(c.DMAddr) != 0 { + u, err := url.Parse(c.DMAddr) + if err != nil || u.Scheme == "" || u.Host == "" { + log.Error("dm-addr's format should like 'http://127.0.0.1:8261'") + return false + } + + if len(c.DMTask) == 0 { + log.Error("must set the `dm-task` if set `dm-addr`") + return false + } + } + return true +} + +func timestampOutputDir() string { + return filepath.Join(os.TempDir(), time.Now().Format("sync-diff.output.2006-01-02T15.04.05Z0700")) +} + +func pathExists(_path string) (bool, error) { + _, err := os.Stat(_path) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, errors.Trace(err) + } + return true, nil +} + +func mkdirAll(base string) error { + mask := syscall.Umask(0) + err := os.MkdirAll(base, LocalDirPerm) + syscall.Umask(mask) + return errors.Trace(err) +} diff --git a/sync_diff_inspector/config/config.toml b/sync_diff_inspector/config/config.toml new file mode 100644 index 00000000000..b5557216188 --- /dev/null +++ b/sync_diff_inspector/config/config.toml @@ -0,0 +1,73 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources] +[data-sources.mysql1] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" +# MySQL doesn't has snapshot config + +[data-sources.tidb0] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + +# Support tls connection + # security.ca-path = "..." + # security.cert-path = "..." + # security.key-path = "..." + +# Remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + # snapshot = "386902609362944000" +# When using TiCDC syncpoint source and target can be set to auto + # snapshot = "auto" + +######################### Task config ######################### +# Required +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/output/config" + + source-instances = ["mysql1"] + + target-instance = "tidb0" + + # tables need to check. *Include `schema` and `table`. Use `.` to split* + target-check-tables = ["schema*.table*", "!c.*", "test2.t2"] + + # extra table config + target-configs= ["config1"] + +######################### Tables config ######################### +# Optional +[table-configs] +[table-configs.config1] +# tables need to use this specified config. +# if use this config. 
target-tables should be a subset of #target-check-tables +target-tables = ["schema*.table*", "test2.t2"] + +range = "age > 10 AND age < 20" +index-fields = [""] +ignore-columns = ["",""] +chunk-size = 0 +collation = "" diff --git a/sync_diff_inspector/config/config_conflict.toml b/sync_diff_inspector/config/config_conflict.toml new file mode 100644 index 00000000000..416c29b1f9e --- /dev/null +++ b/sync_diff_inspector/config/config_conflict.toml @@ -0,0 +1,72 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources] +[data-sources.mysql1] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + # mysql doesn't has snapshot config + +[data-sources.tidb0] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + # snapshot = "386902609362944000" + +######################### Task config ######################### +# Required +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/output/config_conflict" + + source-instances = ["mysql1"] + + target-instance = "tidb0" + + # tables need to check. *Include `schema` and `table`. Use `.` to split* + target-check-tables = ["schema*.table*", "!c.*", "test2.t2"] + + # extra table config + target-configs= ["config1", "config2"] + +# Optional +[table-configs] +[table-configs.config1] +# tables need to use this specified config. +# if use this config. target-tables should be a subset of #target-check-tables +target-tables = ["schema*.table*", "test2.t2"] +range = "age > 10 AND age < 20" +index-fields = [""] +ignore-columns = ["",""] +chunk-size = 0 +collation = "" + +[table-configs.config2] +# conflict config with config1 +target-tables = ["schema*.table*", "test2.t2"] +range = "age > 10 AND age < 20" +index-fields = [""] +ignore-columns = ["",""] +chunk-size = 0 +collation = "" diff --git a/sync_diff_inspector/config/config_dm.toml b/sync_diff_inspector/config/config_dm.toml new file mode 100644 index 00000000000..39d5b9eff0e --- /dev/null +++ b/sync_diff_inspector/config/config_dm.toml @@ -0,0 +1,31 @@ +# Diff Configuration. This config file shows how to check data for DM's task. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. 
+export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +# dm-master's address, the format should like "http://127.0.0.1:8261" +dm-addr = "http://127.0.0.1:8261" + +# the DM's task name which is willing to check data +dm-task = "test" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/output/config" + + # tables need to check. *Include `schema` and `table`. Use `.` to split* + target-check-tables = ["hb_test.*"] + diff --git a/sync_diff_inspector/config/config_sharding.toml b/sync_diff_inspector/config/config_sharding.toml new file mode 100644 index 00000000000..59a70c5b115 --- /dev/null +++ b/sync_diff_inspector/config/config_sharding.toml @@ -0,0 +1,99 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + + route-rules = ["rule1", "rule2"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.mysql2] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + + route-rules = ["rule1", "rule2"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.mysql3] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + + route-rules = ["rule1", "rule3"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb0] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[routes.rule1] +schema-pattern = "test_*" # schema to match. Support wildcard characters * and ?. +table-pattern = "t_*" # table to match. Support wildcard characters * and ?. +target-schema = "test" # target schema +target-table = "t" # target table + +[routes.rule2] +schema-pattern = "test2_*" # schema to match. Support wildcard characters * and ?. +table-pattern = "t2_*" # table to match. Support wildcard characters * and ?. +target-schema = "test2" # target schema +target-table = "t2" # target table + +[routes.rule3] +schema-pattern = "test2_*" # schema to match. Support wildcard characters * and ?. +table-pattern = "t2_*" # table to match. Support wildcard characters * and ?. +target-schema = "test" # target schema +target-table = "t" # target table + + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/output/config" + + source-instances = ["mysql1", "mysql2", "mysql3"] + + target-instance = "tidb0" + + # tables need to check. *Include `schema` and `table`. Use `.` to split* + target-check-tables = ["schema*.table*", "!c.*", "test2.t2"] + + # extra table config + target-configs= ["config1"] + +[table-configs.config1] +# tables need to use this specified config. +# if use this config. 
target-tables should be a subset of #target-check-tables +target-tables = ["schema*.table*", "test2.t2"] + +range = "age > 10 AND age < 20" +index-fields = [""] +ignore-columns = ["",""] \ No newline at end of file diff --git a/sync_diff_inspector/config/config_test.go b/sync_diff_inspector/config/config_test.go new file mode 100644 index 00000000000..7c12c260bb2 --- /dev/null +++ b/sync_diff_inspector/config/config_test.go @@ -0,0 +1,112 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "encoding/json" + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestParseConfig(t *testing.T) { + cfg := NewConfig() + require.Nil(t, cfg.Parse([]string{"-L", "info", "--config", "config.toml"})) + cfg = NewConfig() + require.Contains(t, cfg.Parse([]string{"-L", "info"}).Error(), "argument --config is required") + + unknownFlag := []string{"--LL", "info"} + err := cfg.Parse(unknownFlag) + require.Contains(t, err.Error(), "LL") + + require.Nil(t, cfg.Parse([]string{"--config", "config.toml"})) + require.Nil(t, cfg.Init()) + require.Nil(t, cfg.Task.Init(cfg.DataSources, cfg.TableConfigs)) + + require.Nil(t, cfg.Parse([]string{"--config", "config_sharding.toml"})) + // we change the config from config.toml to config_sharding.toml + // this action will raise error. + require.Contains(t, cfg.Init().Error(), "failed to init Task: config changes breaking the checkpoint, please use another outputDir and start over again!") + + require.NoError(t, os.RemoveAll(cfg.Task.OutputDir)) + require.Nil(t, cfg.Parse([]string{"--config", "config_sharding.toml"})) + // this time will be ok, because we remove the last outputDir. + require.Nil(t, cfg.Init()) + require.Nil(t, cfg.Task.Init(cfg.DataSources, cfg.TableConfigs)) + + require.True(t, cfg.CheckConfig()) + + // we might not use the same config to run this test. e.g. 
MYSQL_PORT can be 4000 + require.JSONEq(t, cfg.String(), + "{\"check-thread-count\":4,\"split-thread-count\":5,\"export-fix-sql\":true,\"check-struct-only\":false,\"dm-addr\":\"\",\"dm-task\":\"\",\"data-sources\":{\"mysql1\":{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null},\"mysql2\":{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null},\"mysql3\":{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule3\"],\"Router\":{\"Selector\":{}},\"Conn\":null},\"tidb0\":{\"host\":\"127.0.0.1\",\"port\":4000,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":null,\"Router\":{\"Selector\":{}},\"Conn\":null}},\"routes\":{\"rule1\":{\"schema-pattern\":\"test_*\",\"table-pattern\":\"t_*\",\"target-schema\":\"test\",\"target-table\":\"t\"},\"rule2\":{\"schema-pattern\":\"test2_*\",\"table-pattern\":\"t2_*\",\"target-schema\":\"test2\",\"target-table\":\"t2\"},\"rule3\":{\"schema-pattern\":\"test2_*\",\"table-pattern\":\"t2_*\",\"target-schema\":\"test\",\"target-table\":\"t\"}},\"table-configs\":{\"config1\":{\"target-tables\":[\"schema*.table*\",\"test2.t2\"],\"Schema\":\"\",\"Table\":\"\",\"ConfigIndex\":0,\"HasMatched\":false,\"IgnoreColumns\":[\"\",\"\"],\"Fields\":[\"\"],\"Range\":\"age \\u003e 10 AND age \\u003c 20\",\"TargetTableInfo\":null,\"Collation\":\"\",\"chunk-size\":0}},\"task\":{\"source-instances\":[\"mysql1\",\"mysql2\",\"mysql3\"],\"source-routes\":null,\"target-instance\":\"tidb0\",\"target-check-tables\":[\"schema*.table*\",\"!c.*\",\"test2.t2\"],\"target-configs\":[\"config1\"],\"output-dir\":\"/tmp/output/config\",\"SourceInstances\":[{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null},{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null},{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule3\"],\"Router\":{\"Selector\":{}},\"Conn\":null}],\"TargetInstance\":{\"host\":\"127.0.0.1\",\"port\":4000,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":null,\"Router\":{\"Selector\":{}},\"Conn\":null},\"TargetTableConfigs\":[{\"target-tables\":[\"schema*.table*\",\"test2.t2\"],\"Schema\":\"\",\"Table\":\"\",\"ConfigIndex\":0,\"HasMatched\":false,\"IgnoreColumns\":[\"\",\"\"],\"Fields\":[\"\"],\"Range\":\"age \\u003e 10 AND age \\u003c 20\",\"TargetTableInfo\":null,\"Collation\":\"\",\"chunk-size\":0}],\"TargetCheckTables\":[{},{},{}],\"FixDir\":\"/tmp/output/config/fix-on-tidb0\",\"CheckpointDir\":\"/tmp/output/config/checkpoint\",\"HashFile\":\"\"},\"ConfigFile\":\"config_sharding.toml\",\"PrintVersion\":false}") + hash, err := cfg.Task.ComputeConfigHash() + require.NoError(t, err) + require.Equal(t, hash, 
"c080f9894ec24aadb4aaec1109cd1951454f09a1233f2034bc3b06e0903cb289") + + require.True(t, cfg.TableConfigs["config1"].Valid()) + + require.NoError(t, os.RemoveAll(cfg.Task.OutputDir)) + +} + +func TestError(t *testing.T) { + tableConfig := &TableConfig{} + require.False(t, tableConfig.Valid()) + tableConfig.TargetTables = []string{"123", "234"} + require.True(t, tableConfig.Valid()) + + cfg := NewConfig() + // Parse + require.Contains(t, cfg.Parse([]string{"--config", "no_exist.toml"}).Error(), "no_exist.toml: no such file or directory") + + // CheckConfig + cfg.CheckThreadCount = 0 + require.False(t, cfg.CheckConfig()) + cfg.CheckThreadCount = 1 + require.True(t, cfg.CheckConfig()) + + // Init + cfg.DataSources = make(map[string]*DataSource) + cfg.DataSources["123"] = &DataSource{ + RouteRules: []string{"111"}, + } + err := cfg.Init() + require.Contains(t, err.Error(), "not found source routes for rule 111, please correct the config") +} + +func TestNoSecretLeak(t *testing.T) { + source := &DataSource{ + Host: "127.0.0.1", + Port: 5432, + User: "postgres", + Password: "AVeryV#ryStr0ngP@ssw0rd", + SqlMode: "MYSQL", + Snapshot: "2022/10/24", + } + cfg := &Config{} + cfg.DataSources = map[string]*DataSource{"pg-1": source} + require.NotContains(t, cfg.String(), "AVeryV#ryStr0ngP@ssw0rd", "%s", cfg.String()) + sourceJSON := []byte(` + { + "host": "127.0.0.1", + "port": 5432, + "user": "postgres", + "password": "meow~~~" + } + `) + s := DataSource{} + json.Unmarshal(sourceJSON, &s) + require.Equal(t, string(s.Password), "meow~~~") +} diff --git a/sync_diff_inspector/config/dm.go b/sync_diff_inspector/config/dm.go new file mode 100644 index 00000000000..68564910e4b --- /dev/null +++ b/sync_diff_inspector/config/dm.go @@ -0,0 +1,264 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package config + +import ( + "bytes" + "crypto/aes" + "crypto/cipher" + "encoding/base64" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "net/http" + + "github.com/BurntSushi/toml" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/util/filter" + router "github.com/pingcap/tidb/pkg/util/table-router" + "github.com/pingcap/tiflow/dm/config/security" + "github.com/pingcap/tiflow/dm/pb" + "github.com/pingcap/tiflow/dm/pkg/terror" + "github.com/pingcap/tiflow/pkg/column-mapping" + flag "github.com/spf13/pflag" + "go.uber.org/zap" +) + +const ( + // dm's http api version, define in https://github.com/pingcap/dm/blob/master/dm/proto/dmmaster.proto + apiVersion = "v1alpha1" +) + +func getDMTaskCfgURL(dmAddr, task string) string { + return fmt.Sprintf("%s/apis/%s/subtasks/%s", dmAddr, apiVersion, task) +} + +// getDMTaskCfg gets dm's sub task config +func getDMTaskCfg(dmAddr, task string) ([]*SubTaskConfig, error) { + tr := &http.Transport{ + // TODO: support tls + //TLSClientConfig: tlsCfg, + } + client := &http.Client{Transport: tr} + req, err := http.NewRequest("GET", getDMTaskCfgURL(dmAddr, task), nil) + if err != nil { + return nil, err + } + resp, err := client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + getSubTaskCfgResp := &pb.GetSubTaskCfgResponse{} + err = json.Unmarshal(body, getSubTaskCfgResp) + if err != nil { + return nil, err + } + + if !getSubTaskCfgResp.Result { + return nil, errors.Errorf("fail to get sub task config from DM, %s", getSubTaskCfgResp.Msg) + } + + subTaskCfgs := make([]*SubTaskConfig, 0, len(getSubTaskCfgResp.Cfgs)) + for _, cfgBytes := range getSubTaskCfgResp.Cfgs { + subtaskCfg := &SubTaskConfig{} + err = subtaskCfg.Decode(cfgBytes, false) + if err != nil { + return nil, err + } + subtaskCfg.To.Password = DecryptOrPlaintext(subtaskCfg.To.Password) + subtaskCfg.From.Password = DecryptOrPlaintext(subtaskCfg.From.Password) + subTaskCfgs = append(subTaskCfgs, subtaskCfg) + } + + log.Info("dm sub task configs", zap.Reflect("cfgs", subTaskCfgs)) + return subTaskCfgs, nil +} + +// SubTaskConfig is the configuration for SubTask. 
+type SubTaskConfig struct { + // BurntSushi/toml seems have a bug for flag "-" + // when doing encoding, if we use `toml:"-"`, it still try to encode it + // and it will panic because of unsupported type (reflect.Func) + // so we should not export flagSet + flagSet *flag.FlagSet + + // when in sharding, multi dm-workers do one task + IsSharding bool `toml:"is-sharding" json:"is-sharding"` + ShardMode string `toml:"shard-mode" json:"shard-mode"` + StrictOptimisticShardMode bool `toml:"strict-optimistic-shard-mode" json:"strict-optimistic-shard-mode"` + OnlineDDL bool `toml:"online-ddl" json:"online-ddl"` + + // pt/gh-ost name rule, support regex + ShadowTableRules []string `yaml:"shadow-table-rules" toml:"shadow-table-rules" json:"shadow-table-rules"` + TrashTableRules []string `yaml:"trash-table-rules" toml:"trash-table-rules" json:"trash-table-rules"` + + // deprecated + OnlineDDLScheme string `toml:"online-ddl-scheme" json:"online-ddl-scheme"` + + // handle schema/table name mode, and only for schema/table name/pattern + // if case insensitive, we would convert schema/table name/pattern to lower case + CaseSensitive bool `toml:"case-sensitive" json:"case-sensitive"` + + // default "loose" handle create sql by original sql, will not add default collation as upstream + // "strict" will add default collation as upstream, and downstream will occur error when downstream don't support + CollationCompatible string `yaml:"collation_compatible" toml:"collation_compatible" json:"collation_compatible"` + + Name string `toml:"name" json:"name"` + Mode string `toml:"mode" json:"mode"` + // treat it as hidden configuration + IgnoreCheckingItems []string `toml:"ignore-checking-items" json:"ignore-checking-items"` + // it represents a MySQL/MariaDB instance or a replica group + SourceID string `toml:"source-id" json:"source-id"` + ServerID uint32 `toml:"server-id" json:"server-id"` + Flavor string `toml:"flavor" json:"flavor"` + MetaSchema string `toml:"meta-schema" json:"meta-schema"` + // deprecated + HeartbeatUpdateInterval int `toml:"heartbeat-update-interval" json:"heartbeat-update-interval"` + // deprecated + HeartbeatReportInterval int `toml:"heartbeat-report-interval" json:"heartbeat-report-interval"` + // deprecated + EnableHeartbeat bool `toml:"enable-heartbeat" json:"enable-heartbeat"` + Timezone string `toml:"timezone" json:"timezone"` + + // RelayDir get value from dm-worker config + RelayDir string `toml:"relay-dir" json:"relay-dir"` + + // UseRelay get value from dm-worker's relayEnabled + UseRelay bool `toml:"use-relay" json:"use-relay"` + From DBConfig `toml:"from" json:"from"` + To DBConfig `toml:"to" json:"to"` + + RouteRules []*router.TableRule `toml:"route-rules" json:"route-rules"` + // FilterRules []*bf.BinlogEventRule `toml:"filter-rules" json:"filter-rules"` + // deprecated + ColumnMappingRules []*column.Rule `toml:"mapping-rule" json:"mapping-rule"` + // ExprFilter []*ExpressionFilter `yaml:"expression-filter" toml:"expression-filter" json:"expression-filter"` + + // black-white-list is deprecated, use block-allow-list instead + BWList *filter.Rules `toml:"black-white-list" json:"black-white-list"` + BAList *filter.Rules `toml:"block-allow-list" json:"block-allow-list"` + + // compatible with standalone dm unit + LogLevel string `toml:"log-level" json:"log-level"` + LogFile string `toml:"log-file" json:"log-file"` + LogFormat string `toml:"log-format" json:"log-format"` + LogRotate string `toml:"log-rotate" json:"log-rotate"` + + PprofAddr string `toml:"pprof-addr" 
json:"pprof-addr"` + StatusAddr string `toml:"status-addr" json:"status-addr"` + + ConfigFile string `toml:"-" json:"config-file"` + + CleanDumpFile bool `toml:"clean-dump-file" json:"clean-dump-file"` + + // deprecated, will auto discover SQL mode + EnableANSIQuotes bool `toml:"ansi-quotes" json:"ansi-quotes"` + + // still needed by Syncer / Loader bin + printVersion bool + + // which DM worker is running the subtask, this will be injected when the real worker starts running the subtask(StartSubTask). + WorkerName string `toml:"-" json:"-"` + // task experimental configs + Experimental struct { + AsyncCheckpointFlush bool `yaml:"async-checkpoint-flush" toml:"async-checkpoint-flush" json:"async-checkpoint-flush"` + } `yaml:"experimental" toml:"experimental" json:"experimental"` +} + +// DBConfig is the DB configuration. +type DBConfig struct { + Host string `toml:"host" json:"host" yaml:"host"` + Port int `toml:"port" json:"port" yaml:"port"` + User string `toml:"user" json:"user" yaml:"user"` + Password string `toml:"password" json:"-" yaml:"password"` // omit it for privacy + // deprecated, mysql driver could automatically fetch this value + MaxAllowedPacket *int `toml:"max-allowed-packet" json:"max-allowed-packet" yaml:"max-allowed-packet"` + Session map[string]string `toml:"session" json:"session" yaml:"session"` + + // security config + Security *security.Security `toml:"security" json:"security" yaml:"security"` + + // RawDBCfg *RawDBConfig `toml:"-" json:"-" yaml:"-"` + // Net string `toml:"-" json:"-" yaml:"-"` +} + +// Decode loads config from file data. +func (c *SubTaskConfig) Decode(data string, verifyDecryptPassword bool) error { + if _, err := toml.Decode(data, c); err != nil { + return errors.New("decode subtask config from data") + } + + return nil +} + +// DecryptOrPlaintext tries to decrypt base64 encoded ciphertext to plaintext or return plaintext. +func DecryptOrPlaintext(ciphertextB64 string) string { + plaintext, err := Decrypt(ciphertextB64) + if err != nil { + return ciphertextB64 + } + return plaintext +} + +// Decrypt tries to decrypt base64 encoded ciphertext to plaintext. +func Decrypt(ciphertextB64 string) (string, error) { + ciphertext, err := base64.StdEncoding.DecodeString(ciphertextB64) + if err != nil { + return "", err + } + + plaintext, err := decrypt(ciphertext) + if err != nil { + return "", err + } + return string(plaintext), nil +} + +var ( + secretKey, _ = hex.DecodeString("a529b7665997f043a30ac8fadcb51d6aa032c226ab5b7750530b12b8c1a16a48") + ivSep = []byte("@") // ciphertext format: iv + ivSep + encrypted-plaintext +) + +// decrypt decrypts ciphertext to plaintext. 
+func decrypt(ciphertext []byte) ([]byte, error) { + block, err := aes.NewCipher(secretKey) + if err != nil { + return nil, err + } + + if len(ciphertext) < block.BlockSize()+len(ivSep) { + return nil, terror.ErrCiphertextLenNotValid.Generate(block.BlockSize()+len(ivSep), len(ciphertext)) + } + + if !bytes.Equal(ciphertext[block.BlockSize():block.BlockSize()+len(ivSep)], ivSep) { + return nil, terror.ErrCiphertextContextNotValid.Generate() + } + + iv := ciphertext[:block.BlockSize()] + ciphertext = ciphertext[block.BlockSize()+len(ivSep):] + plaintext := make([]byte, len(ciphertext)) + + stream := cipher.NewCFBDecrypter(block, iv) + stream.XORKeyStream(plaintext, ciphertext) + + return plaintext, nil +} diff --git a/sync_diff_inspector/config/dm_test.go b/sync_diff_inspector/config/dm_test.go new file mode 100644 index 00000000000..83587f6274f --- /dev/null +++ b/sync_diff_inspector/config/dm_test.go @@ -0,0 +1,72 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "fmt" + "net/http" + "net/http/httptest" + "testing" + + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "github.com/stretchr/testify/require" +) + +func testHandler(w http.ResponseWriter, req *http.Request) { + fmt.Fprintln(w, `{"result":true,"cfgs":["import-mode = \"logical\"\nis-sharding = true\nshard-mode = \"pessimistic\"\nonline-ddl-scheme = \"\"\ncase-sensitive = false\nname = \"test\"\nmode = \"all\"\nsource-id = \"mysql-replica-01\"\nserver-id = 0\nflavor = \"\"\nmeta-schema = \"dm_meta\"\nheartbeat-update-interval = 1\nheartbeat-report-interval = 10\nenable-heartbeat = false\ntimezone = \"Asia/Shanghai\"\nrelay-dir = \"\"\nuse-relay = false\nfilter-rules = []\nmydumper-path = \"./bin/mydumper\"\nthreads = 4\nchunk-filesize = \"64\"\nstatement-size = 0\nrows = 0\nwhere = \"\"\nskip-tz-utc = true\nextra-args = \"\"\npool-size = 16\ndir = \"./dumped_data.test\"\nmeta-file = \"\"\nworker-count = 16\nbatch = 100\nqueue-size = 1024\ncheckpoint-flush-interval = 30\nmax-retry = 0\nauto-fix-gtid = false\nenable-gtid = false\ndisable-detect = false\nsafe-mode = false\nenable-ansi-quotes = false\nlog-level = \"\"\nlog-file = \"\"\nlog-format = \"\"\nlog-rotate = \"\"\npprof-addr = \"\"\nstatus-addr = \"\"\nclean-dump-file = true\n\n[from]\n host = \"127.0.0.1\"\n port = 3306\n user = \"root\"\n password = \"/Q7B9DizNLLTTfiZHv9WoEAKamfpIUs=\"\n max-allowed-packet = 67108864\n\n[to]\n host = \"127.0.0.1\"\n port = 4000\n user = \"root\"\n password = \"\"\n max-allowed-packet = 67108864\n\n[[route-rules]]\n schema-pattern = \"sharding*\"\n table-pattern = \"t*\"\n target-schema = \"db_target\"\n target-table = \"t_target\"\n\n[[route-rules]]\n schema-pattern = \"sharding*\"\n table-pattern = \"\"\n target-schema = \"db_target\"\n target-table = \"\"\n\n[block-allow-list]\n do-dbs = [\"~^sharding[\\\\d]+\"]\n\n [[block-allow-list.do-tables]]\n db-name = \"~^sharding[\\\\d]+\"\n tbl-name = \"~^t[\\\\d]+\"\n","is-sharding = true\nshard-mode = \"pessimistic\"\nonline-ddl-scheme = \"\"\ncase-sensitive = false\nname = \"test\"\nmode = 
\"all\"\nsource-id = \"mysql-replica-02\"\nserver-id = 0\nflavor = \"\"\nmeta-schema = \"dm_meta\"\nheartbeat-update-interval = 1\nheartbeat-report-interval = 10\nenable-heartbeat = false\ntimezone = \"Asia/Shanghai\"\nrelay-dir = \"\"\nuse-relay = false\nfilter-rules = []\nmydumper-path = \"./bin/mydumper\"\nthreads = 4\nchunk-filesize = \"64\"\nstatement-size = 0\nrows = 0\nwhere = \"\"\nskip-tz-utc = true\nextra-args = \"\"\npool-size = 16\ndir = \"./dumped_data.test\"\nmeta-file = \"\"\nworker-count = 16\nbatch = 100\nqueue-size = 1024\ncheckpoint-flush-interval = 30\nmax-retry = 0\nauto-fix-gtid = false\nenable-gtid = false\ndisable-detect = false\nsafe-mode = false\nenable-ansi-quotes = false\nlog-level = \"\"\nlog-file = \"\"\nlog-format = \"\"\nlog-rotate = \"\"\npprof-addr = \"\"\nstatus-addr = \"\"\nclean-dump-file = true\n\n[from]\n host = \"127.0.0.1\"\n port = 3307\n user = \"root\"\n password = \"/Q7B9DizNLLTTfiZHv9WoEAKamfpIUs=\"\n max-allowed-packet = 67108864\n\n[to]\n host = \"127.0.0.1\"\n port = 4000\n user = \"root\"\n password = \"\"\n max-allowed-packet = 67108864\n\n[[route-rules]]\n schema-pattern = \"sharding*\"\n table-pattern = \"t*\"\n target-schema = \"db_target\"\n target-table = \"t_target\"\n\n[[route-rules]]\n schema-pattern = \"sharding*\"\n table-pattern = \"\"\n target-schema = \"db_target\"\n target-table = \"\"\n\n[block-allow-list]\n do-dbs = [\"~^sharding[\\\\d]+\"]\n\n [[block-allow-list.do-tables]]\n db-name = \"~^sharding[\\\\d]+\"\n tbl-name = \"~^t[\\\\d]+\"\n"]}`) +} + +func equal(a *DataSource, b *DataSource) bool { + return a.Host == b.Host && a.Port == b.Port && a.Password == b.Password && a.User == b.User +} + +func TestGetDMTaskCfg(t *testing.T) { + mockServer := httptest.NewServer(http.HandlerFunc(testHandler)) + defer mockServer.Close() + + dmTaskCfg, err := getDMTaskCfg(mockServer.URL, "test") + require.NoError(t, err) + require.Equal(t, len(dmTaskCfg), 2) + require.Equal(t, dmTaskCfg[0].SourceID, "mysql-replica-01") + require.Equal(t, dmTaskCfg[1].SourceID, "mysql-replica-02") + + cfg := NewConfig() + cfg.DMAddr = mockServer.URL + cfg.DMTask = "test" + err = cfg.adjustConfigByDMSubTasks() + require.NoError(t, err) + + // after adjust config, will generate source tables for target table + require.Equal(t, len(cfg.DataSources), 3) + require.True(t, equal(cfg.DataSources["target"], &DataSource{ + Host: dmTaskCfg[0].To.Host, + Port: dmTaskCfg[0].To.Port, + Password: utils.SecretString(dmTaskCfg[0].To.Password), + User: dmTaskCfg[0].To.User, + })) + + require.True(t, equal(cfg.DataSources["mysql-replica-01"], &DataSource{ + Host: dmTaskCfg[0].From.Host, + Port: dmTaskCfg[0].From.Port, + Password: utils.SecretString(dmTaskCfg[0].From.Password), + User: dmTaskCfg[0].From.User, + })) + + require.True(t, equal(cfg.DataSources["mysql-replica-02"], &DataSource{ + Host: dmTaskCfg[1].From.Host, + Port: dmTaskCfg[1].From.Port, + Password: utils.SecretString(dmTaskCfg[1].From.Password), + User: dmTaskCfg[1].From.User, + })) +} diff --git a/sync_diff_inspector/config/template.go b/sync_diff_inspector/config/template.go new file mode 100644 index 00000000000..0296856520e --- /dev/null +++ b/sync_diff_inspector/config/template.go @@ -0,0 +1,124 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "fmt" + + "github.com/pingcap/errors" +) + +const ( + dmConfig = `# Diff Configuration. + +######################### Global config ######################### + +check-thread-count = 4 + +export-fix-sql = true + +check-struct-only = false + +dm-addr = "http://127.0.0.1:8261" + +dm-task = "test" + +######################### Task config ######################### +[task] + output-dir = "./output" + + target-check-tables = ["hb_test.*"] + +` + + normConfig = `# Diff Configuration. + +######################### Global config ######################### + +check-thread-count = 4 + +export-fix-sql = true + +check-struct-only = false + + +######################### Datasource config ######################### +[data-sources] +[data-sources.mysql1] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + + route-rules = ["rule1", "rule2"] + +[data-sources.tidb0] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # security.ca-path = "..." + # security.cert-path = "..." + # security.key-path = "..." + # snapshot = "386902609362944000" + +########################### Routes ########################### +[routes] +[routes.rule1] +schema-pattern = "test_*" +table-pattern = "t_*" +target-schema = "test" +target-table = "t" + +[routes.rule2] +schema-pattern = "test2_*" +table-pattern = "t2_*" +target-schema = "test2" +target-table = "t2" + +######################### Task config ######################### +[task] + output-dir = "./output" + + source-instances = ["mysql1"] + + target-instance = "tidb0" + + target-check-tables = ["schema*.table*", "!c.*", "test2.t2"] + + target-configs = ["config1"] + +######################### Table config ######################### +[table-configs.config1] +target-tables = ["schema*.test*", "test2.t2"] +range = "age > 10 AND age < 20" +index-fields = ["col1","col2"] +ignore-columns = ["",""] +chunk-size = 0 +collation = "" + +` +) + +func ExportTemplateConfig(configType string) error { + switch configType { + case "dm", "DM", "Dm", "dM": + fmt.Print(dmConfig) + case "norm", "normal", "Norm", "Normal": + fmt.Print(normConfig) + default: + return errors.Errorf("Error: unexpect template name: %s\n-T dm: export a dm config\n-T norm: export a normal config\n", configType) + } + return nil +} diff --git a/sync_diff_inspector/diff/diff.go b/sync_diff_inspector/diff/diff.go new file mode 100644 index 00000000000..6ebc69f65cd --- /dev/null +++ b/sync_diff_inspector/diff/diff.go @@ -0,0 +1,844 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package diff + +import ( + "bytes" + "context" + "database/sql" + "fmt" + "io/fs" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "github.com/BurntSushi/toml" + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + "github.com/pingcap/log" + tidbconfig "github.com/pingcap/tidb/pkg/config" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/checkpoints" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/progress" + "github.com/pingcap/tiflow/sync_diff_inspector/report" + "github.com/pingcap/tiflow/sync_diff_inspector/source" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/splitter" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "github.com/siddontang/go/ioutil2" + "go.uber.org/zap" +) + +const ( + // checkpointFile represents the checkpoints' file name which used for save and loads chunks + checkpointFile = "sync_diff_checkpoints.pb" +) + +// ChunkDML SQL struct for each chunk +type ChunkDML struct { + node *checkpoints.Node + sqls []string + rowAdd int + rowDelete int +} + +// Diff contains two sql DB, used for comparing. +type Diff struct { + // we may have multiple sources in dm sharding sync. + upstream source.Source + downstream source.Source + + // workSource is one of upstream/downstream by some policy in #pickSource. + workSource source.Source + + checkThreadCount int + splitThreadCount int + exportFixSQL bool + sqlWg sync.WaitGroup + checkpointWg sync.WaitGroup + + FixSQLDir string + CheckpointDir string + + sqlCh chan *ChunkDML + cp *checkpoints.Checkpoint + startRange *splitter.RangeInfo + report *report.Report +} + +// NewDiff returns a Diff instance. +func NewDiff(ctx context.Context, cfg *config.Config) (diff *Diff, err error) { + diff = &Diff{ + checkThreadCount: cfg.CheckThreadCount, + splitThreadCount: cfg.SplitThreadCount, + exportFixSQL: cfg.ExportFixSQL, + sqlCh: make(chan *ChunkDML, splitter.DefaultChannelBuffer), + cp: new(checkpoints.Checkpoint), + report: report.NewReport(&cfg.Task), + } + if err = diff.init(ctx, cfg); err != nil { + diff.Close() + return nil, errors.Trace(err) + } + + return diff, nil +} + +func (df *Diff) PrintSummary(ctx context.Context) bool { + // Stop updating progress bar so that summary won't be flushed. 
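+	// (progress.Close sends a close operation to the printer goroutine and blocks
+	// on finishCh until the final progress state has been flushed.)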
+	progress.Close()
+	df.report.CalculateTotalSize(ctx, df.downstream.GetDB())
+	err := df.report.CommitSummary()
+	if err != nil {
+		log.Fatal("failed to commit report", zap.Error(err))
+	}
+	df.report.Print(os.Stdout)
+	return df.report.Result == report.Pass
+}
+
+func (df *Diff) Close() {
+	if df.upstream != nil {
+		df.upstream.Close()
+	}
+	if df.downstream != nil {
+		df.downstream.Close()
+	}
+
+	failpoint.Inject("wait-for-checkpoint", func() {
+		log.Info("failpoint wait-for-checkpoint injected, skip delete checkpoint file.")
+		failpoint.Return()
+	})
+
+	if err := os.Remove(filepath.Join(df.CheckpointDir, checkpointFile)); err != nil && !os.IsNotExist(err) {
+		log.Fatal("fail to remove the checkpoint file", zap.String("error", err.Error()))
+	}
+}
+
+func (df *Diff) init(ctx context.Context, cfg *config.Config) (err error) {
+	// TODO adjust config
+	setTiDBCfg()
+
+	df.downstream, df.upstream, err = source.NewSources(ctx, cfg)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	df.workSource = df.pickSource(ctx)
+	df.FixSQLDir = cfg.Task.FixDir
+	df.CheckpointDir = cfg.Task.CheckpointDir
+
+	sourceConfigs, targetConfig, err := getConfigsForReport(cfg)
+	if err != nil {
+		return errors.Trace(err)
+	}
+	df.report.Init(df.downstream.GetTables(), sourceConfigs, targetConfig)
+	if err := df.initCheckpoint(); err != nil {
+		return errors.Trace(err)
+	}
+	return nil
+}
+
+func (df *Diff) initCheckpoint() error {
+	df.cp.Init()
+
+	finishTableNums := 0
+	path := filepath.Join(df.CheckpointDir, checkpointFile)
+	if ioutil2.FileExists(path) {
+		node, reportInfo, err := df.cp.LoadChunk(path)
+		if err != nil {
+			return errors.Annotate(err, "the checkpoint load process failed")
+		} else {
+			// this need not be synchronized, because at the moment there is only one thread accessing this section
+			log.Info("load checkpoint",
+				zap.Any("chunk index", node.GetID()),
+				zap.Reflect("chunk", node),
+				zap.String("state", node.GetState()))
+			df.cp.InitCurrentSavedID(node)
+		}
+
+		if node != nil {
+			// remove the SQL files whose IDs are bigger than the node's,
+			// because we will generate these SQLs again.
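+			// (removeSQLFiles moves any fix-SQL files past this checkpoint into a
+			// temporary .trash directory, which is cleaned up once the walk finishes.)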
+ err = df.removeSQLFiles(node.GetID()) + if err != nil { + return errors.Trace(err) + } + df.startRange = splitter.FromNode(node) + df.report.LoadReport(reportInfo) + finishTableNums = df.startRange.GetTableIndex() + if df.startRange.ChunkRange.Type == chunk.Empty { + // chunk_iter will skip this table directly + finishTableNums++ + } + } + } else { + log.Info("not found checkpoint file, start from beginning") + id := &chunk.ChunkID{TableIndex: -1, BucketIndexLeft: -1, BucketIndexRight: -1, ChunkIndex: -1, ChunkCnt: 0} + err := df.removeSQLFiles(id) + if err != nil { + return errors.Trace(err) + } + } + progress.Init(len(df.workSource.GetTables()), finishTableNums) + return nil +} + +func encodeReportConfig(config *report.ReportConfig) ([]byte, error) { + buf := new(bytes.Buffer) + if err := toml.NewEncoder(buf).Encode(config); err != nil { + return nil, errors.Trace(err) + } + return buf.Bytes(), nil +} + +func getConfigsForReport(cfg *config.Config) ([][]byte, []byte, error) { + sourceConfigs := make([]*report.ReportConfig, len(cfg.Task.SourceInstances)) + for i := 0; i < len(cfg.Task.SourceInstances); i++ { + instance := cfg.Task.SourceInstances[i] + + sourceConfigs[i] = &report.ReportConfig{ + Host: instance.Host, + Port: instance.Port, + User: instance.User, + Snapshot: instance.Snapshot, + SqlMode: instance.SqlMode, + } + } + instance := cfg.Task.TargetInstance + targetConfig := &report.ReportConfig{ + Host: instance.Host, + Port: instance.Port, + User: instance.User, + Snapshot: instance.Snapshot, + SqlMode: instance.SqlMode, + } + sourceBytes := make([][]byte, len(sourceConfigs)) + var err error + for i := range sourceBytes { + sourceBytes[i], err = encodeReportConfig(sourceConfigs[i]) + if err != nil { + return nil, nil, errors.Trace(err) + } + } + targetBytes, err := encodeReportConfig(targetConfig) + if err != nil { + return nil, nil, errors.Trace(err) + } + return sourceBytes, targetBytes, nil +} + +// Equal tests whether two database have same data and schema. 
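+// It pulls chunk ranges from the work source's range iterator, checks each chunk
+// with a pool of checkThreadCount consumer workers, and hands generated fix SQLs
+// and checkpoints to the writeSQLs and handleCheckpoints background goroutines.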
+func (df *Diff) Equal(ctx context.Context) error { + chunksIter, err := df.generateChunksIterator(ctx) + if err != nil { + return errors.Trace(err) + } + defer chunksIter.Close() + pool := utils.NewWorkerPool(uint(df.checkThreadCount), "consumer") + stopCh := make(chan struct{}) + + df.checkpointWg.Add(1) + go df.handleCheckpoints(ctx, stopCh) + df.sqlWg.Add(1) + go df.writeSQLs(ctx) + + defer func() { + pool.WaitFinished() + log.Debug("all consume tasks finished") + // close the sql channel + close(df.sqlCh) + df.sqlWg.Wait() + stopCh <- struct{}{} + df.checkpointWg.Wait() + }() + + for { + c, err := chunksIter.Next(ctx) + if err != nil { + return errors.Trace(err) + } + if c == nil { + // finish read the tables + break + } + log.Info("global consume chunk info", zap.Any("chunk index", c.ChunkRange.Index), zap.Any("chunk bound", c.ChunkRange.Bounds)) + pool.Apply(func() { + isEqual := df.consume(ctx, c) + if !isEqual { + progress.FailTable(c.ProgressID) + } + progress.Inc(c.ProgressID) + }) + } + + return nil +} + +func (df *Diff) StructEqual(ctx context.Context) error { + tables := df.downstream.GetTables() + tableIndex := 0 + if df.startRange != nil { + tableIndex = df.startRange.ChunkRange.Index.TableIndex + } + for ; tableIndex < len(tables); tableIndex++ { + isEqual, isSkip, isAllTableExist := false, true, tables[tableIndex].TableLack + if common.AllTableExist(isAllTableExist) { + var err error + isEqual, isSkip, err = df.compareStruct(ctx, tableIndex) + if err != nil { + return errors.Trace(err) + } + } + progress.RegisterTable(dbutil.TableName(tables[tableIndex].Schema, tables[tableIndex].Table), !isEqual, isSkip, isAllTableExist) + df.report.SetTableStructCheckResult(tables[tableIndex].Schema, tables[tableIndex].Table, isEqual, isSkip, isAllTableExist) + } + return nil +} + +func (df *Diff) compareStruct(ctx context.Context, tableIndex int) (isEqual bool, isSkip bool, err error) { + sourceTableInfos, err := df.upstream.GetSourceStructInfo(ctx, tableIndex) + if err != nil { + return false, true, errors.Trace(err) + } + table := df.downstream.GetTables()[tableIndex] + isEqual, isSkip = utils.CompareStruct(sourceTableInfos, table.Info) + table.IgnoreDataCheck = isSkip + return isEqual, isSkip, nil +} + +func (df *Diff) startGCKeeperForTiDB(ctx context.Context, db *sql.DB, snap string) { + pdCli, _ := utils.GetPDClientForGC(ctx, db) + if pdCli != nil { + // Get latest snapshot + latestSnap, err := utils.GetSnapshot(ctx, db) + if err != nil { + log.Info("failed to get snapshot, user should guarantee the GC stopped during diff progress.") + return + } + + if len(latestSnap) == 1 { + if len(snap) == 0 { + snap = latestSnap[0] + } + // compare the snapshot and choose the small one to lock + if strings.Compare(latestSnap[0], snap) < 0 { + snap = latestSnap[0] + } + } + + err = utils.StartGCSavepointUpdateService(ctx, pdCli, db, snap) + if err != nil { + log.Info("failed to keep snapshot, user should guarantee the GC stopped during diff progress.") + } else { + log.Info("start update service to keep GC stopped automatically") + } + } +} + +// pickSource pick one proper source to do some work. e.g. generate chunks +func (df *Diff) pickSource(ctx context.Context) source.Source { + workSource := df.downstream + if ok, _ := dbutil.IsTiDB(ctx, df.upstream.GetDB()); ok { + log.Info("The upstream is TiDB. 
pick it as work source candidate") + df.startGCKeeperForTiDB(ctx, df.upstream.GetDB(), df.upstream.GetSnapshot()) + workSource = df.upstream + } + if ok, _ := dbutil.IsTiDB(ctx, df.downstream.GetDB()); ok { + log.Info("The downstream is TiDB. pick it as work source first") + df.startGCKeeperForTiDB(ctx, df.downstream.GetDB(), df.downstream.GetSnapshot()) + workSource = df.downstream + } + return workSource +} + +func (df *Diff) generateChunksIterator(ctx context.Context) (source.RangeIterator, error) { + return df.workSource.GetRangeIterator(ctx, df.startRange, df.workSource.GetTableAnalyzer(), df.splitThreadCount) +} + +func (df *Diff) handleCheckpoints(ctx context.Context, stopCh chan struct{}) { + // a background goroutine which will insert the verified chunk, + // and periodically save checkpoint + log.Info("start handleCheckpoint goroutine") + defer func() { + log.Info("close handleCheckpoint goroutine") + df.checkpointWg.Done() + }() + flush := func() { + chunk := df.cp.GetChunkSnapshot() + if chunk != nil { + tableDiff := df.downstream.GetTables()[chunk.GetTableIndex()] + schema, table := tableDiff.Schema, tableDiff.Table + r, err := df.report.GetSnapshot(chunk.GetID(), schema, table) + if err != nil { + log.Warn("fail to save the report", zap.Error(err)) + } + _, err = df.cp.SaveChunk(ctx, filepath.Join(df.CheckpointDir, checkpointFile), chunk, r) + if err != nil { + log.Warn("fail to save the chunk", zap.Error(err)) + // maybe we should panic, because SaveChunk method should not failed. + } + } + } + defer flush() + for { + select { + case <-ctx.Done(): + log.Info("Stop do checkpoint by context done") + return + case <-stopCh: + log.Info("Stop do checkpoint") + return + case <-time.After(10 * time.Second): + flush() + } + } +} + +func (df *Diff) consume(ctx context.Context, rangeInfo *splitter.RangeInfo) bool { + dml := &ChunkDML{ + node: rangeInfo.ToNode(), + } + defer func() { df.sqlCh <- dml }() + tableDiff := df.downstream.GetTables()[rangeInfo.GetTableIndex()] + schema, table := tableDiff.Schema, tableDiff.Table + id := rangeInfo.ChunkRange.Index + if rangeInfo.ChunkRange.Type == chunk.Empty { + dml.node.State = checkpoints.IgnoreState + // for tables that don't exist upstream or downstream + if !common.AllTableExist(tableDiff.TableLack) { + upCount := df.upstream.GetCountForLackTable(ctx, rangeInfo) + downCount := df.downstream.GetCountForLackTable(ctx, rangeInfo) + df.report.SetTableDataCheckResult(schema, table, false, int(upCount), int(downCount), upCount, downCount, id) + return false + } + return true + } + + var state string = checkpoints.SuccessState + + isEqual, upCount, downCount, err := df.compareChecksumAndGetCount(ctx, rangeInfo) + if err != nil { + // If an error occurs during the checksum phase, skip the data compare phase. 
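+		// The chunk is still marked as failed and the error is recorded on the
+		// table result so it appears in the final report.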
+ state = checkpoints.FailedState + df.report.SetTableMeetError(schema, table, err) + } else if !isEqual && df.exportFixSQL { + state = checkpoints.FailedState + // if the chunk's checksum differ, try to do binary check + info := rangeInfo + if upCount > splitter.SplitThreshold { + log.Debug("count greater than threshold, start do bingenerate", zap.Any("chunk id", rangeInfo.ChunkRange.Index), zap.Int64("upstream chunk size", upCount)) + info, err = df.BinGenerate(ctx, df.workSource, rangeInfo, upCount) + if err != nil { + log.Error("fail to do binary search.", zap.Error(err)) + df.report.SetTableMeetError(schema, table, err) + // reuse rangeInfo to compare data + info = rangeInfo + } else { + log.Debug("bin generate finished", zap.Reflect("chunk", info.ChunkRange), zap.Any("chunk id", info.ChunkRange.Index)) + } + } + isDataEqual, err := df.compareRows(ctx, info, dml) + if err != nil { + df.report.SetTableMeetError(schema, table, err) + } + isEqual = isDataEqual + } + dml.node.State = state + df.report.SetTableDataCheckResult(schema, table, isEqual, dml.rowAdd, dml.rowDelete, upCount, downCount, id) + return isEqual +} + +func (df *Diff) BinGenerate(ctx context.Context, targetSource source.Source, tableRange *splitter.RangeInfo, count int64) (*splitter.RangeInfo, error) { + if count <= splitter.SplitThreshold { + return tableRange, nil + } + tableDiff := targetSource.GetTables()[tableRange.GetTableIndex()] + indices := dbutil.FindAllIndex(tableDiff.Info) + // if no index, do not split + if len(indices) == 0 { + log.Warn("cannot found an index to split and disable the BinGenerate", + zap.String("table", dbutil.TableName(tableDiff.Schema, tableDiff.Table))) + return tableRange, nil + } + var index *model.IndexInfo + // using the index + for _, i := range indices { + if tableRange.IndexID == i.ID { + index = i + break + } + } + if index == nil { + log.Warn("have indices but cannot found a proper index to split and disable the BinGenerate", + zap.String("table", dbutil.TableName(tableDiff.Schema, tableDiff.Table))) + return tableRange, nil + } + // TODO use selectivity from utils.GetBetterIndex + // only support PK/UK + if !(index.Primary || index.Unique) { + log.Warn("BinGenerate only support PK/UK") + return tableRange, nil + } + + log.Debug("index for BinGenerate", zap.String("index", index.Name.O)) + indexColumns := utils.GetColumnsFromIndex(index, tableDiff.Info) + if len(indexColumns) == 0 { + log.Warn("fail to get columns of the selected index, directly return the origin chunk") + return tableRange, nil + } + + return df.binSearch(ctx, targetSource, tableRange, count, tableDiff, indexColumns) +} + +func (df *Diff) binSearch(ctx context.Context, targetSource source.Source, tableRange *splitter.RangeInfo, count int64, tableDiff *common.TableDiff, indexColumns []*model.ColumnInfo) (*splitter.RangeInfo, error) { + if count <= splitter.SplitThreshold { + return tableRange, nil + } + var ( + isEqual1, isEqual2 bool + count1, count2 int64 + ) + tableRange1 := tableRange.Copy() + tableRange2 := tableRange.Copy() + + chunkLimits, args := tableRange.ChunkRange.ToString(tableDiff.Collation) + limitRange := fmt.Sprintf("(%s) AND (%s)", chunkLimits, tableDiff.Range) + midValues, err := utils.GetApproximateMidBySize(ctx, targetSource.GetDB(), tableDiff.Schema, tableDiff.Table, indexColumns, limitRange, args, count) + if err != nil { + return nil, errors.Trace(err) + } + if midValues == nil { + // TODO Since the count is from upstream, + // the midValues may be empty when downstream has much less rows 
in this chunk. + return tableRange, nil + } + log.Debug("mid values", zap.Reflect("mid values", midValues), zap.Reflect("indices", indexColumns), zap.Reflect("bounds", tableRange.ChunkRange.Bounds)) + log.Debug("table ranges", zap.Reflect("original range", tableRange)) + for i := range indexColumns { + log.Debug("update tableRange", zap.String("field", indexColumns[i].Name.O), zap.String("value", midValues[indexColumns[i].Name.O])) + tableRange1.Update(indexColumns[i].Name.O, "", midValues[indexColumns[i].Name.O], false, true, tableDiff.Collation, tableDiff.Range) + tableRange2.Update(indexColumns[i].Name.O, midValues[indexColumns[i].Name.O], "", true, false, tableDiff.Collation, tableDiff.Range) + } + log.Debug("table ranges", zap.Reflect("tableRange 1", tableRange1), zap.Reflect("tableRange 2", tableRange2)) + isEqual1, count1, _, err = df.compareChecksumAndGetCount(ctx, tableRange1) + if err != nil { + return nil, errors.Trace(err) + } + isEqual2, count2, _, err = df.compareChecksumAndGetCount(ctx, tableRange2) + if err != nil { + return nil, errors.Trace(err) + } + if count1+count2 != count { + log.Fatal("the count is not correct", + zap.Int64("count1", count1), + zap.Int64("count2", count2), + zap.Int64("count", count)) + } + log.Info("chunk split successfully", + zap.Any("chunk id", tableRange.ChunkRange.Index), + zap.Int64("count1", count1), + zap.Int64("count2", count2)) + + // If there is a count zero, we think the range is very small. + if (!isEqual1 && !isEqual2) || (count1 == 0 || count2 == 0) { + return tableRange, nil + } else if !isEqual1 { + c, err := df.binSearch(ctx, targetSource, tableRange1, count1, tableDiff, indexColumns) + if err != nil { + return nil, errors.Trace(err) + } + return c, nil + } else if !isEqual2 { + c, err := df.binSearch(ctx, targetSource, tableRange2, count2, tableDiff, indexColumns) + if err != nil { + return nil, errors.Trace(err) + } + return c, nil + } else { + // TODO: handle the error to foreground + log.Fatal("the isEqual1 and isEqual2 cannot be both true") + return nil, nil + } +} + +func (df *Diff) compareChecksumAndGetCount(ctx context.Context, tableRange *splitter.RangeInfo) (bool, int64, int64, error) { + var wg sync.WaitGroup + var upstreamInfo, downstreamInfo *source.ChecksumInfo + wg.Add(1) + go func() { + defer wg.Done() + upstreamInfo = df.upstream.GetCountAndMd5(ctx, tableRange) + }() + downstreamInfo = df.downstream.GetCountAndMd5(ctx, tableRange) + wg.Wait() + + if upstreamInfo.Err != nil { + log.Warn("failed to compare upstream checksum") + return false, -1, -1, errors.Trace(upstreamInfo.Err) + } + if downstreamInfo.Err != nil { + log.Warn("failed to compare downstream checksum") + return false, -1, -1, errors.Trace(downstreamInfo.Err) + + } + + if upstreamInfo.Count == downstreamInfo.Count && upstreamInfo.Checksum == downstreamInfo.Checksum { + return true, upstreamInfo.Count, downstreamInfo.Count, nil + } + log.Debug("checksum doesn't match", zap.Any("chunk id", tableRange.ChunkRange.Index), zap.String("table", df.workSource.GetTables()[tableRange.GetTableIndex()].Table), zap.Int64("upstream chunk size", upstreamInfo.Count), zap.Int64("downstream chunk size", downstreamInfo.Count), zap.Uint64("upstream checksum", upstreamInfo.Checksum), zap.Uint64("downstream checksum", downstreamInfo.Checksum)) + return false, upstreamInfo.Count, downstreamInfo.Count, nil +} + +func (df *Diff) compareRows(ctx context.Context, rangeInfo *splitter.RangeInfo, dml *ChunkDML) (bool, error) { + rowsAdd, rowsDelete := 0, 0 + upstreamRowsIterator, 
err := df.upstream.GetRowsIterator(ctx, rangeInfo) + if err != nil { + return false, errors.Trace(err) + } + defer upstreamRowsIterator.Close() + downstreamRowsIterator, err := df.downstream.GetRowsIterator(ctx, rangeInfo) + if err != nil { + return false, errors.Trace(err) + } + defer downstreamRowsIterator.Close() + + var lastUpstreamData, lastDownstreamData map[string]*dbutil.ColumnData + equal := true + + tableInfo := df.workSource.GetTables()[rangeInfo.GetTableIndex()].Info + _, orderKeyCols := dbutil.SelectUniqueOrderKey(tableInfo) + for { + if lastUpstreamData == nil { + lastUpstreamData, err = upstreamRowsIterator.Next() + if err != nil { + return false, err + } + } + + if lastDownstreamData == nil { + lastDownstreamData, err = downstreamRowsIterator.Next() + if err != nil { + return false, err + } + } + + if lastUpstreamData == nil { + // don't have source data, so all the targetRows's data is redundant, should be deleted + for lastDownstreamData != nil { + sql := df.downstream.GenerateFixSQL(source.Delete, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) + rowsDelete++ + log.Debug("[delete]", zap.String("sql", sql)) + + dml.sqls = append(dml.sqls, sql) + equal = false + lastDownstreamData, err = downstreamRowsIterator.Next() + if err != nil { + return false, err + } + } + break + } + + if lastDownstreamData == nil { + // target lack some data, should insert the last source datas + for lastUpstreamData != nil { + sql := df.downstream.GenerateFixSQL(source.Insert, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) + rowsAdd++ + log.Debug("[insert]", zap.String("sql", sql)) + + dml.sqls = append(dml.sqls, sql) + equal = false + + lastUpstreamData, err = upstreamRowsIterator.Next() + if err != nil { + return false, err + } + } + break + } + + eq, cmp, err := utils.CompareData(lastUpstreamData, lastDownstreamData, orderKeyCols, tableInfo.Columns) + if err != nil { + return false, errors.Trace(err) + } + if eq { + lastDownstreamData = nil + lastUpstreamData = nil + continue + } + + equal = false + sql := "" + + switch cmp { + case 1: + // delete + sql = df.downstream.GenerateFixSQL(source.Delete, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) + rowsDelete++ + log.Debug("[delete]", zap.String("sql", sql)) + lastDownstreamData = nil + case -1: + // insert + sql = df.downstream.GenerateFixSQL(source.Insert, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) + rowsAdd++ + log.Debug("[insert]", zap.String("sql", sql)) + lastUpstreamData = nil + case 0: + // update + sql = df.downstream.GenerateFixSQL(source.Replace, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) + rowsAdd++ + rowsDelete++ + log.Debug("[update]", zap.String("sql", sql)) + lastUpstreamData = nil + lastDownstreamData = nil + } + + dml.sqls = append(dml.sqls, sql) + } + dml.rowAdd = rowsAdd + dml.rowDelete = rowsDelete + return equal, nil +} + +// WriteSQLs write sqls to file +func (df *Diff) writeSQLs(ctx context.Context) { + log.Info("start writeSQLs goroutine") + defer func() { + log.Info("close writeSQLs goroutine") + df.sqlWg.Done() + }() + for { + select { + case <-ctx.Done(): + return + case dml, ok := <-df.sqlCh: + if !ok && dml == nil { + log.Info("write sql channel closed") + return + } + if len(dml.sqls) > 0 { + tableDiff := df.downstream.GetTables()[dml.node.GetTableIndex()] + fileName := fmt.Sprintf("%s:%s:%s.sql", tableDiff.Schema, tableDiff.Table, utils.GetSQLFileName(dml.node.GetID())) + fixSQLPath := filepath.Join(df.FixSQLDir, 
fileName) + if ok := ioutil2.FileExists(fixSQLPath); ok { + // unreachable + log.Fatal("write sql failed: repeat sql happen", zap.Strings("sql", dml.sqls)) + } + fixSQLFile, err := os.Create(fixSQLPath) + if err != nil { + log.Fatal("write sql failed: cannot create file", zap.Strings("sql", dml.sqls), zap.Error(err)) + continue + } + // write chunk meta + chunkRange := dml.node.ChunkRange + fixSQLFile.WriteString(fmt.Sprintf("-- table: %s.%s\n-- %s\n", tableDiff.Schema, tableDiff.Table, chunkRange.ToMeta())) + if tableDiff.NeedUnifiedTimeZone { + fixSQLFile.WriteString(fmt.Sprintf("set @@session.time_zone = \"%s\";\n", config.UnifiedTimeZone)) + } + for _, sql := range dml.sqls { + _, err = fixSQLFile.WriteString(fmt.Sprintf("%s\n", sql)) + if err != nil { + log.Fatal("write sql failed", zap.String("sql", sql), zap.Error(err)) + } + } + fixSQLFile.Close() + } + log.Debug("insert node", zap.Any("chunk index", dml.node.GetID())) + df.cp.Insert(dml.node) + } + } +} + +func (df *Diff) removeSQLFiles(checkPointId *chunk.ChunkID) error { + ts := time.Now().Format("2006-01-02T15:04:05Z07:00") + dirName := fmt.Sprintf(".trash-%s", ts) + folderPath := filepath.Join(df.FixSQLDir, dirName) + + if _, err := os.Stat(folderPath); os.IsNotExist(err) { + err = os.MkdirAll(folderPath, os.ModePerm) + if err != nil { + return errors.Trace(err) + } + defer os.RemoveAll(folderPath) + } + + err := filepath.Walk(df.FixSQLDir, func(path string, f fs.FileInfo, err error) error { + if os.IsNotExist(err) { + // if path not exists, we should return nil to continue. + return nil + } + if err != nil { + return errors.Trace(err) + } + + if f == nil || f.IsDir() { + return nil + } + + name := f.Name() + // in mac osx, the path parameter is absolute path; in linux, the path is relative path to execution base dir, + // so use Rel to convert to relative path to l.base + relPath, _ := filepath.Rel(df.FixSQLDir, path) + oldPath := filepath.Join(df.FixSQLDir, relPath) + newPath := filepath.Join(folderPath, relPath) + if strings.Contains(oldPath, ".trash") { + return nil + } + + if strings.HasSuffix(name, ".sql") { + fileIDStr := strings.TrimRight(name, ".sql") + fileIDSubstrs := strings.SplitN(fileIDStr, ":", 3) + if len(fileIDSubstrs) != 3 { + return nil + } + tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, err := utils.GetChunkIDFromSQLFileName(fileIDSubstrs[2]) + if err != nil { + return errors.Trace(err) + } + fileID := &chunk.ChunkID{ + TableIndex: tableIndex, BucketIndexLeft: bucketIndexLeft, BucketIndexRight: bucketIndexRight, ChunkIndex: chunkIndex, ChunkCnt: 0, + } + if err != nil { + return errors.Trace(err) + } + if fileID.Compare(checkPointId) > 0 { + // move to trash + err = os.Rename(oldPath, newPath) + if err != nil { + return errors.Trace(err) + } + } + } + return nil + }) + if err != nil { + return errors.Trace(err) + } + return nil +} + +func setTiDBCfg() { + // to support long index key in TiDB + tidbCfg := tidbconfig.GetGlobalConfig() + // 3027 * 4 is the max value the MaxIndexLength can be set + tidbCfg.MaxIndexLength = tidbconfig.DefMaxOfMaxIndexLength + tidbconfig.StoreGlobalConfig(tidbCfg) + + log.Debug("set tidb cfg") +} diff --git a/sync_diff_inspector/main.go b/sync_diff_inspector/main.go new file mode 100644 index 00000000000..761fe3f026e --- /dev/null +++ b/sync_diff_inspector/main.go @@ -0,0 +1,150 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + _ "github.com/go-sql-driver/mysql" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/charset" + "github.com/pingcap/tidb/pkg/util" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/diff" + flag "github.com/spf13/pflag" + "go.uber.org/zap" +) + +func init() { + c := &charset.Charset{ + Name: "gbk", + DefaultCollation: "gbk_chinese_ci", + Collations: map[string]*charset.Collation{}, + Maxlen: 2, + } + charset.AddCharset(c) + for _, coll := range charset.GetSupportedCollations() { + if strings.EqualFold(coll.CharsetName, c.Name) { + charset.AddCollation(coll) + } + } +} + +func main() { + cfg := config.NewConfig() + err := cfg.Parse(os.Args[1:]) + switch errors.Cause(err) { + case nil: + case flag.ErrHelp: + os.Exit(0) + default: + fmt.Printf("Error: %s\n", err.Error()) + cfg.FlagSet.PrintDefaults() + os.Exit(2) + } + + if cfg.PrintVersion { + fmt.Print(util.GetRawInfo("sync_diff_inspector")) + return + } + + if cfg.Template != "" { + if err := config.ExportTemplateConfig(cfg.Template); err != nil { + fmt.Printf("%s\n", err.Error()) + os.Exit(2) + } + return + } + + conf := new(log.Config) + conf.Level = cfg.LogLevel + + conf.File.Filename = filepath.Join(cfg.Task.OutputDir, config.LogFileName) + lg, p, e := log.InitLogger(conf) + if e != nil { + log.Error("Log init failed!", zap.String("error", e.Error())) + os.Exit(2) + } + log.ReplaceGlobals(lg, p) + + util.PrintInfo("sync_diff_inspector") + + // Initial config + err = cfg.Init() + if err != nil { + fmt.Printf("Fail to initialize config.\n%s\n", err.Error()) + os.Exit(2) + } + + ok := cfg.CheckConfig() + if !ok { + fmt.Printf("There is something wrong with your config, please check log info in %s\n", conf.File.Filename) + os.Exit(2) + } + + log.Info("", zap.Stringer("config", cfg)) + + ctx := context.Background() + if !checkSyncState(ctx, cfg) { + log.Warn("check failed!!!") + os.Exit(1) + } + log.Info("check pass!!!") +} + +func checkSyncState(ctx context.Context, cfg *config.Config) bool { + beginTime := time.Now() + defer func() { + log.Info("check data finished", zap.Duration("cost", time.Since(beginTime))) + }() + + d, err := diff.NewDiff(ctx, cfg) + if err != nil { + fmt.Printf("An error occured while initializing diff: %s, please check log info in %s for full details\n", + err, filepath.Join(cfg.Task.OutputDir, config.LogFileName)) + log.Fatal("failed to initialize diff process", zap.Error(err)) + return false + } + defer d.Close() + + if !cfg.CheckDataOnly { + err = d.StructEqual(ctx) + if err != nil { + fmt.Printf("An error occured while comparing table structure: %s, please check log info in %s for full details\n", + err, filepath.Join(cfg.Task.OutputDir, config.LogFileName)) + log.Fatal("failed to check structure difference", zap.Error(err)) + return false + } + } else { + log.Info("Check table data only, skip struct check") + } + if !cfg.CheckStructOnly { + err = d.Equal(ctx) + if err != nil { + fmt.Printf("An error occured while comparing table data: %s, please check log info in %s for full 
details\n", + err, filepath.Join(cfg.Task.OutputDir, config.LogFileName)) + log.Fatal("failed to check data difference", zap.Error(err)) + return false + } + } else { + log.Info("Check table struct only, skip data check") + } + return d.PrintSummary(ctx) +} diff --git a/sync_diff_inspector/progress/progress.go b/sync_diff_inspector/progress/progress.go new file mode 100644 index 00000000000..b559ac85937 --- /dev/null +++ b/sync_diff_inspector/progress/progress.go @@ -0,0 +1,480 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package progress + +import ( + "container/list" + "fmt" + "io" + "os" + "strings" + "time" + + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" +) + +type TableProgressPrinter struct { + tableList *list.List + tableFailList *list.List + tableMap map[string]*list.Element + output io.Writer + lines int + + progressTableNums int + finishTableNums int + tableNums int + + progress int + total int + + optCh chan Operator + finishCh chan struct{} +} + +type table_state_t int + +const ( + TABLE_STATE_REGISTER table_state_t = 0x1 + TABLE_STATE_PRESTART table_state_t = 0x2 + TABLE_STATE_COMPARING table_state_t = 0x4 + TABLE_STATE_FINISH table_state_t = 0x8 + TABLE_STATE_RESULT_OK table_state_t = 0x00 + TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE table_state_t = 0x10 + TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE table_state_t = 0x20 + TABLE_STATE_RESULT_FAIL_STRUCTURE_PASS table_state_t = 0x40 + TABLE_STATE_RESULT_DIFFERENT table_state_t = 0x80 + TABLE_STATE_HEAD table_state_t = 0xff + TABLE_STATE_RESULT_MASK table_state_t = 0xff0 + TABLE_STATE_NOT_EXSIT_UPSTREAM table_state_t = 0x100 + TABLE_STATE_NOT_EXSIT_DOWNSTREAM table_state_t = 0x200 +) + +type TableProgress struct { + name string + progress int + total int + state table_state_t + totalStopUpdate bool +} + +type progress_opt_t int + +const ( + PROGRESS_OPT_INC progress_opt_t = iota + PROGRESS_OPT_UPDATE + PROGRESS_OPT_REGISTER + PROGRESS_OPT_START + PROGRESS_OPT_FAIL + PROGRESS_OPT_CLOSE + PROGRESS_OPT_ERROR +) + +type Operator struct { + optType progress_opt_t + name string + total int + state table_state_t + totalStopUpdate bool +} + +func NewTableProgressPrinter(tableNums int, finishTableNums int) *TableProgressPrinter { + tpp := &TableProgressPrinter{ + tableList: list.New(), + tableFailList: list.New(), + tableMap: make(map[string]*list.Element), + lines: 0, + + progressTableNums: 0, + finishTableNums: finishTableNums, + tableNums: tableNums, + + progress: 0, + total: 0, + + optCh: make(chan Operator, 16), + finishCh: make(chan struct{}), + } + tpp.init() + go tpp.serve() + fmt.Fprintf(tpp.output, "A total of %d tables need to be compared\n\n\n", tableNums) + return tpp +} + +func (tpp *TableProgressPrinter) SetOutput(output io.Writer) { + tpp.output = output +} + +func (tpp *TableProgressPrinter) Inc(name string) { + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_INC, + name: name, + } +} + +func (tpp *TableProgressPrinter) UpdateTotal(name string, total int, stopUpdate bool) { + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_UPDATE, + 
name: name, + total: total, + totalStopUpdate: stopUpdate, + } +} + +func (tpp *TableProgressPrinter) RegisterTable(name string, isFailed bool, isDone bool, isExist int) { + var state table_state_t + if isFailed { + if isDone { + switch isExist { + case common.UpstreamTableLackFlag: + state = TABLE_STATE_NOT_EXSIT_UPSTREAM | TABLE_STATE_REGISTER + case common.DownstreamTableLackFlag: + state = TABLE_STATE_NOT_EXSIT_DOWNSTREAM | TABLE_STATE_REGISTER + default: + state = TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE | TABLE_STATE_REGISTER + } + } else { + state = TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE | TABLE_STATE_REGISTER + } + } else { + state = TABLE_STATE_REGISTER + } + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_REGISTER, + name: name, + state: state, + } +} + +func (tpp *TableProgressPrinter) StartTable(name string, total int, stopUpdate bool) { + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_START, + name: name, + total: total, + state: TABLE_STATE_PRESTART, + totalStopUpdate: stopUpdate, + } +} + +func (tpp *TableProgressPrinter) FailTable(name string) { + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_FAIL, + name: name, + state: TABLE_STATE_RESULT_DIFFERENT, + } +} + +func (tpp *TableProgressPrinter) Close() { + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_CLOSE, + } + <-tpp.finishCh +} + +func (tpp *TableProgressPrinter) PrintSummary() { + var cleanStr, fixStr string + cleanStr = "\x1b[1A\x1b[J" + fixStr = "\nSummary:\n\n" + if tpp.tableFailList.Len() == 0 { + fixStr = fmt.Sprintf( + "%sA total of %d tables have been compared and all are equal.\nYou can view the comparison details through './output_dir/sync_diff_inspector.log'\n", + fixStr, + tpp.tableNums, + ) + } else { + SkippedNum := 0 + for p := tpp.tableFailList.Front(); p != nil; p = p.Next() { + tp := p.Value.(*TableProgress) + if tp.state&(TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE|TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE) != 0 { + fixStr = fmt.Sprintf("%sThe structure of %s is not equal.\n", fixStr, tp.name) + } + if tp.state&(TABLE_STATE_RESULT_DIFFERENT) != 0 { + fixStr = fmt.Sprintf("%sThe data of %s is not equal.\n", fixStr, tp.name) + } + if tp.state&(TABLE_STATE_NOT_EXSIT_DOWNSTREAM) != 0 { + fixStr = fmt.Sprintf("%sThe data of %s does not exist in downstream database.\n", fixStr, tp.name) + SkippedNum++ + } + if tp.state&(TABLE_STATE_NOT_EXSIT_UPSTREAM) != 0 { + fixStr = fmt.Sprintf("%sThe data of %s does not exist in upstream database.\n", fixStr, tp.name) + SkippedNum++ + } + } + fixStr = fmt.Sprintf( + "%s\nThe rest of the tables are all equal.\nA total of %d tables have been compared, %d tables finished, %d tables failed, %d tables skipped.\nThe patch file has been generated to './output_dir/patch.sql'\nYou can view the comparison details through './output_dir/sync_diff_inspector.log'\n", + fixStr, tpp.tableNums, tpp.tableNums-tpp.tableFailList.Len(), tpp.tableFailList.Len()-SkippedNum, SkippedNum, + ) + } + + fmt.Fprintf(tpp.output, "%s%s\n", cleanStr, fixStr) + +} + +func (tpp *TableProgressPrinter) Error(err error) { + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_ERROR, + } + <-tpp.finishCh + var cleanStr, fixStr string + cleanStr = "\x1b[1A\x1b[J" + fixStr = fmt.Sprintf("\nError in comparison process:\n%v\n\nYou can view the comparison details through './output_dir/sync_diff_inspector.log'\n", err) + fmt.Fprintf(tpp.output, "%s%s", cleanStr, fixStr) +} + +func (tpp *TableProgressPrinter) init() { + tpp.tableList.PushBack(&TableProgress{ + state: TABLE_STATE_HEAD, + }) + + tpp.output = os.Stdout +} + 
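+// serve is the printer's event loop: it consumes Operator messages from optCh
+// and redraws the progress output on a 200ms ticker; Close and Error stop it
+// through finishCh.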
+func (tpp *TableProgressPrinter) serve() { + tick := time.NewTicker(200 * time.Millisecond) + + for { + select { + case <-tick.C: + tpp.flush(false) + case opt := <-tpp.optCh: + switch opt.optType { + case PROGRESS_OPT_CLOSE: + tpp.flush(false) + tpp.finishCh <- struct{}{} + return + case PROGRESS_OPT_ERROR: + tpp.finishCh <- struct{}{} + return + case PROGRESS_OPT_INC: + if e, ok := tpp.tableMap[opt.name]; ok { + tp := e.Value.(*TableProgress) + tp.progress++ + tpp.progress++ + if tp.progress >= tp.total && tp.totalStopUpdate { + tp.state = (tp.state & TABLE_STATE_RESULT_MASK) | TABLE_STATE_FINISH + tpp.progress -= tp.progress + tpp.total -= tp.total + delete(tpp.tableMap, opt.name) + tpp.flush(true) + } + } + case PROGRESS_OPT_REGISTER: + if _, ok := tpp.tableMap[opt.name]; !ok { + e := tpp.tableList.PushBack(&TableProgress{ + name: opt.name, + progress: 0, + total: opt.total, + state: opt.state, + totalStopUpdate: opt.totalStopUpdate, + }) + tpp.tableMap[opt.name] = e + } + case PROGRESS_OPT_START: + e, ok := tpp.tableMap[opt.name] + if !ok { + e = tpp.tableList.PushBack(&TableProgress{ + name: opt.name, + progress: 0, + total: opt.total, + state: opt.state | TABLE_STATE_RESULT_FAIL_STRUCTURE_PASS, + totalStopUpdate: opt.totalStopUpdate, + }) + tpp.tableMap[opt.name] = e + } else { + tp := e.Value.(*TableProgress) + tp.state ^= TABLE_STATE_REGISTER | opt.state + tp.progress = 0 + tp.total = opt.total + tp.totalStopUpdate = opt.totalStopUpdate + } + if e.Value.(*TableProgress).state&TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE == 0 { + tpp.total += opt.total + } else { + delete(tpp.tableMap, opt.name) + } + tpp.flush(true) + case PROGRESS_OPT_UPDATE: + if e, ok := tpp.tableMap[opt.name]; ok { + tp := e.Value.(*TableProgress) + tpp.total += opt.total + tp.total += opt.total + tp.totalStopUpdate = opt.totalStopUpdate + } + case PROGRESS_OPT_FAIL: + if e, ok := tpp.tableMap[opt.name]; ok { + tp := e.Value.(*TableProgress) + tp.state |= opt.state + // continue to increment chunk + } + } + } + } +} + +// flush flush info +func (tpp *TableProgressPrinter) flush(stateIsChanged bool) { + /* + * A total of 15 tables need to be compared + * + * Comparing the table structure of `schema1.table1` ... equivalent + * Comparing the table data of `schema1.table1` ... equivalent + * Comparing the table structure of `schema2.table2` ... equivalent + * Comparing the table data of `schema2.table2` ... + * _____________________________________________________________________________ + * Progress [===================>-----------------------------------------] 35% + * + */ + + if stateIsChanged { + var cleanStr, fixStr, dynStr string + cleanStr = fmt.Sprintf("\x1b[%dA\x1b[J", tpp.lines) + tpp.lines = 2 + /* PRESTART/COMPARING/FINISH OK/DIFFERENT */ + for p := tpp.tableList.Front(); p != nil; p = p.Next() { + tp := p.Value.(*TableProgress) + // There are 5 situations: + // 1. structure is same and data is same + // 2. structure is same and data is different + // 3. structure is different and we won't compare data + // 4. structure is different and data is same + // 5. structure is different and data is different + switch tp.state & 0xf { + case TABLE_STATE_PRESTART: + switch tp.state & TABLE_STATE_RESULT_MASK { + case TABLE_STATE_RESULT_OK: + fixStr = fmt.Sprintf("%sComparing the table structure of %s ... 
equivalent\n", fixStr, tp.name) + dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) + tpp.lines++ + tpp.progressTableNums++ + tp.state = TABLE_STATE_COMPARING + case TABLE_STATE_NOT_EXSIT_UPSTREAM, TABLE_STATE_NOT_EXSIT_DOWNSTREAM: + dynStr = fmt.Sprintf("%sComparing the table data of %s ...skipped\n", dynStr, tp.name) + tpp.tableFailList.PushBack(tp) + preNode := p.Prev() + tpp.tableList.Remove(p) + p = preNode + tpp.finishTableNums++ + case TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE: + fixStr = fmt.Sprintf("%sComparing the table structure of %s ... failure\n", fixStr, tp.name) + tpp.tableFailList.PushBack(tp) + // we have empty node as list head, so p is not nil + preNode := p.Prev() + tpp.tableList.Remove(p) + p = preNode + tpp.finishTableNums++ + case TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE: + fixStr = fmt.Sprintf("%sComparing the table structure of %s ... failure\n", fixStr, tp.name) + dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) + tpp.lines++ + tpp.progressTableNums++ + tp.state ^= TABLE_STATE_COMPARING | TABLE_STATE_PRESTART + case TABLE_STATE_RESULT_FAIL_STRUCTURE_PASS: + fixStr = fmt.Sprintf("%sComparing the table structure of %s ... skip\n", fixStr, tp.name) + dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) + tpp.lines++ + tpp.progressTableNums++ + tp.state ^= TABLE_STATE_COMPARING | TABLE_STATE_PRESTART + } + case TABLE_STATE_COMPARING: + dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) + tpp.lines++ + case TABLE_STATE_FINISH: + if tp.state&TABLE_STATE_RESULT_DIFFERENT == 0 { + fixStr = fmt.Sprintf("%sComparing the table data of %s ... equivalent\n", fixStr, tp.name) + } else { + fixStr = fmt.Sprintf("%sComparing the table data of %s ... 
failure\n", fixStr, tp.name) + } + if tp.state&TABLE_STATE_RESULT_MASK != 0 { + tpp.tableFailList.PushBack(tp) + } + // we have empty node as list head, so p is not nil + preNode := p.Prev() + tpp.tableList.Remove(p) + p = preNode + tpp.progressTableNums-- + tpp.finishTableNums++ + } + } + + dynStr = fmt.Sprintf("%s_____________________________________________________________________________\n", dynStr) + fmt.Fprintf(tpp.output, "%s%s%s", cleanStr, fixStr, dynStr) + } else { + fmt.Fprint(tpp.output, "\x1b[1A\x1b[J") + } + // show bar + // 60 '='+'-' + coe := float32(tpp.progressTableNums*tpp.progress)/float32(tpp.tableNums*(tpp.total+1)) + float32(tpp.finishTableNums)/float32(tpp.tableNums) + numLeft := int(60 * coe) + percent := int(100 * coe) + fmt.Fprintf(tpp.output, "Progress [%s>%s] %d%% %d/%d\n", strings.Repeat("=", numLeft), strings.Repeat("-", 60-numLeft), percent, tpp.progress, tpp.total) +} + +var progress_ *TableProgressPrinter = nil + +func Init(tableNums, finishTableNums int) { + progress_ = NewTableProgressPrinter(tableNums, finishTableNums) +} + +func Inc(name string) { + if progress_ != nil { + progress_.Inc(name) + } +} + +func UpdateTotal(name string, total int, stopUpdate bool) { + if progress_ != nil { + progress_.UpdateTotal(name, total, stopUpdate) + } +} + +func RegisterTable(name string, isFailed bool, isDone bool, isExist int) { + if progress_ != nil { + progress_.RegisterTable(name, isFailed, isDone, isExist) + } +} + +func StartTable(name string, total int, stopUpdate bool) { + if progress_ != nil { + progress_.StartTable(name, total, stopUpdate) + } +} + +func FailTable(name string) { + if progress_ != nil { + progress_.FailTable(name) + } +} + +func Close() { + if progress_ != nil { + progress_.Close() + } +} + +func PrintSummary() { + if progress_ != nil { + progress_.PrintSummary() + } +} + +func Error(err error) { + if progress_ != nil { + progress_.Error(err) + } +} + +func SetOutput(output io.Writer) { + if progress_ != nil { + progress_.SetOutput(output) + } +} diff --git a/sync_diff_inspector/progress/progress_test.go b/sync_diff_inspector/progress/progress_test.go new file mode 100644 index 00000000000..7393f93c022 --- /dev/null +++ b/sync_diff_inspector/progress/progress_test.go @@ -0,0 +1,108 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package progress + +import ( + "bytes" + "errors" + "testing" + "time" + + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/stretchr/testify/require" +) + +func TestProgress(t *testing.T) { + p := NewTableProgressPrinter(6, 0) + p.RegisterTable("1", true, true, common.AllTableExistFlag) + p.StartTable("1", 50, true) + p.RegisterTable("2", true, false, common.AllTableExistFlag) + p.StartTable("2", 2, true) + p.Inc("2") + p.RegisterTable("3", false, false, common.AllTableExistFlag) + p.StartTable("3", 1, false) + p.Inc("2") + p.Inc("3") + p.UpdateTotal("3", 1, true) + p.Inc("3") + p.StartTable("4", 1, true) + p.FailTable("4") + p.Inc("3") + p.Inc("4") + p.RegisterTable("5", true, true, common.UpstreamTableLackFlag) + p.StartTable("5", 1, true) + p.RegisterTable("6", true, true, common.DownstreamTableLackFlag) + p.StartTable("6", 1, true) + time.Sleep(500 * time.Millisecond) + p.Close() + buffer := new(bytes.Buffer) + p.SetOutput(buffer) + p.PrintSummary() + require.Equal( + t, + "\x1b[1A\x1b[J\nSummary:\n\nThe structure of 1 is not equal.\nThe structure of 2 is not equal.\nThe data of 4 is not equal.\nThe data of 5 does not exist in upstream database.\nThe data of 6 does not exist in downstream database.\n"+ + "\nThe rest of the tables are all equal.\nA total of 6 tables have been compared, 1 tables finished, 3 tables failed, 2 tables skipped.\nThe patch file has been generated to './output_dir/patch.sql'\n"+ + "You can view the comparison details through './output_dir/sync_diff_inspector.log'\n\n", + buffer.String(), + ) +} + +func TestTableError(t *testing.T) { + p := NewTableProgressPrinter(4, 0) + p.RegisterTable("1", true, true, common.AllTableExistFlag) + p.StartTable("1", 50, true) + p.RegisterTable("2", true, true, common.AllTableExistFlag) + p.StartTable("2", 1, true) + p.RegisterTable("3", true, true, common.DownstreamTableLackFlag) + p.StartTable("3", 1, true) + + p.Inc("2") + buffer := new(bytes.Buffer) + p.SetOutput(buffer) + p.Error(errors.New("[aaa]")) + time.Sleep(500 * time.Millisecond) + require.Equal( + t, + "\x1b[0A\x1b[JComparing the table structure of 1 ... failure\n"+ + "_____________________________________________________________________________\n"+ + "Progress [===============>---------------------------------------------] 25% 0/0\n"+ + "\x1b[2A\x1b[JComparing the table structure of 2 ... 
failure\n"+ + "_____________________________________________________________________________\n"+ + "Progress [==============================>------------------------------] 50% 0/0\n"+ + "\x1b[2A\x1b[JComparing the table data of 3 ...skipped\n"+ + "_____________________________________________________________________________\n"+ + "Progress [=============================================>---------------] 75% 0/1\n"+ + "\x1b[1A\x1b[J\nError in comparison process:\n[aaa]\n\n"+ + "You can view the comparison details through './output_dir/sync_diff_inspector.log'\n", + buffer.String(), + ) +} + +func TestAllSuccess(t *testing.T) { + Init(2, 0) + RegisterTable("1", false, false, common.AllTableExistFlag) + StartTable("1", 1, true) + RegisterTable("2", false, false, common.AllTableExistFlag) + StartTable("2", 1, true) + Inc("1") + Inc("2") + Close() + buf := new(bytes.Buffer) + SetOutput(buf) + PrintSummary() + require.Equal(t, buf.String(), "\x1b[1A\x1b[J\nSummary:\n\n"+ + "A total of 2 tables have been compared and all are equal.\n"+ + "You can view the comparison details through './output_dir/sync_diff_inspector.log'\n\n", + ) +} diff --git a/sync_diff_inspector/report/report.go b/sync_diff_inspector/report/report.go new file mode 100644 index 00000000000..ba58878e8a5 --- /dev/null +++ b/sync_diff_inspector/report/report.go @@ -0,0 +1,419 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package report + +import ( + "context" + "database/sql" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + "sync" + "time" + + "github.com/olekukonko/tablewriter" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +const ( + // Pass means all data and struct of tables are equal + Pass = "pass" + // Fail means not all data or struct of tables are equal + Fail = "fail" + Error = "error" +) + +// ReportConfig stores the config information for the user +type ReportConfig struct { + Host string `toml:"host"` + Port int `toml:"port"` + User string `toml:"user"` + Snapshot string `toml:"snapshot,omitempty"` + SqlMode string `toml:"sql-mode,omitempty"` +} + +// TableResult saves the check result for every table. 
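+// A table is counted as passed only when both StructEqual and DataEqual are true;
+// tables missing on one side are tracked via TableLack and counted as skipped in the summary.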
+type TableResult struct { + Schema string `json:"schema"` + Table string `json:"table"` + StructEqual bool `json:"struct-equal"` + DataSkip bool `json:"data-skip"` + DataEqual bool `json:"data-equal"` + MeetError error `json:"-"` + ChunkMap map[string]*ChunkResult `json:"chunk-result"` // `ChunkMap` stores the `ChunkResult` of each chunk of the table + UpCount int64 `json:"up-count"` // `UpCount` is the number of rows in the table from upstream + DownCount int64 `json:"down-count"` // `DownCount` is the number of rows in the table from downstream + TableLack int `json:"table-lack"` +} + +// ChunkResult save the necessarily information to provide summary information +type ChunkResult struct { + RowsAdd int `json:"rows-add"` // `RowsAdd` is the number of rows needed to add + RowsDelete int `json:"rows-delete"` // `RowsDelete` is the number of rows needed to delete +} + +// Report saves the check results. +type Report struct { + sync.RWMutex + Result string `json:"-"` // Result is pass or fail + PassNum int32 `json:"-"` // The pass number of tables + FailedNum int32 `json:"-"` // The failed number of tables + SkippedNum int32 `json:"-"` // The skipped number of tables + TableResults map[string]map[string]*TableResult `json:"table-results"` // TableResult saved the map of `schema` => `table` => `tableResult` + StartTime time.Time `json:"start-time"` + Duration time.Duration `json:"time-duration"` + TotalSize int64 `json:"-"` // Total size of the checked tables + SourceConfig [][]byte `json:"-"` + TargetConfig []byte `json:"-"` + + task *config.TaskConfig `json:"-"` +} + +// LoadReport loads the report from the checkpoint +func (r *Report) LoadReport(reportInfo *Report) { + r.StartTime = time.Now() + r.Duration = reportInfo.Duration + r.TotalSize = reportInfo.TotalSize + for schema, tableMap := range reportInfo.TableResults { + if _, ok := r.TableResults[schema]; !ok { + r.TableResults[schema] = make(map[string]*TableResult) + } + for table, result := range tableMap { + r.TableResults[schema][table] = result + } + } +} + +func (r *Report) getSortedTables() [][]string { + equalTables := make([][]string, 0) + for schema, tableMap := range r.TableResults { + for table, result := range tableMap { + if result.StructEqual && result.DataEqual { + equalRow := make([]string, 0, 3) + equalRow = append(equalRow, dbutil.TableName(schema, table)) + equalRow = append(equalRow, strconv.FormatInt(result.UpCount, 10)) + equalRow = append(equalRow, strconv.FormatInt(result.DownCount, 10)) + equalTables = append(equalTables, equalRow) + } + } + } + sort.Slice(equalTables, func(i, j int) bool { return equalTables[i][0] < equalTables[j][0] }) + return equalTables +} + +func (r *Report) getDiffRows() [][]string { + diffRows := make([][]string, 0) + for schema, tableMap := range r.TableResults { + for table, result := range tableMap { + if result.StructEqual && result.DataEqual { + continue + } + diffRow := make([]string, 0) + diffRow = append(diffRow, dbutil.TableName(schema, table)) + if !common.AllTableExist(result.TableLack) { + diffRow = append(diffRow, "skipped") + } else { + diffRow = append(diffRow, "succeed") + } + if !result.StructEqual { + diffRow = append(diffRow, "false") + } else { + diffRow = append(diffRow, "true") + } + rowsAdd, rowsDelete := 0, 0 + for _, chunkResult := range result.ChunkMap { + rowsAdd += chunkResult.RowsAdd + rowsDelete += chunkResult.RowsDelete + } + diffRow = append(diffRow, fmt.Sprintf("+%d/-%d", rowsAdd, rowsDelete), strconv.FormatInt(result.UpCount, 10), 
strconv.FormatInt(result.DownCount, 10)) + diffRows = append(diffRows, diffRow) + } + } + return diffRows +} + +// CalculateTotalSize calculate the total size of all the checked tables +// Notice, user should run the analyze table first, when some of tables' size are zero. +func (r *Report) CalculateTotalSize(ctx context.Context, db *sql.DB) { + for schema, tableMap := range r.TableResults { + for table := range tableMap { + size, err := utils.GetTableSize(ctx, db, schema, table) + if size == 0 || err != nil { + log.Warn("fail to get the correct size of table, if you want to get the correct size, please analyze the corresponding tables", zap.String("table", dbutil.TableName(schema, table)), zap.Error(err)) + } else { + r.TotalSize += size + } + } + } +} + +// CommitSummary commit summary info +func (r *Report) CommitSummary() error { + passNum, failedNum, skippedNum := int32(0), int32(0), int32(0) + for _, tableMap := range r.TableResults { + for _, result := range tableMap { + if result.StructEqual && result.DataEqual { + passNum++ + } else if !common.AllTableExist(result.TableLack) { + skippedNum++ + } else { + failedNum++ + } + } + } + r.PassNum = passNum + r.FailedNum = failedNum + r.SkippedNum = skippedNum + summaryPath := filepath.Join(r.task.OutputDir, "summary.txt") + summaryFile, err := os.Create(summaryPath) + if err != nil { + return errors.Trace(err) + } + defer summaryFile.Close() + summaryFile.WriteString("Summary\n\n\n\n") + summaryFile.WriteString("Source Database\n\n\n\n") + for i := 0; i < len(r.SourceConfig); i++ { + summaryFile.Write(r.SourceConfig[i]) + summaryFile.WriteString("\n") + } + summaryFile.WriteString("Target Databases\n\n\n\n") + summaryFile.Write(r.TargetConfig) + summaryFile.WriteString("\n") + + summaryFile.WriteString("Comparison Result\n\n\n\n") + summaryFile.WriteString("The table structure and data in following tables are equivalent\n\n") + equalTables := r.getSortedTables() + if len(equalTables) > 0 { + tableString := &strings.Builder{} + table := tablewriter.NewWriter(tableString) + table.SetHeader([]string{"Table", "UpCount", "DownCount"}) + for _, v := range equalTables { + table.Append(v) + } + table.Render() + summaryFile.WriteString(tableString.String()) + summaryFile.WriteString("\n\n") + } + if r.Result == Fail || r.SkippedNum != 0 { + summaryFile.WriteString("The following tables contains inconsistent data\n\n") + tableString := &strings.Builder{} + table := tablewriter.NewWriter(tableString) + table.SetHeader([]string{"Table", "Result", "Structure equality", "Data diff rows", "UpCount", "DownCount"}) + diffRows := r.getDiffRows() + for _, v := range diffRows { + table.Append(v) + } + table.Render() + summaryFile.WriteString(tableString.String()) + } + duration := r.Duration + time.Since(r.StartTime) + summaryFile.WriteString(fmt.Sprintf("\nTime Cost: %s\n", duration)) + summaryFile.WriteString(fmt.Sprintf("Average Speed: %fMB/s\n", float64(r.TotalSize)/(1024.0*1024.0*duration.Seconds()))) + return nil +} + +func (r *Report) Print(w io.Writer) error { + var summary strings.Builder + if r.Result == Pass && r.SkippedNum == 0 { + summary.WriteString(fmt.Sprintf("A total of %d table have been compared and all are equal.\n", r.FailedNum+r.PassNum+r.SkippedNum)) + summary.WriteString(fmt.Sprintf("You can view the comparison details through '%s/%s'\n", r.task.OutputDir, config.LogFileName)) + } else if r.Result == Fail || r.SkippedNum != 0 { + for schema, tableMap := range r.TableResults { + for table, result := range tableMap { + if 
!result.StructEqual { + if result.DataSkip { + switch result.TableLack { + case common.UpstreamTableLackFlag: + summary.WriteString(fmt.Sprintf("The data of %s does not exist in upstream database\n", dbutil.TableName(schema, table))) + case common.DownstreamTableLackFlag: + summary.WriteString(fmt.Sprintf("The data of %s does not exist in downstream database\n", dbutil.TableName(schema, table))) + default: + summary.WriteString(fmt.Sprintf("The structure of %s is not equal, and data-check is skipped\n", dbutil.TableName(schema, table))) + } + } else { + summary.WriteString(fmt.Sprintf("The structure of %s is not equal\n", dbutil.TableName(schema, table))) + } + } + if !result.DataEqual && common.AllTableExist(result.TableLack) { + summary.WriteString(fmt.Sprintf("The data of %s is not equal\n", dbutil.TableName(schema, table))) + } + } + } + summary.WriteString("\n") + summary.WriteString("The rest of tables are all equal.\n") + summary.WriteString("\n") + summary.WriteString(fmt.Sprintf("A total of %d tables have been compared, %d tables finished, %d tables failed, %d tables skipped.\n", r.FailedNum+r.PassNum+r.SkippedNum, r.PassNum, r.FailedNum, r.SkippedNum)) + summary.WriteString(fmt.Sprintf("The patch file has been generated in \n\t'%s/'\n", r.task.FixDir)) + summary.WriteString(fmt.Sprintf("You can view the comparison details through '%s/%s'\n", r.task.OutputDir, config.LogFileName)) + } else { + summary.WriteString("Error in comparison process:\n") + for schema, tableMap := range r.TableResults { + for table, result := range tableMap { + if result.MeetError != nil { + summary.WriteString(fmt.Sprintf("%s error occured in %s\n", result.MeetError.Error(), dbutil.TableName(schema, table))) + } + } + } + summary.WriteString(fmt.Sprintf("You can view the comparison details through '%s/%s'\n", r.task.OutputDir, config.LogFileName)) + } + fmt.Fprint(w, summary.String()) + return nil +} + +// NewReport returns a new Report. +func NewReport(task *config.TaskConfig) *Report { + return &Report{ + TableResults: make(map[string]map[string]*TableResult), + Result: Pass, + task: task, + } +} + +func (r *Report) Init(tableDiffs []*common.TableDiff, sourceConfig [][]byte, targetConfig []byte) { + r.StartTime = time.Now() + r.SourceConfig = sourceConfig + r.TargetConfig = targetConfig + for _, tableDiff := range tableDiffs { + schema, table := tableDiff.Schema, tableDiff.Table + if _, ok := r.TableResults[schema]; !ok { + r.TableResults[schema] = make(map[string]*TableResult) + } + r.TableResults[schema][table] = &TableResult{ + Schema: schema, + Table: table, + StructEqual: true, + DataEqual: true, + MeetError: nil, + ChunkMap: make(map[string]*ChunkResult), + } + } +} + +// SetTableStructCheckResult sets the struct check result for table. +func (r *Report) SetTableStructCheckResult(schema, table string, equal bool, skip bool, exist int) { + r.Lock() + defer r.Unlock() + tableResult := r.TableResults[schema][table] + tableResult.StructEqual = equal + tableResult.DataSkip = skip + tableResult.TableLack = exist + if !equal && common.AllTableExist(tableResult.TableLack) && r.Result != Error { + r.Result = Fail + } +} + +// SetTableDataCheckResult sets the data check result for table. 
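+// (Editorial sketch) Assumed call pattern from the checking loop, with
+// illustrative arguments: the struct check is recorded once per table and the
+// data check once per compared chunk, and the counts accumulate on the
+// table's TableResult (chunkID here stands for the current *chunk.ChunkID):
+//
+//	r.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag)
+//	r.SetTableDataCheckResult("test", "tbl", false, 2 /*rowsAdd*/, 1 /*rowsDelete*/, 100, 99, chunkID)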
+func (r *Report) SetTableDataCheckResult(schema, table string, equal bool, rowsAdd, rowsDelete int, upCount, downCount int64, id *chunk.ChunkID) { + r.Lock() + defer r.Unlock() + result := r.TableResults[schema][table] + result.UpCount += upCount + result.DownCount += downCount + if !equal { + result.DataEqual = equal + if _, ok := result.ChunkMap[id.ToString()]; !ok { + result.ChunkMap[id.ToString()] = &ChunkResult{ + RowsAdd: 0, + RowsDelete: 0, + } + } + result.ChunkMap[id.ToString()].RowsAdd += rowsAdd + result.ChunkMap[id.ToString()].RowsDelete += rowsDelete + if r.Result != Error && common.AllTableExist(result.TableLack) { + r.Result = Fail + } + } + if !equal && common.AllTableExist(result.TableLack) && r.Result != Error { + r.Result = Fail + } +} + +// SetTableMeetError sets meet error when check the table. +func (r *Report) SetTableMeetError(schema, table string, err error) { + r.Lock() + defer r.Unlock() + if _, ok := r.TableResults[schema]; !ok { + r.TableResults[schema] = make(map[string]*TableResult) + r.TableResults[schema][table] = &TableResult{ + MeetError: err, + } + return + } + + r.TableResults[schema][table].MeetError = err + r.Result = Error +} + +// GetSnapshot get the snapshot of the current state of the report, then we can restart the +// sync-diff and get the correct report state. +func (r *Report) GetSnapshot(chunkID *chunk.ChunkID, schema, table string) (*Report, error) { + r.RLock() + defer r.RUnlock() + targetID := utils.UniqueID(schema, table) + reserveMap := make(map[string]map[string]*TableResult) + for schema, tableMap := range r.TableResults { + reserveMap[schema] = make(map[string]*TableResult) + for table, result := range tableMap { + reportID := utils.UniqueID(schema, table) + if reportID >= targetID { + chunkRes := make(map[string]*ChunkResult) + reserveMap[schema][table] = &TableResult{ + Schema: result.Schema, + Table: result.Table, + StructEqual: result.StructEqual, + DataEqual: result.DataEqual, + MeetError: result.MeetError, + } + for id, chunkResult := range result.ChunkMap { + sid := new(chunk.ChunkID) + err := sid.FromString(id) + if err != nil { + return nil, errors.Trace(err) + } + if sid.Compare(chunkID) <= 0 { + chunkRes[id] = chunkResult + } + } + reserveMap[schema][table].ChunkMap = chunkRes + } + } + } + + result := r.Result + totalSize := r.TotalSize + duration := time.Since(r.StartTime) + task := r.task + return &Report{ + PassNum: 0, + FailedNum: 0, + Result: result, + TableResults: reserveMap, + StartTime: r.StartTime, + Duration: duration, + TotalSize: totalSize, + + task: task, + }, nil +} diff --git a/sync_diff_inspector/report/report_test.go b/sync_diff_inspector/report/report_test.go new file mode 100644 index 00000000000..ce49d1a8016 --- /dev/null +++ b/sync_diff_inspector/report/report_test.go @@ -0,0 +1,526 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package report + +import ( + "bytes" + "context" + "errors" + "os" + "path" + "testing" + + "github.com/BurntSushi/toml" + "github.com/DATA-DOG/go-sqlmock" + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/stretchr/testify/require" +) + +var task *config.TaskConfig = &config.TaskConfig{ + OutputDir: "output_dir", + FixDir: "output_dir/123456/fix-on-tidb1", + CheckpointDir: "output_dir/123456/checkpoint", +} + +func TestReport(t *testing.T) { + ctx := context.Background() + + db, mock, err := sqlmock.New() + require.NoError(t, err) + + report := NewReport(task) + createTableSQL1 := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + createTableSQL2 := "create table `atest`.`atbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + tableDiffs := []*common.TableDiff{ + { + Schema: "test", + Table: "tbl", + Info: tableInfo1, + Collation: "[123]", + }, + { + Schema: "atest", + Table: "atbl", + Info: tableInfo2, + Collation: "[123]", + }, + { + Schema: "ctest", + Table: "atbl", + Info: tableInfo2, + Collation: "[123]", + }, + { + Schema: "dtest", + Table: "atbl", + Info: tableInfo2, + Collation: "[123]", + }, + } + configs := []*ReportConfig{ + { + Host: "127.0.0.1", + Port: 3306, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 3307, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 4000, + User: "root", + }, + } + + configsBytes := make([][]byte, 3) + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + err := toml.NewEncoder(buf).Encode(configs[i]) + require.NoError(t, err) + configsBytes[i] = buf.Bytes() + } + report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) + + // Test CalculateTotal + mock.ExpectQuery("select sum.*").WillReturnRows(sqlmock.NewRows([]string{"data"}).AddRow("123")) + mock.ExpectQuery("select sum.*where table_schema=.*").WillReturnRows(sqlmock.NewRows([]string{"data"}).AddRow("456")) + report.CalculateTotalSize(ctx, db) + + // Test Table Report + report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("test", "tbl", true, 100, 200, 222, 222, &chunk.ChunkID{1, 1, 1, 1, 2}) + report.SetTableMeetError("test", "tbl", errors.New("eeee")) + + new_report := NewReport(task) + new_report.LoadReport(report) + + require.Equal(t, new_report.TotalSize, int64(579)) + result, ok := new_report.TableResults["test"]["tbl"] + require.True(t, ok) + require.Equal(t, result.MeetError.Error(), "eeee") + require.True(t, result.DataEqual) + require.True(t, result.StructEqual) + + require.Equal(t, new_report.getSortedTables(), [][]string{{"`atest`.`atbl`", "0", "0"}, {"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) + require.Equal(t, new_report.getDiffRows(), [][]string{}) + + new_report.SetTableStructCheckResult("atest", "atbl", true, false, common.AllTableExistFlag) + new_report.SetTableDataCheckResult("atest", "atbl", false, 111, 222, 333, 333, &chunk.ChunkID{1, 1, 1, 1, 2}) + require.Equal(t, new_report.getSortedTables(), [][]string{{"`ctest`.`atbl`", "0", "0"}, 
{"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) + require.Equal(t, new_report.getDiffRows(), [][]string{{"`atest`.`atbl`", "succeed", "true", "+111/-222", "333", "333"}}) + + new_report.SetTableStructCheckResult("atest", "atbl", false, false, common.AllTableExistFlag) + require.Equal(t, new_report.getSortedTables(), [][]string{{"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) + require.Equal(t, new_report.getDiffRows(), [][]string{{"`atest`.`atbl`", "succeed", "false", "+111/-222", "333", "333"}}) + + new_report.SetTableStructCheckResult("ctest", "atbl", false, true, common.AllTableExistFlag) + + new_report.SetTableStructCheckResult("dtest", "atbl", false, true, common.DownstreamTableLackFlag) + + buf := new(bytes.Buffer) + new_report.Print(buf) + info := buf.String() + require.Contains(t, info, "The structure of `atest`.`atbl` is not equal\n") + require.Contains(t, info, "The data of `atest`.`atbl` is not equal\n") + require.Contains(t, info, "The structure of `ctest`.`atbl` is not equal, and data-check is skipped\n") + require.Contains(t, info, "The data of `dtest`.`atbl` does not exist in downstream database\n") + require.Contains(t, info, "\n"+ + "The rest of tables are all equal.\n\n"+ + "A total of 0 tables have been compared, 0 tables finished, 0 tables failed, 0 tables skipped.\n"+ + "The patch file has been generated in \n\t'output_dir/123456/fix-on-tidb1/'\n"+ + "You can view the comparison details through 'output_dir/sync_diff.log'\n") +} + +func TestCalculateTotal(t *testing.T) { + ctx := context.Background() + + db, mock, err := sqlmock.New() + require.NoError(t, err) + + report := NewReport(task) + createTableSQL := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + tableDiffs := []*common.TableDiff{ + { + Schema: "test", + Table: "tbl", + Info: tableInfo, + Collation: "[123]", + }, + } + configs := []*ReportConfig{ + { + Host: "127.0.0.1", + Port: 3306, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 3307, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 4000, + User: "root", + }, + } + + configsBytes := make([][]byte, 3) + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + err := toml.NewEncoder(buf).Encode(configs[i]) + require.NoError(t, err) + configsBytes[i] = buf.Bytes() + } + report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) + + // Normal + mock.ExpectQuery("select sum.*").WillReturnRows(sqlmock.NewRows([]string{"data"}).AddRow("123")) + report.CalculateTotalSize(ctx, db) + require.Equal(t, report.TotalSize, int64(123)) +} + +func TestPrint(t *testing.T) { + report := NewReport(task) + createTableSQL := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + tableDiffs := []*common.TableDiff{ + { + Schema: "test", + Table: "tbl", + Info: tableInfo, + Collation: "[123]", + }, + { + Schema: "test", + Table: "tbl1", + Info: tableInfo, + Collation: "[123]", + }, + } + configs := []*ReportConfig{ + { + Host: "127.0.0.1", + Port: 3306, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 3307, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 4000, + User: "root", + }, + } + + configsBytes := make([][]byte, 3) + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + err := 
toml.NewEncoder(buf).Encode(configs[i]) + require.NoError(t, err) + configsBytes[i] = buf.Bytes() + } + report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) + + var buf *bytes.Buffer + // All Pass + report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("test", "tbl", true, 0, 0, 22, 22, &chunk.ChunkID{0, 0, 0, 0, 1}) + buf = new(bytes.Buffer) + report.Print(buf) + require.Equal(t, buf.String(), "A total of 0 table have been compared and all are equal.\n"+ + "You can view the comparison details through 'output_dir/sync_diff.log'\n") + + // Error + report.SetTableMeetError("test", "tbl1", errors.New("123")) + report.SetTableStructCheckResult("test", "tbl1", false, false, common.AllTableExistFlag) + buf = new(bytes.Buffer) + report.Print(buf) + require.Equal(t, buf.String(), "Error in comparison process:\n"+ + "123 error occured in `test`.`tbl1`\n"+ + "You can view the comparison details through 'output_dir/sync_diff.log'\n") +} + +func TestGetSnapshot(t *testing.T) { + report := NewReport(task) + createTableSQL1 := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + createTableSQL2 := "create table `atest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + createTableSQL3 := "create table `xtest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo3, err := dbutil.GetTableInfoBySQL(createTableSQL3, parser.New()) + require.NoError(t, err) + + tableDiffs := []*common.TableDiff{ + { + Schema: "test", + Table: "tbl", + Info: tableInfo1, + Collation: "[123]", + }, { + Schema: "atest", + Table: "tbl", + Info: tableInfo2, + Collation: "[123]", + }, { + Schema: "xtest", + Table: "tbl", + Info: tableInfo3, + Collation: "[123]", + }, + } + configs := []*ReportConfig{ + { + Host: "127.0.0.1", + Port: 3306, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 3307, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 4000, + User: "root", + }, + } + + configsBytes := make([][]byte, 3) + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + err := toml.NewEncoder(buf).Encode(configs[i]) + require.NoError(t, err) + configsBytes[i] = buf.Bytes() + } + report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) + + report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("test", "tbl", false, 100, 100, 200, 300, &chunk.ChunkID{0, 0, 0, 1, 10}) + report.SetTableDataCheckResult("test", "tbl", true, 0, 0, 300, 300, &chunk.ChunkID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("test", "tbl", false, 200, 200, 400, 500, &chunk.ChunkID{0, 0, 0, 3, 10}) + + report.SetTableStructCheckResult("atest", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("atest", "tbl", false, 100, 100, 500, 600, &chunk.ChunkID{0, 0, 0, 0, 10}) + report.SetTableDataCheckResult("atest", "tbl", true, 0, 0, 600, 600, &chunk.ChunkID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("atest", "tbl", false, 200, 200, 700, 800, &chunk.ChunkID{0, 0, 0, 3, 10}) + + report.SetTableStructCheckResult("xtest", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("xtest", "tbl", false, 100, 100, 800, 900, &chunk.ChunkID{0, 0, 0, 0, 10}) + 
report.SetTableDataCheckResult("xtest", "tbl", true, 0, 0, 900, 900, &chunk.ChunkID{0, 0, 0, 1, 10}) + report.SetTableDataCheckResult("xtest", "tbl", false, 200, 200, 1000, 1100, &chunk.ChunkID{0, 0, 0, 3, 10}) + + report_snap, err := report.GetSnapshot(&chunk.ChunkID{0, 0, 0, 1, 10}, "test", "tbl") + require.NoError(t, err) + require.Equal(t, report_snap.TotalSize, report.TotalSize) + require.Equal(t, report_snap.Result, report.Result) + for key, value := range report.TableResults { + if _, ok := report_snap.TableResults[key]; !ok { + v, ok := value["tbl"] + require.True(t, ok) + require.Equal(t, v.Schema, "atest") + continue + } + + if _, ok := report_snap.TableResults[key]["tbl"]; !ok { + require.Equal(t, key, "atest") + continue + } + + v1 := value["tbl"] + v2 := report_snap.TableResults[key]["tbl"] + require.Equal(t, v1.Schema, v2.Schema) + require.Equal(t, v1.Table, v2.Table) + require.Equal(t, v1.StructEqual, v2.StructEqual) + require.Equal(t, v1.DataEqual, v2.DataEqual) + require.Equal(t, v1.MeetError, v2.MeetError) + + chunkMap1 := v1.ChunkMap + chunkMap2 := v2.ChunkMap + for id, r1 := range chunkMap1 { + sid := new(chunk.ChunkID) + if _, ok := chunkMap2[id]; !ok { + require.NoError(t, sid.FromString(id)) + require.Equal(t, sid.Compare(&chunk.ChunkID{0, 0, 0, 3, 10}), 0) + continue + } + require.NoError(t, sid.FromString(id)) + require.True(t, sid.Compare(&chunk.ChunkID{0, 0, 0, 1, 10}) <= 0) + r2 := chunkMap2[id] + require.Equal(t, r1.RowsAdd, r2.RowsAdd) + require.Equal(t, r1.RowsDelete, r2.RowsDelete) + } + + } +} + +func TestCommitSummary(t *testing.T) { + outputDir := "./" + report := NewReport(&config.TaskConfig{OutputDir: outputDir, FixDir: task.FixDir}) + createTableSQL1 := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + createTableSQL2 := "create table `atest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + createTableSQL3 := "create table `xtest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo3, err := dbutil.GetTableInfoBySQL(createTableSQL3, parser.New()) + require.NoError(t, err) + createTableSQL4 := "create table `xtest`.`tb1`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo4, err := dbutil.GetTableInfoBySQL(createTableSQL4, parser.New()) + require.NoError(t, err) + tableDiffs := []*common.TableDiff{ + { + Schema: "test", + Table: "tbl", + Info: tableInfo1, + Collation: "[123]", + }, { + Schema: "atest", + Table: "tbl", + Info: tableInfo2, + Collation: "[123]", + }, { + Schema: "xtest", + Table: "tbl", + Info: tableInfo3, + Collation: "[123]", + }, { + Schema: "ytest", + Table: "tbl", + Info: tableInfo3, + Collation: "[123]", + }, { + Schema: "xtest", + Table: "tb1", + Info: tableInfo4, + Collation: "[123]", + }, { + Schema: "xtest", + Table: "tb2", + Info: tableInfo4, + Collation: "[123]", + }, + } + configs := []*ReportConfig{ + { + Host: "127.0.0.1", + Port: 3306, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 3307, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 4000, + User: "root", + }, + } + + configsBytes := make([][]byte, 3) + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + err := toml.NewEncoder(buf).Encode(configs[i]) + require.NoError(t, err) + configsBytes[i] = 
buf.Bytes() + } + report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) + + report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("test", "tbl", true, 100, 200, 400, 400, &chunk.ChunkID{0, 0, 0, 1, 10}) + + report.SetTableStructCheckResult("atest", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("atest", "tbl", false, 100, 200, 500, 600, &chunk.ChunkID{0, 0, 0, 2, 10}) + + report.SetTableStructCheckResult("xtest", "tbl", false, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("xtest", "tbl", false, 100, 200, 600, 700, &chunk.ChunkID{0, 0, 0, 3, 10}) + + report.SetTableStructCheckResult("xtest", "tb1", false, true, common.UpstreamTableLackFlag) + report.SetTableDataCheckResult("xtest", "tb1", false, 0, 200, 0, 200, &chunk.ChunkID{0, 0, 0, 4, 10}) + + report.SetTableStructCheckResult("xtest", "tb2", false, true, common.DownstreamTableLackFlag) + report.SetTableDataCheckResult("xtest", "tb2", false, 100, 0, 100, 0, &chunk.ChunkID{0, 0, 0, 5, 10}) + + err = report.CommitSummary() + require.NoError(t, err) + filename := path.Join(outputDir, "summary.txt") + file, err := os.Open(filename) + require.NoError(t, err) + + p := make([]byte, 2048) + file.Read(p) + str := string(p) + require.Contains(t, str, "Summary\n\n\n\n"+ + "Source Database\n\n\n\n"+ + "host = \"127.0.0.1\"\n"+ + "port = 3306\n"+ + "user = \"root\"\n\n"+ + "host = \"127.0.0.1\"\n"+ + "port = 3307\n"+ + "user = \"root\"\n\n"+ + "Target Databases\n\n\n\n"+ + "host = \"127.0.0.1\"\n"+ + "port = 4000\n"+ + "user = \"root\"\n\n"+ + "Comparison Result\n\n\n\n"+ + "The table structure and data in following tables are equivalent\n\n"+ + "+---------------+---------+-----------+\n"+ + "| TABLE | UPCOUNT | DOWNCOUNT |\n"+ + "+---------------+---------+-----------+\n"+ + "| `test`.`tbl` | 400 | 400 |\n"+ + "| `ytest`.`tbl` | 0 | 0 |\n"+ + "+---------------+---------+-----------+\n\n\n"+ + "The following tables contains inconsistent data\n\n"+ + "+---------------+---------+--------------------+----------------+---------+-----------+\n"+ + "| TABLE | RESULT | STRUCTURE EQUALITY | DATA DIFF ROWS | UPCOUNT | DOWNCOUNT |\n"+ + "+---------------+---------+--------------------+----------------+---------+-----------+\n") + require.Contains(t, str, + "| `atest`.`tbl` | succeed | true | +100/-200 | 500 | 600 |\n") + require.Contains(t, str, + "| `xtest`.`tbl` | succeed | false | +100/-200 | 600 | 700 |\n") + require.Contains(t, str, + "| `xtest`.`tb1` | skipped | false | +0/-200 | 0 | 200 |\n") + require.Contains(t, str, + "| `xtest`.`tb2` | skipped | false | +100/-0 | 100 | 0 |\n") + + file.Close() + err = os.Remove(filename) + require.NoError(t, err) +} diff --git a/sync_diff_inspector/source/chunks_iter.go b/sync_diff_inspector/source/chunks_iter.go new file mode 100644 index 00000000000..44b051fcb72 --- /dev/null +++ b/sync_diff_inspector/source/chunks_iter.go @@ -0,0 +1,189 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
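+// (Editorial sketch) ChunksIterator below is a producer/consumer pipeline:
+// produceChunks splits every table on a worker pool and feeds
+// splitter.RangeInfo values into an internal channel, and callers drain it
+// with Next until it returns nil. Assumed usage, mirroring how the diff loop
+// consumes ranges:
+//
+//	iter, err := NewChunksIterator(ctx, analyzer, tableDiffs, nil /* no checkpoint */, 4)
+//	if err != nil {
+//		return err
+//	}
+//	defer iter.Close()
+//	for {
+//		r, err := iter.Next(ctx)
+//		if err != nil || r == nil {
+//			break
+//		}
+//		// compare the chunk described by r
+//	}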
+ +package source + +import ( + "context" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/progress" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/splitter" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" +) + +// ChunksIterator is used for single mysql/tidb source. +type ChunksIterator struct { + ID *chunk.ChunkID + tableAnalyzer TableAnalyzer + + TableDiffs []*common.TableDiff + nextTableIndex int + chunksCh chan *splitter.RangeInfo + errCh chan error + splitThreadCount int + + cancel context.CancelFunc +} + +func NewChunksIterator(ctx context.Context, analyzer TableAnalyzer, tableDiffs []*common.TableDiff, startRange *splitter.RangeInfo, splitThreadCount int) (*ChunksIterator, error) { + ctxx, cancel := context.WithCancel(ctx) + iter := &ChunksIterator{ + splitThreadCount: splitThreadCount, + tableAnalyzer: analyzer, + TableDiffs: tableDiffs, + + // reserve 30 capacity for each goroutine on average + chunksCh: make(chan *splitter.RangeInfo, 30*splitThreadCount), + errCh: make(chan error, len(tableDiffs)), + cancel: cancel, + } + go iter.produceChunks(ctxx, startRange) + return iter, nil +} + +func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter.RangeInfo) { + defer close(t.chunksCh) + pool := utils.NewWorkerPool(uint(t.splitThreadCount), "chunks producer") + t.nextTableIndex = 0 + + // If chunkRange + if startRange != nil { + curIndex := startRange.GetTableIndex() + curTable := t.TableDiffs[curIndex] + t.nextTableIndex = curIndex + 1 + // if this chunk is empty, data-check for this table should be skipped + if startRange.ChunkRange.Type != chunk.Empty { + pool.Apply(func() { + chunkIter, err := t.tableAnalyzer.AnalyzeSplitter(ctx, curTable, startRange) + if err != nil { + t.errCh <- errors.Trace(err) + return + } + defer chunkIter.Close() + for { + c, err := chunkIter.Next() + if err != nil { + t.errCh <- errors.Trace(err) + return + } + if c == nil { + break + } + c.Index.TableIndex = curIndex + select { + case <-ctx.Done(): + log.Info("Stop do produce chunks by context done") + return + case t.chunksCh <- &splitter.RangeInfo{ + ChunkRange: c, + IndexID: getCurTableIndexID(chunkIter), + ProgressID: dbutil.TableName(curTable.Schema, curTable.Table), + }: + } + } + }) + } + } + + for ; t.nextTableIndex < len(t.TableDiffs); t.nextTableIndex++ { + curTableIndex := t.nextTableIndex + // skip data-check, but still need to send a empty chunk to make checkpoint continuous + if t.TableDiffs[curTableIndex].IgnoreDataCheck || !common.AllTableExist(t.TableDiffs[curTableIndex].TableLack) { + pool.Apply(func() { + table := t.TableDiffs[curTableIndex] + progressID := dbutil.TableName(table.Schema, table.Table) + progress.StartTable(progressID, 1, true) + select { + case <-ctx.Done(): + log.Info("Stop do produce chunks by context done") + return + case t.chunksCh <- &splitter.RangeInfo{ + ChunkRange: &chunk.Range{ + Index: &chunk.ChunkID{ + TableIndex: curTableIndex, + }, + Type: chunk.Empty, + IsFirst: true, + IsLast: true, + }, + ProgressID: progressID, + }: + } + }) + continue + } + + pool.Apply(func() { + table := t.TableDiffs[curTableIndex] + chunkIter, err := t.tableAnalyzer.AnalyzeSplitter(ctx, table, nil) + if err != nil { + t.errCh <- errors.Trace(err) + return + } + defer chunkIter.Close() + for { + c, err := chunkIter.Next() + if err 
!= nil { + t.errCh <- errors.Trace(err) + return + } + if c == nil { + break + } + c.Index.TableIndex = curTableIndex + select { + case <-ctx.Done(): + log.Info("Stop do produce chunks by context done") + return + case t.chunksCh <- &splitter.RangeInfo{ + ChunkRange: c, + IndexID: getCurTableIndexID(chunkIter), + ProgressID: dbutil.TableName(table.Schema, table.Table), + }: + } + } + }) + } + pool.WaitFinished() +} + +func (t *ChunksIterator) Next(ctx context.Context) (*splitter.RangeInfo, error) { + select { + case <-ctx.Done(): + return nil, nil + case r, ok := <-t.chunksCh: + if !ok && r == nil { + return nil, nil + } + return r, nil + case err := <-t.errCh: + return nil, errors.Trace(err) + } +} + +func (t *ChunksIterator) Close() { + t.cancel() +} + +// TODO: getCurTableIndexID only used for binary search, should be optimized later. +func getCurTableIndexID(tableIter splitter.ChunkIterator) int64 { + if bt, ok := tableIter.(*splitter.BucketIterator); ok { + return bt.GetIndexID() + } + return 0 +} diff --git a/sync_diff_inspector/source/common/common_test.go b/sync_diff_inspector/source/common/common_test.go new file mode 100644 index 00000000000..467548935bf --- /dev/null +++ b/sync_diff_inspector/source/common/common_test.go @@ -0,0 +1,64 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
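+// (Editorial note) The test below exercises the ordering rules implemented by
+// RowDatas in rows.go: a NULL key column sorts before any non-NULL value,
+// string-typed key columns compare lexicographically, and numeric ones compare
+// by value, which is why the row with a NULL id is expected to pop first.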
+ +package common + +import ( + "container/heap" + "testing" + + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "github.com/stretchr/testify/require" +) + +func TestRowData(t *testing.T) { + createTableSQL := "create table test.test(id int(24), name varchar(24), age int(24), primary key(id, name));" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + _, orderKeyCols := dbutil.SelectUniqueOrderKey(tableInfo) + require.Equal(t, utils.NeedQuotes(orderKeyCols[1].FieldType.GetType()), true) + ids := []string{"3", "2", "2", "2", "4", "1", "NULL"} + names := []string{"d", "NULL", "c", "g", "b", "a", "e"} + ages := []string{"1", "2", "3", "3", "NULL", "5", "4"} + + expectIDs := []string{"NULL", "1", "2", "2", "2", "3", "4"} + expectNames := []string{"e", "a", "NULL", "c", "g", "d", "b"} + + rowDatas := &RowDatas{ + Rows: make([]RowData, 0, len(ids)), + OrderKeyCols: orderKeyCols, + } + + heap.Init(rowDatas) + for i, id := range ids { + data := map[string]*dbutil.ColumnData{ + "id": {Data: []byte(id), IsNull: (id == "NULL")}, + "name": {Data: []byte(names[i]), IsNull: (names[i] == "NULL")}, + "age": {Data: []byte(ages[i]), IsNull: (ages[i] == "NULL")}, + } + heap.Push(rowDatas, RowData{ + Data: data, + }) + } + + for i := 0; i < len(ids); i++ { + rowData := heap.Pop(rowDatas).(RowData) + id := string(rowData.Data["id"].Data) + name := string(rowData.Data["name"].Data) + require.Equal(t, id, expectIDs[i]) + require.Equal(t, name, expectNames[i]) + } +} diff --git a/sync_diff_inspector/source/common/conn.go b/sync_diff_inspector/source/common/conn.go new file mode 100755 index 00000000000..286d1f5485e --- /dev/null +++ b/sync_diff_inspector/source/common/conn.go @@ -0,0 +1,74 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package common + +import ( + "database/sql" + "encoding/base64" + + "github.com/go-sql-driver/mysql" + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + tmysql "github.com/pingcap/tidb/pkg/parser/mysql" +) + +func tryConnectMySQL(cfg *mysql.Config) (*sql.DB, error) { + failpoint.Inject("MustMySQLPassword", func(val failpoint.Value) { + pwd := val.(string) + if cfg.Passwd != pwd { + failpoint.Return(nil, &mysql.MySQLError{Number: tmysql.ErrAccessDenied, Message: "access denied"}) + } + failpoint.Return(nil, nil) + }) + c, err := mysql.NewConnector(cfg) + if err != nil { + return nil, errors.Trace(err) + } + db := sql.OpenDB(c) + if err = db.Ping(); err != nil { + _ = db.Close() + return nil, errors.Trace(err) + } + return db, nil +} + +// ConnectMySQL creates sql.DB used for select data +func ConnectMySQL(cfg *mysql.Config, num int) (db *sql.DB, err error) { + defer func() { + if err == nil && db != nil { + // SetMaxOpenConns and SetMaxIdleConns for connection to avoid error like + // `dial tcp 10.26.2.1:3306: connect: cannot assign requested address` + db.SetMaxOpenConns(num) + db.SetMaxIdleConns(num) + } + }() + // Try plain password first. 
+ db, firstErr := tryConnectMySQL(cfg) + if firstErr == nil { + return db, nil + } + // If access is denied and password is encoded by base64, try the decoded string as well. + if mysqlErr, ok := errors.Cause(firstErr).(*mysql.MySQLError); ok && mysqlErr.Number == tmysql.ErrAccessDenied { + // If password is encoded by base64, try the decoded string as well. + if password, decodeErr := base64.StdEncoding.DecodeString(cfg.Passwd); decodeErr == nil && string(password) != cfg.Passwd { + cfg.Passwd = string(password) + db2, err := tryConnectMySQL(cfg) + if err == nil { + return db2, nil + } + } + } + // If we can't connect successfully, return the first error. + return nil, errors.Trace(firstErr) +} diff --git a/sync_diff_inspector/source/common/conn_test.go b/sync_diff_inspector/source/common/conn_test.go new file mode 100644 index 00000000000..7509854152a --- /dev/null +++ b/sync_diff_inspector/source/common/conn_test.go @@ -0,0 +1,48 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package common + +import ( + "encoding/base64" + "fmt" + "testing" + + "github.com/pingcap/failpoint" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "github.com/stretchr/testify/require" +) + +func TestConnect(t *testing.T) { + plainPsw := "dQAUoDiyb1ucWZk7" + + require.NoError(t, failpoint.Enable( + "github.com/pingcap/tiflow/sync_diff_inspector/source/common/MustMySQLPassword", + fmt.Sprintf("return(\"%s\")", plainPsw))) + defer func() { + require.NoError(t, failpoint.Disable("github.com/pingcap/tiflow/sync_diff_inspector/source/common/MustMySQLPassword")) + }() + + dataSource := &config.DataSource{ + Host: "127.0.0.1", + Port: 4000, + User: "root", + Password: utils.SecretString(plainPsw), + } + _, err := ConnectMySQL(dataSource.ToDriverConfig(), 2) + require.NoError(t, err) + dataSource.Password = utils.SecretString(base64.StdEncoding.EncodeToString([]byte(plainPsw))) + _, err = ConnectMySQL(dataSource.ToDriverConfig(), 2) + require.NoError(t, err) +} diff --git a/sync_diff_inspector/source/common/rows.go b/sync_diff_inspector/source/common/rows.go new file mode 100644 index 00000000000..a97204881f2 --- /dev/null +++ b/sync_diff_inspector/source/common/rows.go @@ -0,0 +1,101 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
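+// (Editorial sketch) RowDatas implements container/heap so that rows read from
+// several shard connections can be merged in order-key order. Assumed usage,
+// close to what mysql_shard.go does (orderKeyCols would come from
+// dbutil.SelectUniqueOrderKey):
+//
+//	rd := &RowDatas{Rows: make([]RowData, 0, n), OrderKeyCols: orderKeyCols}
+//	heap.Init(rd)
+//	heap.Push(rd, RowData{Data: row, Source: 0})
+//	smallest := heap.Pop(rd).(RowData) // row with the smallest order key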
+ +package common + +import ( + "strconv" + + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +type RowData struct { + Data map[string]*dbutil.ColumnData + Source int +} + +// RowDatas is a heap of MergeItems. +type RowDatas struct { + Rows []RowData + OrderKeyCols []*model.ColumnInfo +} + +func (r RowDatas) Len() int { return len(r.Rows) } +func (r RowDatas) Less(i, j int) bool { + for _, col := range r.OrderKeyCols { + col1, ok := r.Rows[i].Data[col.Name.O] + if !ok { + log.Fatal("data don't have column", zap.String("column", col.Name.O), zap.Reflect("data", r.Rows[i].Data)) + } + col2, ok := r.Rows[j].Data[col.Name.O] + if !ok { + log.Fatal("data don't have column", zap.String("column", col.Name.O), zap.Reflect("data", r.Rows[j].Data)) + } + + switch { + case col1.IsNull && col2.IsNull: + continue + case col1.IsNull: + return true + case col2.IsNull: + return false + } + + strData1 := string(col1.Data) + strData2 := string(col2.Data) + + if utils.NeedQuotes(col.FieldType.GetType()) { + if strData1 == strData2 { + continue + } + return strData1 < strData2 + } + + num1, err1 := strconv.ParseFloat(strData1, 64) + if err1 != nil { + log.Fatal("convert string to float failed", zap.String("column", col.Name.O), zap.String("data", strData1), zap.Error(err1)) + } + num2, err2 := strconv.ParseFloat(strData2, 64) + if err2 != nil { + log.Fatal("convert string to float failed", zap.String("column", col.Name.O), zap.String("data", strData2), zap.Error(err2)) + } + + if num1 == num2 { + continue + } + return num1 < num2 + + } + + return false +} +func (r RowDatas) Swap(i, j int) { r.Rows[i], r.Rows[j] = r.Rows[j], r.Rows[i] } + +// Push implements heap.Interface's Push function +func (r *RowDatas) Push(x interface{}) { + r.Rows = append(r.Rows, x.(RowData)) +} + +// Pop implements heap.Interface's Pop function +func (r *RowDatas) Pop() (x interface{}) { + if len(r.Rows) == 0 { + return nil + } + + r.Rows, x = r.Rows[:len(r.Rows)-1], r.Rows[len(r.Rows)-1] + return +} diff --git a/sync_diff_inspector/source/common/table_diff.go b/sync_diff_inspector/source/common/table_diff.go new file mode 100644 index 00000000000..1d8befb7a11 --- /dev/null +++ b/sync_diff_inspector/source/common/table_diff.go @@ -0,0 +1,83 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package common + +import ( + "database/sql" + + "github.com/pingcap/tidb/pkg/parser/model" +) + +// TableShardSource represents the origin schema and table and DB connection before router. +// It used for MySQL Shard source. +type TableShardSource struct { + TableSource + // DBConn represents the origin DB connection for this TableSource. + // This TableSource may exists in different MySQL shard. + DBConn *sql.DB +} + +// TableSource represents the origin schema and table before router. +// It used for TiDB/MySQL source. 
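+// For example (illustrative shard names), a shard table `shard_01`.`t_0001`
+// that is routed to the target table `test`.`t` keeps OriginSchema = "shard_01"
+// and OriginTable = "t_0001" here, so checksum and row queries can still be
+// issued against the original shard.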
+type TableSource struct { + OriginSchema string + OriginTable string +} + +// TableDiff saves config for diff table +type TableDiff struct { + // Schema represents the database name. + Schema string `json:"schema"` + + // Table represents the table name. + Table string `json:"table"` + + // Info is the parser.TableInfo, include some meta infos for this table. + // It used for TiDB/MySQL/MySQL Shard sources. + Info *model.TableInfo `json:"info"` + + // columns be ignored + IgnoreColumns []string `json:"-"` + + // field should be the primary key, unique key or field with index + Fields string `json:"fields"` + + // select range, for example: "age > 10 AND age < 20" + Range string `json:"range"` + + // ignore check table's data + IgnoreDataCheck bool `json:"-"` + + // the table has column timestamp, which need to reset time_zone. + NeedUnifiedTimeZone bool `json:"-"` + + Collation string `json:"collation"` + + ChunkSize int64 `json:"chunk-size"` + + // TableLack = 1: the table only exists downstream, + // TableLack = -1: the table only exists upstream, + // TableLack = 0: the table exists both upstream and downstream. + TableLack int `json:"-"` +} + +const ( + AllTableExistFlag = 0 + DownstreamTableLackFlag = -1 + UpstreamTableLackFlag = 1 +) + +func AllTableExist(tableLack int) bool { + return tableLack == AllTableExistFlag +} diff --git a/sync_diff_inspector/source/mysql_shard.go b/sync_diff_inspector/source/mysql_shard.go new file mode 100644 index 00000000000..2a43f48081f --- /dev/null +++ b/sync_diff_inspector/source/mysql_shard.go @@ -0,0 +1,390 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package source + +import ( + "container/heap" + "context" + "database/sql" + "fmt" + "time" + + tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/filter" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/splitter" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +type MySQLTableAnalyzer struct { + sourceTableMap map[string][]*common.TableShardSource +} + +func (a *MySQLTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common.TableDiff, startRange *splitter.RangeInfo) (splitter.ChunkIterator, error) { + matchedSources := getMatchedSourcesForTable(a.sourceTableMap, table) + + // It's useful we are not able to pick shard merge source as workSource to generate ChunksIterator. 
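+	// (Editorial note) A shard-merge (multi-source) table is never chosen as
+	// the work source, so exactly one matched source is expected here; hitting
+	// the branch below would indicate a programming error.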
+ if len(matchedSources) > 1 { + log.Fatal("unreachable, shard merge table cannot generate splitter for now.") + } + // Shallow Copy + originTable := *table + originTable.Schema = matchedSources[0].OriginSchema + originTable.Table = matchedSources[0].OriginTable + progressID := dbutil.TableName(table.Schema, table.Table) + // use random splitter if we cannot use bucket splitter, then we can simply choose target table to generate chunks. + randIter, err := splitter.NewRandomIteratorWithCheckpoint(ctx, progressID, &originTable, matchedSources[0].DBConn, startRange) + if err != nil { + return nil, errors.Trace(err) + } + return randIter, nil +} + +type MySQLSources struct { + tableDiffs []*common.TableDiff + + sourceTablesMap map[string][]*common.TableShardSource +} + +func getMatchedSourcesForTable(sourceTablesMap map[string][]*common.TableShardSource, table *common.TableDiff) []*common.TableShardSource { + if sourceTablesMap == nil { + log.Fatal("unreachable, source tables map shouldn't be nil.") + } + matchSources, ok := sourceTablesMap[utils.UniqueID(table.Schema, table.Table)] + if !ok && common.AllTableExist(table.TableLack) { + log.Fatal("unreachable, no match source tables in mysql shard source.") + } + return matchSources +} + +func (s *MySQLSources) GetTableAnalyzer() TableAnalyzer { + return &MySQLTableAnalyzer{ + s.sourceTablesMap, + } +} + +func (s *MySQLSources) GetRangeIterator(ctx context.Context, r *splitter.RangeInfo, analyzer TableAnalyzer, splitThreadCount int) (RangeIterator, error) { + return NewChunksIterator(ctx, analyzer, s.tableDiffs, r, splitThreadCount) +} + +func (s *MySQLSources) Close() { + for _, t := range s.sourceTablesMap { + for _, db := range t { + db.DBConn.Close() + } + } +} + +func (s *MySQLSources) GetCountAndMd5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { + beginTime := time.Now() + table := s.tableDiffs[tableRange.GetTableIndex()] + chunk := tableRange.GetChunk() + + matchSources := getMatchedSourcesForTable(s.sourceTablesMap, table) + infoCh := make(chan *ChecksumInfo, len(s.sourceTablesMap)) + + for _, ms := range matchSources { + go func(ms *common.TableShardSource) { + count, checksum, err := utils.GetCountAndMd5Checksum(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, table.Info, chunk.Where, chunk.Args) + infoCh <- &ChecksumInfo{ + Checksum: checksum, + Count: count, + Err: err, + } + }(ms) + } + defer close(infoCh) + + var ( + err error + totalCount int64 + totalChecksum uint64 + ) + + for range matchSources { + info := <-infoCh + // catch the first error + if err == nil && info.Err != nil { + err = info.Err + } + totalCount += info.Count + totalChecksum ^= info.Checksum + } + + cost := time.Since(beginTime) + return &ChecksumInfo{ + Checksum: totalChecksum, + Count: totalCount, + Err: err, + Cost: cost, + } +} + +func (s *MySQLSources) GetCountForLackTable(ctx context.Context, tableRange *splitter.RangeInfo) int64 { + table := s.tableDiffs[tableRange.GetTableIndex()] + var totalCount int64 + + matchSources := getMatchedSourcesForTable(s.sourceTablesMap, table) + if matchSources != nil { + for _, ms := range matchSources { + count, _ := dbutil.GetRowCount(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, "", nil) + totalCount += count + } + } + return totalCount +} + +func (s *MySQLSources) GetTables() []*common.TableDiff { + return s.tableDiffs +} + +func (s *MySQLSources) GenerateFixSQL(t DMLType, upstreamData, downstreamData map[string]*dbutil.ColumnData, tableIndex int) string { + switch t { + case Insert: + return 
utils.GenerateReplaceDML(upstreamData, s.tableDiffs[tableIndex].Info, s.tableDiffs[tableIndex].Schema) + case Delete: + return utils.GenerateDeleteDML(downstreamData, s.tableDiffs[tableIndex].Info, s.tableDiffs[tableIndex].Schema) + case Replace: + return utils.GenerateReplaceDMLWithAnnotation(upstreamData, downstreamData, s.tableDiffs[tableIndex].Info, s.tableDiffs[tableIndex].Schema) + default: + log.Fatal("Don't support this type", zap.Any("dml type", t)) + } + return "" +} + +func (s *MySQLSources) GetRowsIterator(ctx context.Context, tableRange *splitter.RangeInfo) (RowDataIterator, error) { + chunk := tableRange.GetChunk() + + sourceRows := make(map[int]*sql.Rows) + + table := s.tableDiffs[tableRange.GetTableIndex()] + // for tables that do not exist upstream or downstream + if !common.AllTableExist(table.TableLack) { + return nil, nil + } + matchSources := getMatchedSourcesForTable(s.sourceTablesMap, table) + + var rowsQuery string + var orderKeyCols []*model.ColumnInfo + for i, ms := range matchSources { + rowsQuery, orderKeyCols = utils.GetTableRowsQueryFormat(ms.OriginSchema, ms.OriginTable, table.Info, table.Collation) + query := fmt.Sprintf(rowsQuery, chunk.Where) + rows, err := ms.DBConn.QueryContext(ctx, query, chunk.Args...) + if err != nil { + return nil, errors.Trace(err) + } + sourceRows[i] = rows + } + + sourceRowDatas := &common.RowDatas{ + Rows: make([]common.RowData, 0, len(sourceRows)), + OrderKeyCols: orderKeyCols, + } + heap.Init(sourceRowDatas) + // first push one row from all the sources into heap + for source, sourceRow := range sourceRows { + rowData, err := getRowData(sourceRow) + if err != nil { + return nil, errors.Trace(err) + } + if rowData != nil { + heap.Push(sourceRowDatas, common.RowData{ + Data: rowData, + Source: source, + }) + } else { + if sourceRow.Err() != nil { + return nil, sourceRow.Err() + } + } + } + + return &MultiSourceRowsIterator{ + sourceRows: sourceRows, + sourceRowDatas: sourceRowDatas, + }, nil +} + +func (s *MySQLSources) GetDB() *sql.DB { + // return any of them is ok + for _, st := range s.sourceTablesMap { + for _, db := range st { + return db.DBConn + } + } + log.Warn("the source has no DB connection.") + return nil +} + +func (s *MySQLSources) GetSnapshot() string { + log.Fatal("unreachable!, mysql doesn't have the snapshot") + return "" +} + +func (s *MySQLSources) GetSourceStructInfo(ctx context.Context, tableIndex int) ([]*model.TableInfo, error) { + tableDiff := s.GetTables()[tableIndex] + // for tables that do not exist upstream or downstream + if !common.AllTableExist(tableDiff.TableLack) { + return nil, nil + } + tableSources := getMatchedSourcesForTable(s.sourceTablesMap, tableDiff) + sourceTableInfos := make([]*model.TableInfo, len(tableSources)) + for i, tableSource := range tableSources { + sourceSchema, sourceTable := tableSource.OriginSchema, tableSource.OriginTable + sourceTableInfo, err := utils.GetTableInfo(ctx, tableSource.DBConn, sourceSchema, sourceTable) + if err != nil { + return nil, errors.Trace(err) + } + sourceTableInfo, _ = utils.ResetColumns(sourceTableInfo, tableDiff.IgnoreColumns) + sourceTableInfos[i] = sourceTableInfo + } + return sourceTableInfos, nil +} + +type MultiSourceRowsIterator struct { + sourceRows map[int]*sql.Rows + sourceRowDatas *common.RowDatas +} + +func getRowData(rows *sql.Rows) (rowData map[string]*dbutil.ColumnData, err error) { + for rows.Next() { + rowData, err = dbutil.ScanRow(rows) + return + } + return +} + +func (ms *MultiSourceRowsIterator) Next() 
(map[string]*dbutil.ColumnData, error) { + // Before running getSourceRow, heap save one row from all the sources, + // otherwise this source has read to the end. Each row should be the smallest in each source. + // Once there is one row popped, we need to immediately push one row, which is from the same source, into the heap. + // all the sources had read to the end, no data to return + if len(ms.sourceRowDatas.Rows) == 0 { + return nil, nil + } + rowData := heap.Pop(ms.sourceRowDatas).(common.RowData) + newRowData, err := getRowData(ms.sourceRows[rowData.Source]) + if err != nil { + return nil, err + } + if newRowData != nil { + heap.Push(ms.sourceRowDatas, common.RowData{ + Data: newRowData, + Source: rowData.Source, + }) + } else { + if ms.sourceRows[rowData.Source].Err() != nil { + return nil, ms.sourceRows[rowData.Source].Err() + } + } + return rowData.Data, nil +} + +func (ms *MultiSourceRowsIterator) Close() { + for _, s := range ms.sourceRows { + s.Close() + } +} + +func NewMySQLSources(ctx context.Context, tableDiffs []*common.TableDiff, ds []*config.DataSource, threadCount int, f tableFilter.Filter, skipNonExistingTable bool) (Source, error) { + sourceTablesMap := make(map[string][]*common.TableShardSource) + // we should get the real table name + // and real table row query from sourceDB. + targetUniqueTableMap := make(map[string]struct{}) + for _, tableDiff := range tableDiffs { + targetUniqueTableMap[utils.UniqueID(tableDiff.Schema, tableDiff.Table)] = struct{}{} + } + + // only used for check + sourceTablesAfterRoute := make(map[string]struct{}) + + for i, sourceDB := range ds { + sourceSchemas, err := dbutil.GetSchemas(ctx, sourceDB.Conn) + if err != nil { + return nil, errors.Annotatef(err, "get schemas from %d source", i) + } + + // use this map to record max Connection for this source. + maxSourceRouteTableCount := make(map[string]int) + for _, schema := range sourceSchemas { + // Skip system schema. + if filter.IsSystemSchema(schema) { + continue + } + allTables, err := dbutil.GetTables(ctx, sourceDB.Conn, schema) + if err != nil { + return nil, errors.Annotatef(err, "get tables from %d source %s", i, schema) + } + for _, table := range allTables { + targetSchema, targetTable := schema, table + if sourceDB.Router != nil { + targetSchema, targetTable, err = sourceDB.Router.Route(schema, table) + if err != nil { + return nil, errors.Errorf("get route result for %d source %s.%s failed, error %v", i, schema, table, err) + } + } + uniqueId := utils.UniqueID(targetSchema, targetTable) + isMatched := f.MatchTable(targetSchema, targetTable) + if isMatched { + // if match the filter, we should respect it and check target has this table later. 
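+				// (Editorial note) Record every routed table that matches the filter;
+				// just below, a table whose routed target does not exist is dropped
+				// unless skipNonExistingTable is set, in which case it is kept and
+				// reconciled later by checkTableMatched.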
+ sourceTablesAfterRoute[uniqueId] = struct{}{} + } + if _, ok := targetUniqueTableMap[uniqueId]; !ok && !(isMatched && skipNonExistingTable) { + continue + } + maxSourceRouteTableCount[uniqueId]++ + if _, ok := sourceTablesMap[uniqueId]; !ok { + sourceTablesMap[uniqueId] = make([]*common.TableShardSource, 0) + } + sourceTablesMap[uniqueId] = append(sourceTablesMap[uniqueId], &common.TableShardSource{ + TableSource: common.TableSource{ + OriginSchema: schema, + OriginTable: table, + }, + DBConn: sourceDB.Conn, + }) + } + } + maxConn := 0 + for _, c := range maxSourceRouteTableCount { + if c > maxConn { + maxConn = c + } + } + log.Info("will increase connection configurations for DB of instance", + zap.Int("connection limit", maxConn*threadCount+1)) + // Set this conn to max + sourceDB.Conn.SetMaxOpenConns(maxConn*threadCount + 1) + sourceDB.Conn.SetMaxIdleConns(maxConn*threadCount + 1) + + } + + tableDiffs, err := checkTableMatched(tableDiffs, targetUniqueTableMap, sourceTablesAfterRoute, skipNonExistingTable) + if err != nil { + return nil, errors.Annotatef(err, "please make sure the filter is correct.") + } + + mss := &MySQLSources{ + tableDiffs: tableDiffs, + sourceTablesMap: sourceTablesMap, + } + return mss, nil +} diff --git a/sync_diff_inspector/source/source.go b/sync_diff_inspector/source/source.go new file mode 100644 index 00000000000..5e615488886 --- /dev/null +++ b/sync_diff_inspector/source/source.go @@ -0,0 +1,429 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package source + +import ( + "context" + "database/sql" + "sort" + "strings" + "time" + + "github.com/go-sql-driver/mysql" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/filter" + tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" + router "github.com/pingcap/tidb/pkg/util/table-router" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/splitter" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +type DMLType int32 + +const ( + Insert DMLType = iota + 1 + Delete + Replace +) + +const ( + ShieldDBName = "_no__exists__db_" + ShieldTableName = "_no__exists__table_" + GetSyncPointQuery = "SELECT primary_ts, secondary_ts FROM tidb_cdc.syncpoint_v1 ORDER BY primary_ts DESC LIMIT 1" +) + +type ChecksumInfo struct { + Checksum uint64 + Count int64 + Err error + Cost time.Duration +} + +// RowDataIterator represents the row data in source. +type RowDataIterator interface { + // Next seeks the next row data, it used when compared rows. + Next() (map[string]*dbutil.ColumnData, error) + // Close release the resource. + Close() +} + +// TableAnalyzer represents the method in different source. +// each source has its own analyze function. +type TableAnalyzer interface { + // AnalyzeSplitter picks the proper splitter.ChunkIterator according to table and source. 
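+	// (Editorial note) Implementations differ per source: the MySQL shard
+	// analyzer in this package falls back to the random splitter, while the
+	// TiDB analyzer is expected to prefer the statistics-bucket splitter in
+	// splitter/bucket.go when buckets are usable.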
+ AnalyzeSplitter(context.Context, *common.TableDiff, *splitter.RangeInfo) (splitter.ChunkIterator, error) +} + +type Source interface { + // GetTableAnalyzer pick the proper analyzer for different source. + // the implement of this function is different in mysql/tidb. + GetTableAnalyzer() TableAnalyzer + + // GetRangeIterator generates the range iterator with the checkpoint(*splitter.RangeInfo) and analyzer. + // this is the mainly iterator across the whole sync diff. + // One source has one range iterator to produce the range to channel. + // there are many workers consume the range from the channel to compare. + GetRangeIterator(context.Context, *splitter.RangeInfo, TableAnalyzer, int) (RangeIterator, error) + + // GetCountAndMd5 gets the md5 result and the count from given range. + GetCountAndMd5(context.Context, *splitter.RangeInfo) *ChecksumInfo + + // GetCountForLackTable gets the count for tables that don't exist upstream or downstream. + GetCountForLackTable(context.Context, *splitter.RangeInfo) int64 + + // GetRowsIterator gets the row data iterator from given range. + GetRowsIterator(context.Context, *splitter.RangeInfo) (RowDataIterator, error) + + // GenerateFixSQL generates the fix sql with given type. + GenerateFixSQL(DMLType, map[string]*dbutil.ColumnData, map[string]*dbutil.ColumnData, int) string + + // GetTables represents the tableDiffs. + GetTables() []*common.TableDiff + + // GetSourceStructInfo get the source table info from a given target table + GetSourceStructInfo(context.Context, int) ([]*model.TableInfo, error) + + // GetDB represents the db connection. + GetDB() *sql.DB + + // GetSnapshot represents the snapshot of source. + // only TiDB source has the snapshot. + // TODO refine the interface. + GetSnapshot() string + + // Close ... + Close() +} + +func NewSources(ctx context.Context, cfg *config.Config) (downstream Source, upstream Source, err error) { + // init db connection for upstream / downstream. + err = initDBConn(ctx, cfg) + if err != nil { + return nil, nil, errors.Trace(err) + } + tablesToBeCheck, err := initTables(ctx, cfg) + if err != nil { + return nil, nil, errors.Trace(err) + } + + tableDiffs := make([]*common.TableDiff, 0, len(tablesToBeCheck)) + for _, tableConfig := range tablesToBeCheck { + newInfo, needUnifiedTimeZone := utils.ResetColumns(tableConfig.TargetTableInfo, tableConfig.IgnoreColumns) + tableDiffs = append(tableDiffs, &common.TableDiff{ + Schema: tableConfig.Schema, + Table: tableConfig.Table, + Info: newInfo, + // TODO: field `IgnoreColumns` can be deleted. + IgnoreColumns: tableConfig.IgnoreColumns, + Fields: strings.Join(tableConfig.Fields, ","), + Range: tableConfig.Range, + NeedUnifiedTimeZone: needUnifiedTimeZone, + Collation: tableConfig.Collation, + ChunkSize: tableConfig.ChunkSize, + }) + + // When the router set case-sensitive false, + // that add rule match itself will make table case unsensitive. + for _, d := range cfg.Task.SourceInstances { + if _, ok := d.RouteTargetSet[dbutil.TableName(tableConfig.Schema, tableConfig.Table)]; ok { + // There is a user rule routing to `tableConfig.Schema`.`tableConfig.Table` + rules := d.Router.Match(tableConfig.Schema, tableConfig.Table) + + if len(rules) == 0 { + // There is no self match in these user rules. + // Need to shield the table for this source. 
+ if d.Router.AddRule(&router.TableRule{ + SchemaPattern: tableConfig.Schema, + TablePattern: tableConfig.Table, + TargetSchema: ShieldDBName, + TargetTable: ShieldTableName, + }) != nil { + return nil, nil, errors.Errorf("add shield rule failed [schema = %s] [table = %s]", tableConfig.Schema, tableConfig.Table) + } + } + } else if _, ok := d.RouteTargetSet[dbutil.TableName(tableConfig.Schema, "")]; ok { + // There is a user rule routing to `tableConfig.Schema` + rules := d.Router.Match(tableConfig.Schema, tableConfig.Table) + + if len(rules) == 0 { + // There is no self match in these user rules. + // Need to shield the table for this source. + if d.Router.AddRule(&router.TableRule{ + SchemaPattern: tableConfig.Schema, + TablePattern: tableConfig.Table, + TargetSchema: ShieldDBName, + TargetTable: ShieldTableName, + }) != nil { + return nil, nil, errors.Errorf("add shield rule failed [schema = %s] [table = %s]", tableConfig.Schema, tableConfig.Table) + } + } + } else { + // Add the default rule to match upper/lower case + if d.Router.AddRule(&router.TableRule{ + SchemaPattern: tableConfig.Schema, + TablePattern: tableConfig.Table, + TargetSchema: tableConfig.Schema, + TargetTable: tableConfig.Table, + }) != nil { + return nil, nil, errors.Errorf("add rule failed [schema = %s] [table = %s]", tableConfig.Schema, tableConfig.Table) + } + } + } + } + + // Sort TableDiff is important! + // because we compare table one by one. + sort.Slice(tableDiffs, func(i, j int) bool { + ti := utils.UniqueID(tableDiffs[i].Schema, tableDiffs[i].Table) + tj := utils.UniqueID(tableDiffs[j].Schema, tableDiffs[j].Table) + return strings.Compare(ti, tj) > 0 + }) + + // If `bucket size` is much larger than `chunk size`, + // we need to split the bucket into some chunks, which wastes much time. + // So we use WorkPool to split buckets in parallel. + // Besides, bucketSpliters of each table use shared WorkPool + bucketSpliterPool := utils.NewWorkerPool(uint(cfg.CheckThreadCount), "bucketIter") + // for mysql_shard, it needs `cfg.CheckThreadCount` + `cfg.SplitThreadCount` at most, because it cannot use bucket. + mysqlConnCount := cfg.CheckThreadCount + cfg.SplitThreadCount + upstream, err = buildSourceFromCfg(ctx, tableDiffs, mysqlConnCount, bucketSpliterPool, cfg.SkipNonExistingTable, cfg.Task.TargetCheckTables, cfg.Task.SourceInstances...) 
+ if err != nil { + return nil, nil, errors.Annotate(err, "from upstream") + } + if len(upstream.GetTables()) == 0 { + return nil, nil, errors.Errorf("no table need to be compared") + } + downstream, err = buildSourceFromCfg(ctx, upstream.GetTables(), mysqlConnCount, bucketSpliterPool, cfg.SkipNonExistingTable, cfg.Task.TargetCheckTables, cfg.Task.TargetInstance) + if err != nil { + return nil, nil, errors.Annotate(err, "from downstream") + } + return downstream, upstream, nil +} + +func buildSourceFromCfg( + ctx context.Context, + tableDiffs []*common.TableDiff, connCount int, + bucketSpliterPool *utils.WorkerPool, + skipNonExistingTable bool, + f tableFilter.Filter, dbs ...*config.DataSource, +) (Source, error) { + if len(dbs) < 1 { + return nil, errors.Errorf("no db config detected") + } + ok, err := dbutil.IsTiDB(ctx, dbs[0].Conn) + if err != nil { + return nil, errors.Annotatef(err, "connect to db failed") + } + + if ok { + if len(dbs) == 1 { + return NewTiDBSource(ctx, tableDiffs, dbs[0], bucketSpliterPool, f, skipNonExistingTable) + } else { + log.Fatal("Don't support check table in multiple tidb instance, please specify one tidb instance.") + } + } + return NewMySQLSources(ctx, tableDiffs, dbs, connCount, f, skipNonExistingTable) +} + +func getAutoSnapshotPosition(cfg *mysql.Config) (string, string, error) { + tmpConn, err := common.ConnectMySQL(cfg, 2) + if err != nil { + return "", "", errors.Annotatef(err, "connecting to auto-position tidb_snapshot failed") + } + defer tmpConn.Close() + var primaryTs, secondaryTs string + err = tmpConn.QueryRow(GetSyncPointQuery).Scan(&primaryTs, &secondaryTs) + if err != nil { + return "", "", errors.Annotatef(err, "fetching auto-position tidb_snapshot failed") + } + return primaryTs, secondaryTs, nil +} + +func initDBConn(ctx context.Context, cfg *config.Config) error { + // Fill in tidb_snapshot if it is set to AUTO + // This is only supported when set to auto on both target/source. + if cfg.Task.TargetInstance.IsAutoSnapshot() { + if len(cfg.Task.SourceInstances) > 1 { + return errors.Errorf("'auto' snapshot only supports one tidb source") + } + if !cfg.Task.SourceInstances[0].IsAutoSnapshot() { + return errors.Errorf("'auto' snapshot should be set on both target and source") + } + primaryTs, secondaryTs, err := getAutoSnapshotPosition(cfg.Task.TargetInstance.ToDriverConfig()) + if err != nil { + return err + } + cfg.Task.TargetInstance.SetSnapshot(secondaryTs) + cfg.Task.SourceInstances[0].SetSnapshot(primaryTs) + } + // we had `cfg.SplitThreadCount` producers and `cfg.CheckThreadCount` consumer to use db connections maybe and `cfg.CheckThreadCount` splitter to split buckets. + // so the connection count need to be cfg.SplitThreadCount + cfg.CheckThreadCount + cfg.CheckThreadCount. + targetConn, err := common.ConnectMySQL(cfg.Task.TargetInstance.ToDriverConfig(), cfg.SplitThreadCount+2*cfg.CheckThreadCount) + if err != nil { + return errors.Trace(err) + } + + cfg.Task.TargetInstance.Conn = targetConn + + for _, source := range cfg.Task.SourceInstances { + // If it is still set to AUTO it means it was not set on the target. + // We require it to be set to AUTO on both. 
+ if source.IsAutoSnapshot() { + return errors.Errorf("'auto' snapshot should be set on both target and source") + } + // connect source db with target db time_zone + conn, err := common.ConnectMySQL(source.ToDriverConfig(), cfg.SplitThreadCount+2*cfg.CheckThreadCount) + if err != nil { + return errors.Trace(err) + } + source.Conn = conn + } + return nil +} + +func initTables(ctx context.Context, cfg *config.Config) (cfgTables []*config.TableConfig, err error) { + downStreamConn := cfg.Task.TargetInstance.Conn + TargetTablesList := make([]*common.TableSource, 0) + targetSchemas, err := dbutil.GetSchemas(ctx, downStreamConn) + if err != nil { + return nil, errors.Annotatef(err, "get schemas from target source") + } + + for _, schema := range targetSchemas { + if filter.IsSystemSchema(schema) { + continue + } + allTables, err := dbutil.GetTables(ctx, downStreamConn, schema) + if err != nil { + return nil, errors.Annotatef(err, "get tables from target source %s", schema) + } + for _, t := range allTables { + TargetTablesList = append(TargetTablesList, &common.TableSource{ + OriginSchema: schema, + OriginTable: t, + }) + } + } + + // fill the table information. + // will add default source information, don't worry, we will use table config's info replace this later. + // cfg.Tables.Schema => cfg.Tables.Tables => target/source Schema.Table + cfgTables = make([]*config.TableConfig, 0, len(TargetTablesList)) + version := utils.TryToGetVersion(ctx, downStreamConn) + for _, tables := range TargetTablesList { + if cfg.Task.TargetCheckTables.MatchTable(tables.OriginSchema, tables.OriginTable) { + log.Debug("match target table", zap.String("table", dbutil.TableName(tables.OriginSchema, tables.OriginTable))) + + tableInfo, err := utils.GetTableInfoWithVersion(ctx, downStreamConn, tables.OriginSchema, tables.OriginTable, version) + if err != nil { + return nil, errors.Errorf("get table %s.%s's information error %s", tables.OriginSchema, tables.OriginTable, errors.ErrorStack(err)) + } + // Initialize all the tables that matches the `target-check-tables`[config.toml] and appears in downstream. + cfgTables = append(cfgTables, &config.TableConfig{ + Schema: tables.OriginSchema, + Table: tables.OriginTable, + TargetTableInfo: tableInfo, + Range: "TRUE", + }) + } + } + + // Reset fields of some tables of `cfgTables` according to `table-configs`[config.toml]. + // The table in `table-configs`[config.toml] should exist in both `target-check-tables`[config.toml] and tables from downstream. + for i, table := range cfg.Task.TargetTableConfigs { + // parse every config to find target table. + cfgFilter, err := tableFilter.Parse(table.TargetTables) + if err != nil { + return nil, errors.Errorf("unable to parse target table for the %dth config", i) + } + // iterate all target tables to make sure + // 1. one table only match at most one config. + // 2. config can miss table. + for _, cfgTable := range cfgTables { + if cfgFilter.MatchTable(cfgTable.Schema, cfgTable.Table) { + if cfgTable.HasMatched { + return nil, errors.Errorf("different config matched to same target table %s.%s", cfgTable.Schema, cfgTable.Table) + } + if table.Range != "" { + cfgTable.Range = table.Range + } + cfgTable.IgnoreColumns = table.IgnoreColumns + cfgTable.Fields = table.Fields + cfgTable.Collation = table.Collation + cfgTable.ChunkSize = table.ChunkSize + cfgTable.HasMatched = true + } + } + } + return cfgTables, nil +} + +// RangeIterator generate next chunk for the whole tables lazily. 
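+// A typical consumer loop looks like this (illustrative sketch only; error
+// handling simplified, the real consumer lives elsewhere in sync_diff_inspector):
+//
+//	for {
+//		rangeInfo, err := iter.Next(ctx)
+//		if err != nil || rangeInfo == nil {
+//			break
+//		}
+//		// hand rangeInfo to a check worker for comparison
+//	}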
+type RangeIterator interface { + // Next seeks the next chunk, return nil if seeks to end. + Next(context.Context) (*splitter.RangeInfo, error) + + Close() +} + +func checkTableMatched(tableDiffs []*common.TableDiff, targetMap map[string]struct{}, sourceMap map[string]struct{}, skipNonExistingTable bool) ([]*common.TableDiff, error) { + tableIndexMap := getIndexMapForTable(tableDiffs) + // check target exists but source not found + for tableDiff := range targetMap { + // target table have all passed in tableFilter + if _, ok := sourceMap[tableDiff]; !ok { + if !skipNonExistingTable { + return tableDiffs, errors.Errorf("the source has no table to be compared. target-table is `%s`", tableDiff) + } + index := tableIndexMap[tableDiff] + if tableDiffs[index].TableLack == 0 { + tableDiffs[index].TableLack = common.UpstreamTableLackFlag + log.Info("the source has no table to be compared", zap.String("target-table", tableDiff)) + } + } + } + // check source exists but target not found + for tableDiff := range sourceMap { + // need check source table have passd in tableFilter here + if _, ok := targetMap[tableDiff]; !ok { + if !skipNonExistingTable { + return tableDiffs, errors.Errorf("the target has no table to be compared. source-table is `%s`", tableDiff) + } + slice := strings.Split(strings.Replace(tableDiff, "`", "", -1), ".") + tableDiffs = append(tableDiffs, &common.TableDiff{ + Schema: slice[0], + Table: slice[1], + TableLack: common.DownstreamTableLackFlag, + }) + log.Info("the target has no table to be compared", zap.String("source-table", tableDiff)) + } + } + log.Info("table match check finished") + return tableDiffs, nil +} + +func getIndexMapForTable(tableDiffs []*common.TableDiff) map[string]int { + tableIndexMap := make(map[string]int) + for i := 0; i < len(tableDiffs); i++ { + tableUniqueID := utils.UniqueID(tableDiffs[i].Schema, tableDiffs[i].Table) + tableIndexMap[tableUniqueID] = i + } + return tableIndexMap +} diff --git a/sync_diff_inspector/source/source_test.go b/sync_diff_inspector/source/source_test.go new file mode 100644 index 00000000000..bb3ea1b02ab --- /dev/null +++ b/sync_diff_inspector/source/source_test.go @@ -0,0 +1,955 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package source + +import ( + "context" + "database/sql" + "database/sql/driver" + "fmt" + "os" + "regexp" + "strconv" + "testing" + "time" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/util/dbutil" + filter "github.com/pingcap/tidb/pkg/util/table-filter" + router "github.com/pingcap/tidb/pkg/util/table-router" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/splitter" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "github.com/stretchr/testify/require" + + _ "github.com/go-sql-driver/mysql" +) + +type tableCaseType struct { + schema string + table string + createTableSQL string + rangeColumns []string + rangeLeft []string + rangeRight []string + rangeInfo *splitter.RangeInfo + rowQuery string + rowColumns []string + rows [][]driver.Value +} + +type MockChunkIterator struct { + ctx context.Context + tableDiff *common.TableDiff + rangeInfo *splitter.RangeInfo + index *chunk.ChunkID +} + +const CHUNKS = 5 +const BUCKETS = 1 + +func (m *MockChunkIterator) Next() (*chunk.Range, error) { + if m.index.ChunkIndex == m.index.ChunkCnt-1 { + return nil, nil + } + m.index.ChunkIndex = m.index.ChunkIndex + 1 + return &chunk.Range{ + Index: &chunk.ChunkID{ + TableIndex: m.index.TableIndex, + BucketIndexLeft: m.index.BucketIndexLeft, + BucketIndexRight: m.index.BucketIndexRight, + ChunkIndex: m.index.ChunkIndex, + ChunkCnt: m.index.ChunkCnt, + }, + }, nil +} + +func (m *MockChunkIterator) Close() { + +} + +type MockAnalyzer struct { +} + +func (m *MockAnalyzer) AnalyzeSplitter(ctx context.Context, tableDiff *common.TableDiff, rangeInfo *splitter.RangeInfo) (splitter.ChunkIterator, error) { + i := &chunk.ChunkID{ + TableIndex: 0, + BucketIndexLeft: 0, + BucketIndexRight: 0, + ChunkIndex: -1, + ChunkCnt: CHUNKS, + } + return &MockChunkIterator{ + ctx, + tableDiff, + rangeInfo, + i, + }, nil +} + +func TestTiDBSource(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + tableCases := []*tableCaseType{ + { + schema: "source_test", + table: "test1", + createTableSQL: "CREATE TABLE `source_test`.`test1` (`a` int, `b` varchar(24), `c` float, `d` binary(1), `e` varbinary(1), PRIMARY KEY(`a`)\n)", + rangeColumns: []string{"a", "b"}, + rangeLeft: []string{"3", "b"}, + rangeRight: []string{"5", "f"}, + rowQuery: "SELECT", + rowColumns: []string{"a", "b", "c", "d", "e"}, + rows: [][]driver.Value{ + {"1", "a", "1.2", []byte{0xaa}, []byte{0xaa}}, + {"2", "b", "3.4", []byte{0xbb}, []byte{0xbb}}, + {"3", "c", "5.6", []byte{0xcc}, []byte{0xcc}}, + {"4", "d", "6.7", []byte{0xdd}, []byte{0xdd}}, + }, + }, + { + schema: "source_test", + table: "test2", + createTableSQL: "CREATE TABLE `source_test`.`test2` (`a` int, `b` varchar(24), `c` float, `d` datetime, PRIMARY KEY(`a`)\n)", + rangeColumns: []string{"a", "b"}, + rangeLeft: []string{"3", "b"}, + rangeRight: []string{"5", "f"}, + }, + } + + tableDiffs := prepareTiDBTables(t, tableCases) + + mock.ExpectQuery("SHOW DATABASES").WillReturnRows(sqlmock.NewRows([]string{"Database"}).AddRow("mysql").AddRow("source_test")) + mock.ExpectQuery("SHOW FULL TABLES*").WillReturnRows(sqlmock.NewRows([]string{"Table", "type"}).AddRow("test1", "base").AddRow("test2", "base")) + mock.ExpectQuery("SELECT 
version()*").WillReturnRows(sqlmock.NewRows([]string{"version()"}).AddRow("5.7.25-TiDB-v4.0.12")) + + f, err := filter.Parse([]string{"source_test.*"}) + require.NoError(t, err) + tidb, err := NewTiDBSource(ctx, tableDiffs, &config.DataSource{Conn: conn}, utils.NewWorkerPool(1, "bucketIter"), f, false) + require.NoError(t, err) + + caseFn := []struct { + check func(sqlmock.Sqlmock, Source) (bool, error) + }{ + { + check: func(mock sqlmock.Sqlmock, source Source) (bool, error) { + mock.ExpectQuery("SHOW CREATE TABLE*").WillReturnRows(sqlmock.NewRows([]string{"Table", "Create Table"}).AddRow(tableCases[0].table, tableCases[0].createTableSQL)) + mock.ExpectQuery("SELECT _tidb_rowid FROM*").WillReturnRows(sqlmock.NewRows([]string{"_tidb_rowid"})) + mock.ExpectQuery("SHOW VARIABLES LIKE 'sql_mode'*").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).AddRow("sql_mode", "ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION")) + tableInfo, err := source.GetSourceStructInfo(ctx, 0) + if err != nil { + return false, err + } + return !tableInfo[0].PKIsHandle, nil + }, + }, + { + check: func(mock sqlmock.Sqlmock, source Source) (bool, error) { + mock.ExpectQuery("SHOW CREATE TABLE*").WillReturnRows(sqlmock.NewRows([]string{"Table", "Create Table"}).AddRow(tableCases[1].table, tableCases[1].createTableSQL)) + mock.ExpectQuery("SELECT _tidb_rowid FROM*").WillReturnError(fmt.Errorf("ERROR 1054 (42S22): Unknown column '_tidb_rowid' in 'field list'")) + mock.ExpectQuery("SHOW VARIABLES LIKE 'sql_mode'*").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).AddRow("sql_mode", "ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION")) + tableInfo, err := source.GetSourceStructInfo(ctx, 0) + if err != nil { + return false, err + } + return tableInfo[0].PKIsHandle, nil + }, + }, + } + + for n, tableCase := range tableCases { + t.Log(n) + check, err := caseFn[n].check(mock, tidb) + require.NoError(t, err) + require.True(t, check) + require.Equal(t, n, tableCase.rangeInfo.GetTableIndex()) + countRows := sqlmock.NewRows([]string{"CNT", "CHECKSUM"}).AddRow(123, 456) + mock.ExpectQuery("SELECT COUNT.*").WillReturnRows(countRows) + checksum := tidb.GetCountAndMd5(ctx, tableCase.rangeInfo) + require.NoError(t, checksum.Err) + require.Equal(t, checksum.Count, int64(123)) + require.Equal(t, checksum.Checksum, uint64(456)) + } + + // Test ChunkIterator + iter, err := tidb.GetRangeIterator(ctx, tableCases[0].rangeInfo, &MockAnalyzer{}, 3) + require.NoError(t, err) + resRecords := [][]bool{ + {false, false, false, false, false}, + {false, false, false, false, false}, + } + for { + ch, err := iter.Next(ctx) + require.NoError(t, err) + if ch == nil { + break + } + require.Equal(t, ch.ChunkRange.Index.ChunkCnt, 5) + require.Equal(t, resRecords[ch.ChunkRange.Index.TableIndex][ch.ChunkRange.Index.ChunkIndex], false) + resRecords[ch.ChunkRange.Index.TableIndex][ch.ChunkRange.Index.ChunkIndex] = true + } + iter.Close() + require.Equal(t, resRecords, [][]bool{ + {true, true, true, true, true}, + {true, true, true, true, true}, + }) + + // Test RowIterator + tableCase := tableCases[0] + dataRows := sqlmock.NewRows(tableCase.rowColumns) + for _, row := range tableCase.rows { + dataRows.AddRow(row...) 
+ } + mock.ExpectQuery(tableCase.rowQuery).WillReturnRows(dataRows) + rowIter, err := tidb.GetRowsIterator(ctx, tableCase.rangeInfo) + require.NoError(t, err) + + row := 0 + var firstRow, secondRow map[string]*dbutil.ColumnData + for { + columns, err := rowIter.Next() + require.NoError(t, err) + if columns == nil { + require.Equal(t, row, len(tableCase.rows)) + break + } + for j, value := range tableCase.rows[row] { + require.Equal(t, columns[tableCase.rowColumns[j]].IsNull, false) + if _, ok := value.(string); ok { + require.Equal(t, columns[tableCase.rowColumns[j]].Data, []byte(value.(string))) + } + } + if row == 0 { + firstRow = columns + } else if row == 1 { + secondRow = columns + } + row++ + } + require.Equal(t, tidb.GenerateFixSQL(Insert, firstRow, secondRow, 0), "REPLACE INTO `source_test`.`test1`(`a`,`b`,`c`,`d`,`e`) VALUES (1,'a',1.2,x'aa',x'aa');") + require.Equal(t, tidb.GenerateFixSQL(Delete, firstRow, secondRow, 0), "DELETE FROM `source_test`.`test1` WHERE `a` = 2 AND `b` = 'b' AND `c` = 3.4 AND `d` = x'bb' AND `e` = x'bb' LIMIT 1;") + require.Equal(t, tidb.GenerateFixSQL(Replace, firstRow, secondRow, 0), + "/*\n"+ + " DIFF COLUMNS ╏ `A` ╏ `B` ╏ `C` ╏ `D` ╏ `E` \n"+ + "╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╍╍╋╍╍╍╍╍╍╍╍\n"+ + " source data ╏ 1 ╏ 'a' ╏ 1.2 ╏ x'aa' ╏ x'aa' \n"+ + "╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╍╍╋╍╍╍╍╍╍╍╍\n"+ + " target data ╏ 2 ╏ 'b' ╏ 3.4 ╏ x'aa' ╏ x'aa' \n"+ + "╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╍╍╋╍╍╍╍╍╍╍╍\n"+ + "*/\n"+ + "REPLACE INTO `source_test`.`test1`(`a`,`b`,`c`,`d`,`e`) VALUES (1,'a',1.2,x'aa',x'aa');") + + rowIter.Close() + + analyze := tidb.GetTableAnalyzer() + countRows := sqlmock.NewRows([]string{"Cnt"}).AddRow(0) + mock.ExpectQuery("SELECT COUNT.*").WillReturnRows(countRows) + chunkIter, err := analyze.AnalyzeSplitter(ctx, tableDiffs[0], tableCase.rangeInfo) + require.NoError(t, err) + chunkIter.Close() + tidb.Close() +} + +func TestFallbackToRandomIfRangeIsSet(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + mock.ExpectQuery("SHOW DATABASES").WillReturnRows(sqlmock.NewRows([]string{"Database"}).AddRow("mysql").AddRow("source_test")) + mock.ExpectQuery("SHOW FULL TABLES*").WillReturnRows(sqlmock.NewRows([]string{"Table", "type"}).AddRow("test1", "base")) + statsRows := sqlmock.NewRows([]string{"Db_name", "Table_name", "Column_name", "Is_index", "Bucket_id", "Count", "Repeats", "Lower_Bound", "Upper_Bound"}) + for i := 0; i < 5; i++ { + statsRows.AddRow("source_test", "test1", "PRIMARY", 1, (i+1)*64, (i+1)*64, 1, + fmt.Sprintf("(%d, %d)", i*64, i*12), fmt.Sprintf("(%d, %d)", (i+1)*64-1, (i+1)*12-1)) + } + mock.ExpectQuery("SELECT version()*").WillReturnRows(sqlmock.NewRows([]string{"version()"}).AddRow("5.7.25-TiDB-v4.0.12")) + mock.ExpectQuery(regexp.QuoteMeta("SELECT COUNT(1) cnt")).WillReturnRows(sqlmock.NewRows([]string{"cnt"}).AddRow(100)) + + f, err := filter.Parse([]string{"source_test.*"}) + require.NoError(t, err) + + createTableSQL1 := "CREATE TABLE `test1` " + + "(`id` int(11) NOT NULL AUTO_INCREMENT, " + + " `k` int(11) NOT NULL DEFAULT '0', " + + "`c` char(120) NOT NULL DEFAULT '', " + + "PRIMARY KEY (`id`), KEY `k_1` (`k`))" + + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + + table1 := &common.TableDiff{ + Schema: "source_test", + Table: "test1", + Info: tableInfo, + Range: "id < 10", // This should prevent using 
BucketIterator + } + + tidb, err := NewTiDBSource(ctx, []*common.TableDiff{table1}, &config.DataSource{Conn: conn}, utils.NewWorkerPool(1, "bucketIter"), f, false) + require.NoError(t, err) + + analyze := tidb.GetTableAnalyzer() + chunkIter, err := analyze.AnalyzeSplitter(ctx, table1, nil) + require.NoError(t, err) + require.IsType(t, &splitter.RandomIterator{}, chunkIter) + + chunkIter.Close() + tidb.Close() +} + +func TestMysqlShardSources(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + tableCases := []*tableCaseType{ + { + schema: "source_test", + table: "test1", + createTableSQL: "CREATE TABLE `source_test`.`test1` (`a` int, `b` varchar(24), `c` float, primary key(`a`, `b`))", + rangeColumns: []string{"a", "b"}, + rangeLeft: []string{"3", "b"}, + rangeRight: []string{"5", "f"}, + rowQuery: "SELECT.*", + rowColumns: []string{"a", "b", "c"}, + rows: [][]driver.Value{ + {"1", "a", "1.2"}, + {"2", "b", "2.2"}, + {"3", "c", "3.2"}, + {"4", "d", "4.2"}, + {"5", "e", "5.2"}, + {"6", "f", "6.2"}, + {"7", "g", "7.2"}, + {"8", "h", "8.2"}, + {"9", "i", "9.2"}, + {"10", "j", "10.2"}, + {"11", "k", "11.2"}, + {"12", "l", "12.2"}, + }, + }, + { + schema: "source_test", + table: "test2", + createTableSQL: "CREATE TABLE `source_test`.`test2` (`a` int, `b` varchar(24), `c` float, `d` datetime, primary key(`a`, `b`))", + rangeColumns: []string{"a", "b"}, + rangeLeft: []string{"3", "b"}, + rangeRight: []string{"5", "f"}, + }, + } + + tableDiffs := prepareTiDBTables(t, tableCases) + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + dbs := []*sql.DB{ + conn, conn, conn, conn, + } + + cs := make([]*config.DataSource, 4) + for i := range dbs { + mock.ExpectQuery("SHOW DATABASES").WillReturnRows(sqlmock.NewRows([]string{"Database"}).AddRow("mysql").AddRow("source_test")) + mock.ExpectQuery("SHOW FULL TABLES*").WillReturnRows(sqlmock.NewRows([]string{"Table", "type"}).AddRow("test1", "base").AddRow("test2", "base")) + cs[i] = &config.DataSource{Conn: conn} + } + + f, err := filter.Parse([]string{"source_test.*"}) + require.NoError(t, err) + shard, err := NewMySQLSources(ctx, tableDiffs, cs, 4, f, false) + require.NoError(t, err) + + for i := 0; i < len(dbs); i++ { + infoRows := sqlmock.NewRows([]string{"Table", "Create Table"}).AddRow("test_t", "CREATE TABLE `source_test`.`test1` (`a` int, `b` varchar(24), `c` float, primary key(`a`, `b`))") + variableRows := sqlmock.NewRows([]string{"Variable_name", "Value"}).AddRow("sql_mode", "ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION") + + mock.ExpectQuery("SHOW CREATE TABLE.*").WillReturnRows(infoRows) + mock.ExpectQuery("SHOW VARIABLE.*").WillReturnRows(variableRows) + } + info, err := shard.GetSourceStructInfo(ctx, 0) + require.NoError(t, err) + require.Equal(t, info[0].Name.O, "test1") + + for n, tableCase := range tableCases { + require.Equal(t, n, tableCase.rangeInfo.GetTableIndex()) + var resChecksum uint64 = 0 + for i := 0; i < len(dbs); i++ { + resChecksum = resChecksum + 1< 10 AND a < 20", + TargetTableInfo: nil, + Collation: "", + }, + }, + Task: config.TaskConfig{ + Source: []string{"mysql1"}, + Routes: nil, + Target: "tidb", + CheckTables: []string{"schema*.tbl"}, + TableConfigs: []string{"config1"}, + OutputDir: "./output", + SourceInstances: []*config.DataSource{ + { + Host: host, + Port: port, + User: "root", + Router: router, + }, + }, + TargetInstance: &config.DataSource{ + Host: 
host, + Port: port, + User: "root", + }, + TargetTableConfigs: []*config.TableConfig{ + { + Schema: "schema1", + Table: "tbl", + IgnoreColumns: []string{"", ""}, + Fields: []string{""}, + Range: "a > 10 AND a < 20", + TargetTableInfo: nil, + Collation: "", + }, + }, + TargetCheckTables: nil, + FixDir: "output/fix-on-tidb0", + CheckpointDir: "output/checkpoint", + HashFile: "", + }, + ConfigFile: "config.toml", + PrintVersion: false, + } + cfg.Task.TargetCheckTables, err = filter.Parse([]string{"schema*.tbl"}) + require.NoError(t, err) + + // create table + conn, err := sql.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/?charset=utf8mb4", host, port)) + require.NoError(t, err) + + conn.Exec("CREATE DATABASE IF NOT EXISTS schema1") + conn.Exec("CREATE TABLE IF NOT EXISTS `schema1`.`tbl` (`a` int, `b` varchar(24), `c` float, `d` datetime, primary key(`a`, `b`))") + // create db connections refused. + // TODO unit_test covers source.go + _, _, err = NewSources(ctx, cfg) + require.NoError(t, err) +} + +func TestRouterRules(t *testing.T) { + host, isExist := os.LookupEnv("MYSQL_HOST") + if host == "" || !isExist { + return + } + portStr, isExist := os.LookupEnv("MYSQL_PORT") + if portStr == "" || !isExist { + //return + } + port, err := strconv.Atoi(portStr) + require.NoError(t, err) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + r, err := router.NewTableRouter(false, []*router.TableRule{ + // make sure this rule works + { + SchemaPattern: "schema1", + TablePattern: "tbl", + TargetSchema: "schema2", + TargetTable: "tbl", + }, + }) + cfg := &config.Config{ + LogLevel: "debug", + CheckThreadCount: 4, + ExportFixSQL: true, + CheckStructOnly: false, + DataSources: map[string]*config.DataSource{ + "mysql1": { + Host: host, + Port: port, + User: "root", + }, + "tidb": { + Host: host, + Port: port, + User: "root", + }, + }, + Routes: nil, + Task: config.TaskConfig{ + Source: []string{"mysql1"}, + Routes: nil, + Target: "tidb", + CheckTables: []string{"schema2.tbl"}, + OutputDir: "./output", + SourceInstances: []*config.DataSource{ + { + Host: host, + Port: port, + User: "root", + Router: r, + RouteTargetSet: make(map[string]struct{}), + }, + }, + TargetInstance: &config.DataSource{ + Host: host, + Port: port, + User: "root", + }, + TargetCheckTables: nil, + FixDir: "output/fix-on-tidb0", + CheckpointDir: "output/checkpoint", + HashFile: "", + }, + ConfigFile: "config.toml", + PrintVersion: false, + } + cfg.Task.TargetCheckTables, err = filter.Parse([]string{"schema2.tbl", "schema_test.tbl"}) + require.NoError(t, err) + cfg.Task.SourceInstances[0].RouteTargetSet[dbutil.TableName("schema2", "tbl")] = struct{}{} + + // create table + conn, err := sql.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/?charset=utf8mb4", host, port)) + require.NoError(t, err) + + conn.Exec("CREATE DATABASE IF NOT EXISTS schema1") + conn.Exec("CREATE TABLE IF NOT EXISTS `schema1`.`tbl` (`a` int, `b` varchar(24), `c` float, `d` datetime, primary key(`a`, `b`))") + conn.Exec("CREATE DATABASE IF NOT EXISTS schema2") + conn.Exec("CREATE TABLE IF NOT EXISTS `schema2`.`tbl` (`a` int, `b` varchar(24), `c` float, `d` datetime, primary key(`a`, `b`))") + conn.Exec("CREATE DATABASE IF NOT EXISTS schema_test") + conn.Exec("CREATE TABLE IF NOT EXISTS `schema_test`.`tbl` (`a` int, `b` varchar(24), `c` float, `d` datetime, primary key(`a`, `b`))") + + _, _, err = NewSources(ctx, cfg) + require.NoError(t, err) + + require.Equal(t, 1, len(cfg.Task.SourceInstances)) + targetSchema, targetTable, err := 
cfg.Task.SourceInstances[0].Router.Route("schema1", "tbl") + require.NoError(t, err) + require.Equal(t, "schema2", targetSchema) + require.Equal(t, "tbl", targetTable) + targetSchema, targetTable, err = cfg.Task.SourceInstances[0].Router.Route("schema2", "tbl") + require.NoError(t, err) + require.Equal(t, ShieldDBName, targetSchema) + require.Equal(t, ShieldTableName, targetTable) + targetSchema, targetTable, err = cfg.Task.SourceInstances[0].Router.Route("schema_test", "tbl") + require.NoError(t, err) + require.Equal(t, "schema_test", targetSchema) + require.Equal(t, "tbl", targetTable) + _, tableRules := cfg.Task.SourceInstances[0].Router.AllRules() + require.Equal(t, 1, len(tableRules["schema1"])) + require.Equal(t, 1, len(tableRules["schema2"])) + require.Equal(t, 1, len(tableRules["schema_test"])) +} + +func TestInitTables(t *testing.T) { + ctx := context.Background() + cfg := config.NewConfig() + // Test case 1: test2.t2 will parse after filter. + require.NoError(t, cfg.Parse([]string{"--config", "../config/config.toml"})) + require.NoError(t, cfg.Init()) + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + cfg.Task.TargetInstance.Conn = conn + + rows := sqlmock.NewRows([]string{"Database"}).AddRow("mysql").AddRow("test2") + mock.ExpectQuery("SHOW DATABASES").WillReturnRows(rows) + rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("t1", "t1").AddRow("t2", "t2") + mock.ExpectQuery("SHOW FULL TABLES*").WillReturnRows(rows) + rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("t2", "CREATE TABLE `t2` (\n\t\t\t`id` int(11) DEFAULT NULL,\n\t\t \t`name` varchar(24) DEFAULT NULL\n\t\t\t) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin") + mock.ExpectQuery("SHOW CREATE TABLE *").WillReturnRows(rows) + rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("", "") + mock.ExpectQuery("SHOW VARIABLES LIKE*").WillReturnRows(rows) + + tablesToBeCheck, err := initTables(ctx, cfg) + require.NoError(t, err) + + require.Len(t, tablesToBeCheck, 1) + require.Equal(t, tablesToBeCheck[0].Schema, "test2") + require.Equal(t, tablesToBeCheck[0].Table, "t2") + // Range can be replaced during initTables + require.Equal(t, tablesToBeCheck[0].Range, "age > 10 AND age < 20") + + require.NoError(t, mock.ExpectationsWereMet()) + + // Test case 2: init failed due to conflict table config point to one table. 
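+ // Here two entries in `table-configs` of config_conflict.toml match the same
+ // target table, so initTables is expected to fail with the error asserted below.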
+ cfg = config.NewConfig() + require.NoError(t, cfg.Parse([]string{"--config", "../config/config_conflict.toml"})) + require.NoError(t, cfg.Init()) + cfg.Task.TargetInstance.Conn = conn + + rows = sqlmock.NewRows([]string{"Database"}).AddRow("mysql").AddRow("test2") + mock.ExpectQuery("SHOW DATABASES").WillReturnRows(rows) + rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("t1", "t1").AddRow("t2", "t2") + mock.ExpectQuery("SHOW FULL TABLES*").WillReturnRows(rows) + rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("t2", "CREATE TABLE `t2` (\n\t\t\t`id` int(11) DEFAULT NULL,\n\t\t \t`name` varchar(24) DEFAULT NULL\n\t\t\t) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin") + mock.ExpectQuery("SHOW CREATE TABLE *").WillReturnRows(rows) + rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("", "") + mock.ExpectQuery("SHOW VARIABLES LIKE*").WillReturnRows(rows) + + tablesToBeCheck, err = initTables(ctx, cfg) + require.Contains(t, err.Error(), "different config matched to same target table") + require.NoError(t, mock.ExpectationsWereMet()) +} + +func TestCheckTableMatched(t *testing.T) { + var tableDiffs []*common.TableDiff + tableDiffs = append(tableDiffs, &common.TableDiff{ + Schema: "test", + Table: "t1", + }) + tableDiffs = append(tableDiffs, &common.TableDiff{ + Schema: "test", + Table: "t2", + }) + + tmap := make(map[string]struct{}) + smap := make(map[string]struct{}) + + smap["`test`.`t1`"] = struct{}{} + smap["`test`.`t2`"] = struct{}{} + + tmap["`test`.`t1`"] = struct{}{} + tmap["`test`.`t2`"] = struct{}{} + + tables, err := checkTableMatched(tableDiffs, tmap, smap, false) + require.NoError(t, err) + + smap["`test`.`t3`"] = struct{}{} + tables, err = checkTableMatched(tableDiffs, tmap, smap, false) + require.Contains(t, err.Error(), "the target has no table to be compared. source-table is ``test`.`t3``") + + delete(smap, "`test`.`t2`") + tables, err = checkTableMatched(tableDiffs, tmap, smap, false) + require.Contains(t, err.Error(), "the source has no table to be compared. target-table is ``test`.`t2``") + + tables, err = checkTableMatched(tableDiffs, tmap, smap, true) + require.NoError(t, err) + require.Equal(t, 0, tables[0].TableLack) + require.Equal(t, 1, tables[1].TableLack) + require.Equal(t, -1, tables[2].TableLack) +} diff --git a/sync_diff_inspector/source/tidb.go b/sync_diff_inspector/source/tidb.go new file mode 100644 index 00000000000..0af78384cc1 --- /dev/null +++ b/sync_diff_inspector/source/tidb.go @@ -0,0 +1,285 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package source + +import ( + "context" + "database/sql" + "fmt" + "time" + + "github.com/coreos/go-semver/semver" + tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/filter" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/splitter" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +type TiDBTableAnalyzer struct { + dbConn *sql.DB + bucketSpliterPool *utils.WorkerPool + sourceTableMap map[string]*common.TableSource +} + +func (a *TiDBTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common.TableDiff, startRange *splitter.RangeInfo) (splitter.ChunkIterator, error) { + matchedSource := getMatchSource(a.sourceTableMap, table) + // Shallow Copy + originTable := *table + originTable.Schema = matchedSource.OriginSchema + originTable.Table = matchedSource.OriginTable + progressID := dbutil.TableName(table.Schema, table.Table) + // if we decide to use bucket to split chunks + // we always use bucksIter even we load from checkpoint is not bucketNode + // TODO check whether we can use bucket for this table to split chunks. + // NOTICE: If checkpoint use random splitter, it will also fail the next time build bucket splitter. + bucketIter, err := splitter.NewBucketIteratorWithCheckpoint(ctx, progressID, &originTable, a.dbConn, startRange, a.bucketSpliterPool) + if err == nil { + return bucketIter, nil + } + log.Info("failed to build bucket iterator, fall back to use random iterator", zap.Error(err)) + // fall back to random splitter + + // use random splitter if we cannot use bucket splitter, then we can simply choose target table to generate chunks. 
+ randIter, err := splitter.NewRandomIteratorWithCheckpoint(ctx, progressID, &originTable, a.dbConn, startRange) + if err != nil { + return nil, errors.Trace(err) + } + return randIter, nil + +} + +type TiDBRowsIterator struct { + rows *sql.Rows +} + +func (s *TiDBRowsIterator) Close() { + s.rows.Close() +} + +func (s *TiDBRowsIterator) Next() (map[string]*dbutil.ColumnData, error) { + if s.rows.Next() { + return dbutil.ScanRow(s.rows) + } + return nil, nil +} + +type TiDBSource struct { + tableDiffs []*common.TableDiff + sourceTableMap map[string]*common.TableSource + snapshot string + // bucketSpliterPool is the shared pool to produce chunks using bucket + bucketSpliterPool *utils.WorkerPool + dbConn *sql.DB + + version *semver.Version +} + +func (s *TiDBSource) GetTableAnalyzer() TableAnalyzer { + return &TiDBTableAnalyzer{ + s.dbConn, + s.bucketSpliterPool, + s.sourceTableMap, + } +} + +func getMatchSource(sourceTableMap map[string]*common.TableSource, table *common.TableDiff) *common.TableSource { + if len(sourceTableMap) == 0 { + // no sourceTableMap, return the origin table name + return &common.TableSource{ + OriginSchema: table.Schema, + OriginTable: table.Table, + } + } + uniqueID := utils.UniqueID(table.Schema, table.Table) + return sourceTableMap[uniqueID] +} + +func (s *TiDBSource) GetRangeIterator(ctx context.Context, r *splitter.RangeInfo, analyzer TableAnalyzer, splitThreadCount int) (RangeIterator, error) { + return NewChunksIterator(ctx, analyzer, s.tableDiffs, r, splitThreadCount) +} + +func (s *TiDBSource) Close() { + s.dbConn.Close() +} +func (s *TiDBSource) GetCountAndMd5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { + beginTime := time.Now() + table := s.tableDiffs[tableRange.GetTableIndex()] + chunk := tableRange.GetChunk() + + matchSource := getMatchSource(s.sourceTableMap, table) + count, checksum, err := utils.GetCountAndMd5Checksum(ctx, s.dbConn, matchSource.OriginSchema, matchSource.OriginTable, table.Info, chunk.Where, chunk.Args) + + cost := time.Since(beginTime) + return &ChecksumInfo{ + Checksum: checksum, + Count: count, + Err: err, + Cost: cost, + } +} + +func (s *TiDBSource) GetCountForLackTable(ctx context.Context, tableRange *splitter.RangeInfo) int64 { + table := s.tableDiffs[tableRange.GetTableIndex()] + matchSource := getMatchSource(s.sourceTableMap, table) + if matchSource != nil { + count, _ := dbutil.GetRowCount(ctx, s.dbConn, matchSource.OriginSchema, matchSource.OriginTable, "", nil) + return count + } + return 0 +} + +func (s *TiDBSource) GetTables() []*common.TableDiff { + return s.tableDiffs +} + +func (s *TiDBSource) GetSourceStructInfo(ctx context.Context, tableIndex int) ([]*model.TableInfo, error) { + var err error + tableInfos := make([]*model.TableInfo, 1) + tableDiff := s.GetTables()[tableIndex] + source := getMatchSource(s.sourceTableMap, tableDiff) + tableInfos[0], err = utils.GetTableInfoWithVersion(ctx, s.GetDB(), source.OriginSchema, source.OriginTable, s.version) + if err != nil { + return nil, errors.Trace(err) + } + tableInfos[0], _ = utils.ResetColumns(tableInfos[0], tableDiff.IgnoreColumns) + return tableInfos, nil +} + +func (s *TiDBSource) GenerateFixSQL(t DMLType, upstreamData, downstreamData map[string]*dbutil.ColumnData, tableIndex int) string { + if t == Insert { + return utils.GenerateReplaceDML(upstreamData, s.tableDiffs[tableIndex].Info, s.tableDiffs[tableIndex].Schema) + } + if t == Delete { + return utils.GenerateDeleteDML(downstreamData, s.tableDiffs[tableIndex].Info, 
s.tableDiffs[tableIndex].Schema) + } + if t == Replace { + return utils.GenerateReplaceDMLWithAnnotation(upstreamData, downstreamData, s.tableDiffs[tableIndex].Info, s.tableDiffs[tableIndex].Schema) + } + log.Fatal("Don't support this type", zap.Any("dml type", t)) + return "" +} + +func (s *TiDBSource) GetRowsIterator(ctx context.Context, tableRange *splitter.RangeInfo) (RowDataIterator, error) { + chunk := tableRange.GetChunk() + + table := s.tableDiffs[tableRange.GetTableIndex()] + matchedSource := getMatchSource(s.sourceTableMap, table) + rowsQuery, _ := utils.GetTableRowsQueryFormat(matchedSource.OriginSchema, matchedSource.OriginTable, table.Info, table.Collation) + query := fmt.Sprintf(rowsQuery, chunk.Where) + + log.Debug("select data", zap.String("sql", query), zap.Reflect("args", chunk.Args)) + rows, err := s.dbConn.QueryContext(ctx, query, chunk.Args...) + if err != nil { + return nil, errors.Trace(err) + } + return &TiDBRowsIterator{ + rows, + }, nil +} + +func (s *TiDBSource) GetDB() *sql.DB { + return s.dbConn +} + +func (s *TiDBSource) GetSnapshot() string { + return s.snapshot +} + +func NewTiDBSource( + ctx context.Context, + tableDiffs []*common.TableDiff, ds *config.DataSource, + bucketSpliterPool *utils.WorkerPool, + f tableFilter.Filter, skipNonExistingTable bool, +) (Source, error) { + sourceTableMap := make(map[string]*common.TableSource) + log.Info("find router for tidb source") + // we should get the real table name + // and real table row query from source. + targetUniqueTableMap := make(map[string]struct{}) + for _, tableDiff := range tableDiffs { + targetUniqueTableMap[utils.UniqueID(tableDiff.Schema, tableDiff.Table)] = struct{}{} + } + sourceTablesAfterRoute := make(map[string]struct{}) + + // instance -> db -> table + allTablesMap := make(map[string]map[string]interface{}) + sourceSchemas, err := dbutil.GetSchemas(ctx, ds.Conn) + + if err != nil { + return nil, errors.Annotatef(err, "get schemas from database") + } + + for _, schema := range sourceSchemas { + if filter.IsSystemSchema(schema) { + // ignore system schema + continue + } + allTables, err := dbutil.GetTables(ctx, ds.Conn, schema) + if err != nil { + return nil, errors.Annotatef(err, "get tables from %s", schema) + } + allTablesMap[schema] = utils.SliceToMap(allTables) + } + + for schema, allTables := range allTablesMap { + for table := range allTables { + targetSchema, targetTable := schema, table + if ds.Router != nil { + targetSchema, targetTable, err = ds.Router.Route(schema, table) + if err != nil { + return nil, errors.Errorf("get route result for %s.%s failed, error %v", schema, table, err) + } + } + + uniqueId := utils.UniqueID(targetSchema, targetTable) + isMatched := f.MatchTable(targetSchema, targetTable) + if isMatched { + // if match the filter, we should respect it and check target has this table later. + sourceTablesAfterRoute[uniqueId] = struct{}{} + } + if _, ok := targetUniqueTableMap[uniqueId]; ok || (isMatched && skipNonExistingTable) { + if _, ok := sourceTableMap[uniqueId]; ok { + log.Error("TiDB source don't support compare multiple source tables with one downstream table," + + " if this happening when diff on same instance is fine. 
otherwise we are not guarantee this diff result is right") + } + sourceTableMap[uniqueId] = &common.TableSource{ + OriginSchema: schema, + OriginTable: table, + } + } + } + } + + tableDiffs, err = checkTableMatched(tableDiffs, targetUniqueTableMap, sourceTablesAfterRoute, skipNonExistingTable) + if err != nil { + return nil, errors.Annotatef(err, "please make sure the filter is correct.") + } + ts := &TiDBSource{ + tableDiffs: tableDiffs, + sourceTableMap: sourceTableMap, + snapshot: ds.Snapshot, + dbConn: ds.Conn, + bucketSpliterPool: bucketSpliterPool, + version: utils.TryToGetVersion(ctx, ds.Conn), + } + return ts, nil +} diff --git a/sync_diff_inspector/splitter/bucket.go b/sync_diff_inspector/splitter/bucket.go new file mode 100644 index 00000000000..b64b3ae68bd --- /dev/null +++ b/sync_diff_inspector/splitter/bucket.go @@ -0,0 +1,365 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package splitter + +import ( + "context" + "database/sql" + "sync" + + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/progress" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +const DefaultChannelBuffer = 1024 + +type BucketIterator struct { + buckets []dbutil.Bucket + table *common.TableDiff + indexColumns []*model.ColumnInfo + + chunkPool *utils.WorkerPool + wg sync.WaitGroup // control for one bucket in shared chunkPool + + chunkSize int64 + chunks []*chunk.Range + nextChunk uint + + chunksCh chan []*chunk.Range + errCh chan error + cancel context.CancelFunc + indexID int64 + progressID string + + dbConn *sql.DB +} + +func NewBucketIterator(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB) (*BucketIterator, error) { + return NewBucketIteratorWithCheckpoint(ctx, progressID, table, dbConn, nil, utils.NewWorkerPool(1, "bucketIter")) +} + +func NewBucketIteratorWithCheckpoint( + ctx context.Context, + progressID string, + table *common.TableDiff, + dbConn *sql.DB, + startRange *RangeInfo, + bucketSpliterPool *utils.WorkerPool, +) (*BucketIterator, error) { + if !utils.IsRangeTrivial(table.Range) { + return nil, errors.Errorf( + "BucketIterator does not support user configured Range. Range: %s", + table.Range) + } + + bctx, cancel := context.WithCancel(ctx) + bs := &BucketIterator{ + table: table, + chunkPool: bucketSpliterPool, + chunkSize: table.ChunkSize, + chunksCh: make(chan []*chunk.Range, DefaultChannelBuffer), + errCh: make(chan error, 1), + cancel: cancel, + dbConn: dbConn, + + progressID: progressID, + } + + if err := bs.init(ctx, startRange); err != nil { + return nil, errors.Trace(err) + } + + // Let the progress bar begins to record the table. 
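+ // The total is 0 at this point; it is increased by progress.UpdateTotal as
+ // produceChunks emits batches of chunks below.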
+ progress.StartTable(bs.progressID, 0, false) + go bs.produceChunks(bctx, startRange) + + return bs, nil +} + +func (s *BucketIterator) GetIndexID() int64 { + return s.indexID +} + +func (s *BucketIterator) Next() (*chunk.Range, error) { + var ok bool + if uint(len(s.chunks)) <= s.nextChunk { + select { + case err := <-s.errCh: + return nil, errors.Trace(err) + case s.chunks, ok = <-s.chunksCh: + if !ok && s.chunks == nil { + log.Info("close chunks channel for table", + zap.String("schema", s.table.Schema), zap.String("table", s.table.Table)) + return nil, nil + } + } + s.nextChunk = 0 + failpoint.Inject("ignore-last-n-chunk-in-bucket", func(v failpoint.Value) { + log.Info("failpoint ignore-last-n-chunk-in-bucket injected (bucket splitter)", zap.Int("n", v.(int))) + if len(s.chunks) <= 1+v.(int) { + failpoint.Return(nil, nil) + } + s.chunks = s.chunks[:(len(s.chunks) - v.(int))] + }) + } + + c := s.chunks[s.nextChunk] + s.nextChunk = s.nextChunk + 1 + failpoint.Inject("print-chunk-info", func() { + lowerBounds := make([]string, len(c.Bounds)) + upperBounds := make([]string, len(c.Bounds)) + for i, bound := range c.Bounds { + lowerBounds[i] = bound.Lower + upperBounds[i] = bound.Upper + } + log.Info("failpoint print-chunk-info injected (bucket splitter)", zap.Strings("lowerBounds", lowerBounds), zap.Strings("upperBounds", upperBounds), zap.String("indexCode", c.Index.ToString())) + }) + return c, nil +} + +func (s *BucketIterator) init(ctx context.Context, startRange *RangeInfo) error { + fields, err := indexFieldsFromConfigString(s.table.Fields, s.table.Info) + if err != nil { + return err + } + + s.nextChunk = 0 + buckets, err := dbutil.GetBucketsInfo(ctx, s.dbConn, s.table.Schema, s.table.Table, s.table.Info) + if err != nil { + return errors.Trace(err) + } + + var indices []*model.IndexInfo + if fields.IsEmpty() { + indices, err = utils.GetBetterIndex(context.Background(), s.dbConn, s.table.Schema, s.table.Table, s.table.Info) + if err != nil { + return errors.Trace(err) + } + } else { + // There are user configured "index-fields", so we will try to match from all indices. + indices = dbutil.FindAllIndex(s.table.Info) + } + +NEXTINDEX: + for _, index := range indices { + if index == nil { + continue + } + if startRange != nil && startRange.IndexID != index.ID { + continue + } + + indexColumns := utils.GetColumnsFromIndex(index, s.table.Info) + + if len(indexColumns) < len(index.Columns) { + // some column in index is ignored. + continue + } + + if !fields.MatchesIndex(index) { + // We are enforcing user configured "index-fields" settings. + continue + } + + // skip the index that has expression column + for _, col := range indexColumns { + if col.Hidden { + continue NEXTINDEX + } + } + + bucket, ok := buckets[index.Name.O] + if !ok { + // We found an index matching the "index-fields", but no bucket is found + // for that index. Returning an error here will make the caller retry with + // the random splitter. + return errors.NotFoundf("index %s in buckets info", index.Name.O) + } + log.Debug("buckets for index", zap.String("index", index.Name.O), zap.Reflect("buckets", buckets)) + + s.buckets = bucket + s.indexColumns = indexColumns + s.indexID = index.ID + break + } + + if s.buckets == nil || s.indexColumns == nil { + return errors.NotFoundf("no index to split buckets") + } + + // Notice: `cnt` is only an estimated value + cnt := s.buckets[len(s.buckets)-1].Count + // We can use config file to fix chunkSize, + // otherwise chunkSize is 0. 
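+ // When no chunk size is configured, derive one from the estimated row count above.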
+ if s.chunkSize <= 0 { + s.chunkSize = utils.CalculateChunkSize(cnt) + } + + log.Info("get chunk size for table", zap.Int64("chunk size", s.chunkSize), + zap.String("db", s.table.Schema), zap.String("table", s.table.Table)) + return nil +} + +func (s *BucketIterator) Close() { + s.cancel() +} + +func (s *BucketIterator) splitChunkForBucket(ctx context.Context, firstBucketID, lastBucketID int, beginIndex int, bucketChunkCnt int, splitChunkCnt int, chunkRange *chunk.Range) { + s.wg.Add(1) + s.chunkPool.Apply(func() { + defer s.wg.Done() + chunks, err := splitRangeByRandom(ctx, s.dbConn, chunkRange, splitChunkCnt, s.table.Schema, s.table.Table, s.indexColumns, s.table.Range, s.table.Collation) + if err != nil { + select { + case <-ctx.Done(): + case s.errCh <- errors.Trace(err): + } + return + } + chunk.InitChunks(chunks, chunk.Bucket, firstBucketID, lastBucketID, beginIndex, s.table.Collation, s.table.Range, bucketChunkCnt) + progress.UpdateTotal(s.progressID, len(chunks), false) + s.chunksCh <- chunks + }) +} + +func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInfo) { + defer func() { + s.wg.Wait() + progress.UpdateTotal(s.progressID, 0, true) + close(s.chunksCh) + }() + var ( + lowerValues, upperValues []string + latestCount int64 + err error + ) + firstBucket := 0 + if startRange != nil { + c := startRange.GetChunk() + if c.IsLastChunkForTable() { + // the last checkpoint range is the last chunk so return + return + } + // init values for the next bucket + firstBucket = c.Index.BucketIndexRight + 1 + // Note: Since this chunk is not the last one, + // its bucketID is less than len(s.buckets) + if c.Index.BucketIndexRight >= len(s.buckets) { + select { + case <-ctx.Done(): + case s.errCh <- errors.New("Wrong Bucket: Bucket index of the checkpoint node is larger than buckets' size"): + } + return + } + latestCount = s.buckets[c.Index.BucketIndexRight].Count + nextUpperValues, err := dbutil.AnalyzeValuesFromBuckets(s.buckets[c.Index.BucketIndexRight].UpperBound, s.indexColumns) + if err != nil { + select { + case <-ctx.Done(): + case s.errCh <- errors.Trace(err): + } + return + } + lowerValues = nextUpperValues + + // build left chunks for this bucket + leftCnt := c.Index.ChunkCnt - c.Index.ChunkIndex - 1 + if leftCnt > 0 { + chunkRange := chunk.NewChunkRange() + + for i, column := range s.indexColumns { + chunkRange.Update(column.Name.O, "", nextUpperValues[i], false, true) + } + + for _, bound := range c.Bounds { + chunkRange.Update(bound.Column, bound.Upper, "", true, false) + } + + s.splitChunkForBucket(ctx, c.Index.BucketIndexLeft, c.Index.BucketIndexRight, c.Index.ChunkIndex+1, c.Index.ChunkCnt, leftCnt, chunkRange) + } + } + halfChunkSize := s.chunkSize >> 1 + // `firstBucket` is the first bucket of one chunk. + // It is equivalent to `BucketLeftIndex` of the chunk's ID. 
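+ // Walk the remaining buckets, merging consecutive buckets until the accumulated
+ // row count reaches chunkSize, then split that range into chunks in parallel.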
+ for i := firstBucket; i < len(s.buckets); i++ { + count := s.buckets[i].Count - latestCount + if count < s.chunkSize { + // merge more buckets into one chunk + continue + } + + upperValues, err = dbutil.AnalyzeValuesFromBuckets(s.buckets[i].UpperBound, s.indexColumns) + if err != nil { + select { + case <-ctx.Done(): + case s.errCh <- errors.Trace(err): + } + return + } + + chunkRange := chunk.NewChunkRange() + for j, column := range s.indexColumns { + var lowerValue, upperValue string + if len(lowerValues) > 0 { + lowerValue = lowerValues[j] + } + if len(upperValues) > 0 { + upperValue = upperValues[j] + } + chunkRange.Update(column.Name.O, lowerValue, upperValue, len(lowerValues) > 0, len(upperValues) > 0) + } + + // `splitRangeByRandom` will skip when chunkCnt <= 1 + // assume the number of the selected buckets is `x` + // if x >= 2 -> chunkCnt = 1 + // if x = 1 -> chunkCnt = (count + halfChunkSize) / chunkSize + // count >= chunkSize + if i == firstBucket { + // + chunkCnt := int((count + halfChunkSize) / s.chunkSize) + s.splitChunkForBucket(ctx, firstBucket, i, 0, chunkCnt, chunkCnt, chunkRange) + } else { + // use multi-buckets so chunkCnt = 1 + s.splitChunkForBucket(ctx, firstBucket, i, 0, 1, 1, chunkRange) + } + + latestCount = s.buckets[i].Count + lowerValues = upperValues + firstBucket = i + 1 + + failpoint.Inject("check-one-bucket", func() { + log.Info("failpoint check-one-bucket injected, stop producing new chunks.") + failpoint.Return() + }) + } + + // merge the rest keys into one chunk + chunkRange := chunk.NewChunkRange() + if len(lowerValues) > 0 { + for j, column := range s.indexColumns { + chunkRange.Update(column.Name.O, lowerValues[j], "", true, false) + } + } + // When the table is much less than chunkSize, + // it will return a chunk include the whole table. + s.splitChunkForBucket(ctx, firstBucket, len(s.buckets), 0, 1, 1, chunkRange) +} diff --git a/sync_diff_inspector/splitter/index_fields.go b/sync_diff_inspector/splitter/index_fields.go new file mode 100644 index 00000000000..1508d59075d --- /dev/null +++ b/sync_diff_inspector/splitter/index_fields.go @@ -0,0 +1,111 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package splitter + +import ( + "sort" + "strings" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +// indexFields wraps the column info for the user config "index-fields". 
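+// An empty option matches every index; otherwise MatchesIndex requires the
+// configured columns to match the index columns exactly (ignoring order).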
+type indexFields struct { + cols []*model.ColumnInfo + tableInfo *model.TableInfo + empty bool +} + +func indexFieldsFromConfigString(strFields string, tableInfo *model.TableInfo) (*indexFields, error) { + if len(strFields) == 0 { + // Empty option + return &indexFields{empty: true}, nil + } + + if tableInfo == nil { + log.Panic("parsing index fields with empty tableInfo", + zap.String("index-fields", strFields)) + } + + splitFieldArr := strings.Split(strFields, ",") + for i := range splitFieldArr { + splitFieldArr[i] = strings.TrimSpace(splitFieldArr[i]) + } + + fields, err := GetSplitFields(tableInfo, splitFieldArr) + if err != nil { + return nil, errors.Trace(err) + } + + // Sort the columns to help with comparison. + sortColsInPlace(fields) + + return &indexFields{ + cols: fields, + tableInfo: tableInfo, + }, nil +} + +func (f *indexFields) MatchesIndex(index *model.IndexInfo) bool { + if f.empty { + // Default config matches all. + return true + } + + // Sanity checks. + if index == nil { + log.Panic("matching with empty index") + } + if len(f.cols) == 0 { + log.Panic("unexpected cols with length 0") + } + + if len(index.Columns) != len(f.cols) { + // We need an exact match. + // Lengths not matching eliminates the possibility. + return false + } + + indexCols := utils.GetColumnsFromIndex(index, f.tableInfo) + // Sort for comparison + sortColsInPlace(indexCols) + + for i := 0; i < len(indexCols); i++ { + if f.cols[i].ID != indexCols[i].ID { + return false + } + } + + return true +} + +func (f *indexFields) Cols() []*model.ColumnInfo { + return f.cols +} + +// IsEmpty returns true if the struct represents an empty +// user-configured "index-fields" option. +func (f *indexFields) IsEmpty() bool { + return f.empty +} + +func sortColsInPlace(cols []*model.ColumnInfo) { + sort.SliceStable(cols, func(i, j int) bool { + return cols[i].ID < cols[j].ID + }) +} diff --git a/sync_diff_inspector/splitter/index_fields_test.go b/sync_diff_inspector/splitter/index_fields_test.go new file mode 100644 index 00000000000..6b6cc5768e7 --- /dev/null +++ b/sync_diff_inspector/splitter/index_fields_test.go @@ -0,0 +1,106 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package splitter + +import ( + "testing" + + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/stretchr/testify/require" +) + +func TestIndexFieldsSimple(t *testing.T) { + t.Parallel() + + createTableSQL1 := "CREATE TABLE `sbtest1` " + + "(`id` int(11) NOT NULL AUTO_INCREMENT, " + + " `k` int(11) NOT NULL DEFAULT '0', " + + "`c` char(120) NOT NULL DEFAULT '', " + + "PRIMARY KEY (`id`), KEY `k_1` (`k`))" + + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + + fields, err := indexFieldsFromConfigString("k", tableInfo) + require.NoError(t, err) + require.False(t, fields.IsEmpty()) + require.Len(t, fields.Cols(), 1) + + for _, index := range tableInfo.Indices { + switch index.Name.String() { + case "PRIMARY": + require.False(t, fields.MatchesIndex(index)) + case "k_1": + require.True(t, fields.MatchesIndex(index)) + default: + require.FailNow(t, "unreachable") + } + } +} + +func TestIndexFieldsComposite(t *testing.T) { + t.Parallel() + + createTableSQL1 := "CREATE TABLE `sbtest1` " + + "(`id` int(11) NOT NULL AUTO_INCREMENT, " + + " `k` int(11) NOT NULL DEFAULT '0', " + + "`c` char(120) NOT NULL DEFAULT '', " + + "PRIMARY KEY (`id`, `k`)," + + "KEY `k_1` (`k`)," + + "UNIQUE INDEX `c_1` (`c`))" + + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + + fields, err := indexFieldsFromConfigString("id, k", tableInfo) + require.NoError(t, err) + require.False(t, fields.IsEmpty()) + require.Len(t, fields.Cols(), 2) + + for _, index := range tableInfo.Indices { + switch index.Name.String() { + case "PRIMARY": + require.True(t, fields.MatchesIndex(index)) + case "k_1": + require.False(t, fields.MatchesIndex(index)) + case "c_1": + require.False(t, fields.MatchesIndex(index)) + default: + require.FailNow(t, "unreachable") + } + } +} + +func TestIndexFieldsEmpty(t *testing.T) { + t.Parallel() + + createTableSQL1 := "CREATE TABLE `sbtest1` " + + "(`id` int(11) NOT NULL AUTO_INCREMENT, " + + " `k` int(11) NOT NULL DEFAULT '0', " + + "`c` char(120) NOT NULL DEFAULT '', " + + "PRIMARY KEY (`id`), KEY `k_1` (`k`))" + + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + + fields, err := indexFieldsFromConfigString("", tableInfo) + require.NoError(t, err) + require.True(t, fields.IsEmpty()) + + for _, index := range tableInfo.Indices { + // Expected to match all. + require.True(t, fields.MatchesIndex(index)) + } +} diff --git a/sync_diff_inspector/splitter/limit.go b/sync_diff_inspector/splitter/limit.go new file mode 100644 index 00000000000..0075506a673 --- /dev/null +++ b/sync_diff_inspector/splitter/limit.go @@ -0,0 +1,259 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package splitter + +import ( + "context" + "database/sql" + "fmt" + "strings" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/progress" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +type LimitIterator struct { + table *common.TableDiff + tagChunk *chunk.Range + queryTmpl string + + indexID int64 + + chunksCh chan *chunk.Range + errCh chan error + cancel context.CancelFunc + dbConn *sql.DB + + progressID string + columnOffset map[string]int +} + +func NewLimitIterator(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB) (*LimitIterator, error) { + return NewLimitIteratorWithCheckpoint(ctx, progressID, table, dbConn, nil) +} + +func NewLimitIteratorWithCheckpoint(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB, startRange *RangeInfo) (*LimitIterator, error) { + indices, err := utils.GetBetterIndex(ctx, dbConn, table.Schema, table.Table, table.Info) + if err != nil { + return nil, errors.Trace(err) + } + var indexColumns []*model.ColumnInfo + var tagChunk *chunk.Range + columnOffset := make(map[string]int) + chunksCh := make(chan *chunk.Range, DefaultChannelBuffer) + errCh := make(chan error) + undone := startRange == nil + beginBucketID := 0 + var indexID int64 + for _, index := range indices { + if index == nil { + continue + } + if startRange != nil && startRange.IndexID != index.ID { + continue + } + log.Debug("Limit select index", zap.String("index", index.Name.O)) + + indexColumns = utils.GetColumnsFromIndex(index, table.Info) + + if len(indexColumns) < len(index.Columns) { + // some column in index is ignored. 
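+			// (for example, it is listed in ignore-columns), so this index cannot
+			// provide a complete set of bound columns; try the next candidate index.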
+ log.Debug("indexColumns empty, try next index") + indexColumns = nil + continue + } + + indexID = index.ID + for i, indexColumn := range indexColumns { + columnOffset[indexColumn.Name.O] = i + } + + if startRange != nil { + tagChunk = chunk.NewChunkRange() + bounds := startRange.ChunkRange.Bounds + if len(bounds) != len(indexColumns) { + log.Warn("checkpoint node columns are not equal to selected index columns, skip checkpoint.") + break + } + + for _, bound := range bounds { + undone = undone || bound.HasUpper + tagChunk.Update(bound.Column, bound.Upper, "", bound.HasUpper, false) + } + + beginBucketID = startRange.ChunkRange.Index.BucketIndexRight + 1 + + } else { + tagChunk = chunk.NewChunkRangeOffset(columnOffset) + } + + break + } + + if indexColumns == nil { + return nil, errors.NotFoundf("not found index") + } + + chunkSize := table.ChunkSize + if chunkSize <= 0 { + cnt, err := dbutil.GetRowCount(ctx, dbConn, table.Schema, table.Table, "", nil) + if err != nil { + return nil, errors.Trace(err) + } + if len(table.Info.Indices) != 0 { + chunkSize = utils.CalculateChunkSize(cnt) + } else { + // no index + // will use table scan + // so we use one chunk + chunkSize = cnt + } + } + log.Info("get chunk size for table", zap.Int64("chunk size", chunkSize), + zap.String("db", table.Schema), zap.String("table", table.Table)) + + lctx, cancel := context.WithCancel(ctx) + queryTmpl := generateLimitQueryTemplate(indexColumns, table, chunkSize) + + limitIterator := &LimitIterator{ + table, + tagChunk, + queryTmpl, + + indexID, + + chunksCh, + errCh, + + cancel, + dbConn, + + progressID, + columnOffset, + } + + progress.StartTable(progressID, 0, false) + if !undone { + // this table is finished. + close(chunksCh) + } else { + go limitIterator.produceChunks(lctx, beginBucketID) + } + + return limitIterator, nil +} + +func (lmt *LimitIterator) Close() { + lmt.cancel() +} + +func (lmt *LimitIterator) Next() (*chunk.Range, error) { + select { + case err := <-lmt.errCh: + return nil, errors.Trace(err) + case c, ok := <-lmt.chunksCh: + if !ok && c == nil { + return nil, nil + } + return c, nil + } +} + +func (lmt *LimitIterator) GetIndexID() int64 { + return lmt.indexID +} + +func (lmt *LimitIterator) produceChunks(ctx context.Context, bucketID int) { + for { + where, args := lmt.tagChunk.ToString(lmt.table.Collation) + query := fmt.Sprintf(lmt.queryTmpl, where) + dataMap, err := lmt.getLimitRow(ctx, query, args) + if err != nil { + select { + case <-ctx.Done(): + case lmt.errCh <- errors.Trace(err): + } + return + } + + chunkRange := lmt.tagChunk + lmt.tagChunk = nil + if dataMap == nil { + // there is no row in result set + chunk.InitChunk(chunkRange, chunk.Limit, bucketID, bucketID, lmt.table.Collation, lmt.table.Range) + bucketID++ + progress.UpdateTotal(lmt.progressID, 1, true) + select { + case <-ctx.Done(): + case lmt.chunksCh <- chunkRange: + } + close(lmt.chunksCh) + return + } + + newTagChunk := chunk.NewChunkRangeOffset(lmt.columnOffset) + for column, data := range dataMap { + newTagChunk.Update(column, string(data.Data), "", !data.IsNull, false) + chunkRange.Update(column, "", string(data.Data), false, !data.IsNull) + } + + chunk.InitChunk(chunkRange, chunk.Limit, bucketID, bucketID, lmt.table.Collation, lmt.table.Range) + bucketID++ + progress.UpdateTotal(lmt.progressID, 1, false) + select { + case <-ctx.Done(): + return + case lmt.chunksCh <- chunkRange: + } + lmt.tagChunk = newTagChunk + } +} + +func (lmt *LimitIterator) getLimitRow(ctx context.Context, query string, args []interface{}) 
(map[string]*dbutil.ColumnData, error) { + rows, err := lmt.dbConn.QueryContext(ctx, query, args...) + if err != nil { + return nil, err + } + defer rows.Close() + if !rows.Next() { + if err := rows.Err(); err != nil { + return nil, err + } + return nil, nil + } + dataMap, err := dbutil.ScanRow(rows) + if err != nil { + return nil, err + } + return dataMap, nil +} + +func generateLimitQueryTemplate(indexColumns []*model.ColumnInfo, table *common.TableDiff, chunkSize int64) string { + fields := make([]string, 0, len(indexColumns)) + for _, columnInfo := range indexColumns { + fields = append(fields, dbutil.ColumnName(columnInfo.Name.O)) + } + columns := strings.Join(fields, ", ") + + // TODO: the limit splitter has not been used yet. + // once it is used, need to add `collation` after `ORDER BY`. + return fmt.Sprintf("SELECT %s FROM %s WHERE %%s ORDER BY %s LIMIT %d,1", columns, dbutil.TableName(table.Schema, table.Table), columns, chunkSize) +} diff --git a/sync_diff_inspector/splitter/random.go b/sync_diff_inspector/splitter/random.go new file mode 100644 index 00000000000..470c33c6a24 --- /dev/null +++ b/sync_diff_inspector/splitter/random.go @@ -0,0 +1,248 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package splitter + +import ( + "context" + "database/sql" + "fmt" + "strings" + + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/progress" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +type RandomIterator struct { + table *common.TableDiff + chunkSize int64 + chunks []*chunk.Range + nextChunk uint + + dbConn *sql.DB +} + +func NewRandomIterator(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB) (*RandomIterator, error) { + return NewRandomIteratorWithCheckpoint(ctx, progressID, table, dbConn, nil) +} + +func NewRandomIteratorWithCheckpoint(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB, startRange *RangeInfo) (*RandomIterator, error) { + // get the chunk count by data count and chunk size + var splitFieldArr []string + if len(table.Fields) != 0 { + splitFieldArr = strings.Split(table.Fields, ",") + } + + for i := range splitFieldArr { + splitFieldArr[i] = strings.TrimSpace(splitFieldArr[i]) + } + + fields, err := GetSplitFields(table.Info, splitFieldArr) + if err != nil { + return nil, errors.Trace(err) + } + + chunkRange := chunk.NewChunkRange() + beginIndex := 0 + bucketChunkCnt := 0 + chunkCnt := 0 + var chunkSize int64 = 0 + if startRange != nil { + c := startRange.GetChunk() + if c.IsLastChunkForTable() { + return &RandomIterator{ + table: table, + chunkSize: 0, + chunks: nil, + nextChunk: 0, + dbConn: dbConn, + }, nil + } + // The sequences in `chunk.Range.Bounds` should be equivalent. 
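+		// Resume after the checkpoint: the checkpoint chunk's upper bounds become
+		// the lower bounds of the remaining range, which is re-split by random below.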
+ for _, bound := range c.Bounds { + chunkRange.Update(bound.Column, bound.Upper, "", true, false) + } + + // Recover the chunkIndex. Let it be next to the checkpoint node. + beginIndex = c.Index.ChunkIndex + 1 + bucketChunkCnt = c.Index.ChunkCnt + // For chunk splitted by random splitter, the checkpoint chunk records the tableCnt. + chunkCnt = bucketChunkCnt - beginIndex + } else { + cnt, err := dbutil.GetRowCount(ctx, dbConn, table.Schema, table.Table, table.Range, nil) + if err != nil { + return nil, errors.Trace(err) + } + + chunkSize = table.ChunkSize + // We can use config file to fix chunkSize, + // otherwise chunkSize is 0. + if chunkSize <= 0 { + if len(table.Info.Indices) != 0 { + chunkSize = utils.CalculateChunkSize(cnt) + } else { + // no index + // will use table scan + // so we use one chunk + // plus 1 to avoid chunkSize is 0 + // while chunkCnt = (2cnt)/(cnt+1) <= 1 + chunkSize = cnt + 1 + } + } + log.Info("get chunk size for table", zap.Int64("chunk size", chunkSize), + zap.String("db", table.Schema), zap.String("table", table.Table)) + + // When cnt is 0, chunkCnt should be also 0. + // When cnt is in [1, chunkSize], chunkCnt should be 1. + chunkCnt = int((cnt + chunkSize - 1) / chunkSize) + log.Info("split range by random", zap.Int64("row count", cnt), zap.Int("split chunk num", chunkCnt)) + bucketChunkCnt = chunkCnt + } + + chunks, err := splitRangeByRandom(ctx, dbConn, chunkRange, chunkCnt, table.Schema, table.Table, fields, table.Range, table.Collation) + if err != nil { + return nil, errors.Trace(err) + } + chunk.InitChunks(chunks, chunk.Random, 0, 0, beginIndex, table.Collation, table.Range, bucketChunkCnt) + + failpoint.Inject("ignore-last-n-chunk-in-bucket", func(v failpoint.Value) { + log.Info("failpoint ignore-last-n-chunk-in-bucket injected (random splitter)", zap.Int("n", v.(int))) + if len(chunks) <= 1+v.(int) { + failpoint.Return(nil, nil) + } + chunks = chunks[:(len(chunks) - v.(int))] + }) + + progress.StartTable(progressID, len(chunks), true) + return &RandomIterator{ + table: table, + chunkSize: chunkSize, + chunks: chunks, + nextChunk: 0, + dbConn: dbConn, + }, nil + +} + +func (s *RandomIterator) Next() (*chunk.Range, error) { + if uint(len(s.chunks)) <= s.nextChunk { + return nil, nil + } + c := s.chunks[s.nextChunk] + s.nextChunk = s.nextChunk + 1 + failpoint.Inject("print-chunk-info", func() { + lowerBounds := make([]string, len(c.Bounds)) + upperBounds := make([]string, len(c.Bounds)) + for i, bound := range c.Bounds { + lowerBounds[i] = bound.Lower + upperBounds[i] = bound.Upper + } + log.Info("failpoint print-chunk-info injected (random splitter)", zap.Strings("lowerBounds", lowerBounds), zap.Strings("upperBounds", upperBounds), zap.String("indexCode", c.Index.ToString())) + }) + return c, nil +} + +func (s *RandomIterator) Close() { + +} + +// GetSplitFields returns fields to split chunks, order by pk, uk, index, columns. 
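+// The candidates are tried in order: the user-supplied split fields (when given),
+// then the columns of the first index whose columns are all visible, and finally the
+// first non-hidden column. A missing user-supplied column, or a table with no usable
+// column at all, results in an error.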
+func GetSplitFields(table *model.TableInfo, splitFields []string) ([]*model.ColumnInfo, error) { + colsMap := make(map[string]*model.ColumnInfo) + + splitCols := make([]*model.ColumnInfo, 0, 2) + for _, splitField := range splitFields { + col := dbutil.FindColumnByName(table.Columns, splitField) + if col == nil { + return nil, errors.NotFoundf("column %s in table %s", splitField, table.Name) + + } + splitCols = append(splitCols, col) + } + + if len(splitCols) != 0 { + return splitCols, nil + } + + for _, col := range table.Columns { + colsMap[col.Name.O] = col + } + indices := dbutil.FindAllIndex(table) + if len(indices) != 0 { + NEXTINDEX: + for _, idx := range indices { + cols := make([]*model.ColumnInfo, 0, len(table.Columns)) + for _, icol := range idx.Columns { + col := colsMap[icol.Name.O] + if col.Hidden { + continue NEXTINDEX + } + cols = append(cols, col) + } + return cols, nil + } + } + + for _, col := range table.Columns { + if !col.Hidden { + return []*model.ColumnInfo{col}, nil + } + } + return nil, errors.NotFoundf("not found column") +} + +// splitRangeByRandom splits a chunk to multiple chunks by random +// Notice: If the `count <= 1`, it will skip splitting and return `chunk` as a slice directly. +func splitRangeByRandom(ctx context.Context, db *sql.DB, chunk *chunk.Range, count int, schema string, table string, columns []*model.ColumnInfo, limits, collation string) (chunks []*chunk.Range, err error) { + if count <= 1 { + chunks = append(chunks, chunk) + return chunks, nil + } + + chunkLimits, args := chunk.ToString(collation) + limitRange := fmt.Sprintf("(%s) AND (%s)", chunkLimits, limits) + + randomValues, err := utils.GetRandomValues(ctx, db, schema, table, columns, count-1, limitRange, args, collation) + if err != nil { + return nil, errors.Trace(err) + } + log.Debug("get split values by random", zap.Stringer("chunk", chunk), zap.Int("random values num", len(randomValues))) + for i := 0; i <= len(randomValues); i++ { + newChunk := chunk.Copy() + + for j, column := range columns { + if i == 0 { + if len(randomValues) == 0 { + // randomValues is empty, so chunks will append chunk itself. + break + } + newChunk.Update(column.Name.O, "", randomValues[i][j], false, true) + } else if i == len(randomValues) { + newChunk.Update(column.Name.O, randomValues[i-1][j], "", true, false) + } else { + newChunk.Update(column.Name.O, randomValues[i-1][j], randomValues[i][j], true, true) + } + } + chunks = append(chunks, newChunk) + } + log.Debug("split range by random", zap.Stringer("origin chunk", chunk), zap.Int("split num", len(chunks))) + return chunks, nil +} diff --git a/sync_diff_inspector/splitter/splitter.go b/sync_diff_inspector/splitter/splitter.go new file mode 100644 index 00000000000..d2a43ed1ce2 --- /dev/null +++ b/sync_diff_inspector/splitter/splitter.go @@ -0,0 +1,87 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package splitter + +import ( + "fmt" + + "github.com/pingcap/tiflow/sync_diff_inspector/checkpoints" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" +) + +const ( + SplitThreshold = 1000 +) + +// ChunkIterator generate next chunk for only one table lazily. +type ChunkIterator interface { + // Next seeks the next chunk, return nil if seeks to end. + Next() (*chunk.Range, error) + Close() +} + +// RangeInfo represents the unit of a process chunk. +// It's the only entrance of checkpoint. +type RangeInfo struct { + ChunkRange *chunk.Range `json:"chunk-range"` + // for bucket checkpoint + IndexID int64 `json:"index-id"` + + ProgressID string `json:"progress-id"` +} + +// GetTableIndex return the index of table diffs. +// IMPORTANT!!! +// We need to keep the tables order during checkpoint. +// So we should have to save the config info to checkpoint file too +func (r *RangeInfo) GetTableIndex() int { return r.ChunkRange.Index.TableIndex } + +func (r *RangeInfo) GetBucketIndexLeft() int { return r.ChunkRange.Index.BucketIndexLeft } + +func (r *RangeInfo) GetBucketIndexRight() int { return r.ChunkRange.Index.BucketIndexRight } + +func (r *RangeInfo) GetChunkIndex() int { return r.ChunkRange.Index.ChunkIndex } + +func (r *RangeInfo) GetChunk() *chunk.Range { + return r.ChunkRange +} + +func (r *RangeInfo) Copy() *RangeInfo { + return &RangeInfo{ + ChunkRange: r.ChunkRange.Clone(), + IndexID: r.IndexID, + ProgressID: r.ProgressID, + } +} + +func (r *RangeInfo) Update(column, lower, upper string, updateLower, updateUpper bool, collation, limits string) { + r.ChunkRange.Update(column, lower, upper, updateLower, updateUpper) + conditions, args := r.ChunkRange.ToString(collation) + r.ChunkRange.Where = fmt.Sprintf("((%s) AND (%s))", conditions, limits) + r.ChunkRange.Args = args +} + +func (r *RangeInfo) ToNode() *checkpoints.Node { + return &checkpoints.Node{ + ChunkRange: r.ChunkRange, + IndexID: r.IndexID, + } +} + +func FromNode(n *checkpoints.Node) *RangeInfo { + return &RangeInfo{ + ChunkRange: n.ChunkRange, + IndexID: n.IndexID, + } +} diff --git a/sync_diff_inspector/splitter/splitter_test.go b/sync_diff_inspector/splitter/splitter_test.go new file mode 100644 index 00000000000..3f641d100ee --- /dev/null +++ b/sync_diff_inspector/splitter/splitter_test.go @@ -0,0 +1,936 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package splitter + +import ( + "context" + "database/sql/driver" + "fmt" + "sort" + "strconv" + "testing" + + sqlmock "github.com/DATA-DOG/go-sqlmock" + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "github.com/stretchr/testify/require" +) + +type chunkResult struct { + chunkStr string + args []interface{} +} + +func TestSplitRangeByRandom(t *testing.T) { + db, mock, err := sqlmock.New() + require.NoError(t, err) + + testCases := []struct { + createTableSQL string + splitCount int + originChunk *chunk.Range + randomValues [][]string + expectResult []chunkResult + }{ + { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))", + 3, + chunk.NewChunkRange().CopyAndUpdate("a", "0", "10", true, true).CopyAndUpdate("b", "a", "z", true, true), + [][]string{ + {"5", "7"}, + {"g", "n"}, + }, + []chunkResult{ + { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"0", "0", "a", "5", "5", "g"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"5", "5", "g", "7", "7", "n"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"7", "7", "n", "10", "10", "z"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`, `a`))", + 3, + chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true).CopyAndUpdate("a", "0", "10", true, true), + [][]string{ + {"g", "n"}, + {"5", "7"}, + }, + []chunkResult{ + { + "((`b` > ?) OR (`b` = ? AND `a` > ?)) AND ((`b` < ?) OR (`b` = ? AND `a` <= ?))", + []interface{}{"a", "a", "0", "g", "g", "5"}, + }, { + "((`b` > ?) OR (`b` = ? AND `a` > ?)) AND ((`b` < ?) OR (`b` = ? AND `a` <= ?))", + []interface{}{"g", "g", "5", "n", "n", "7"}, + }, { + "((`b` > ?) OR (`b` = ? AND `a` > ?)) AND ((`b` < ?) OR (`b` = ? 
AND `a` <= ?))", + []interface{}{"n", "n", "7", "z", "z", "10"}, + }, + }, + }, + { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`))", + 3, + chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true), + [][]string{ + {"g", "n"}, + }, + []chunkResult{ + { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"a", "g"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"g", "n"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"n", "z"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`))", + 2, + chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true), + [][]string{ + {"g"}, + }, + []chunkResult{ + { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"a", "g"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"g", "z"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`))", + 3, + chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true), + [][]string{ + {}, + }, + []chunkResult{ + { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"a", "z"}, + }, + }, + }, + } + + for _, testCase := range testCases { + tableInfo, err := dbutil.GetTableInfoBySQL(testCase.createTableSQL, parser.New()) + require.NoError(t, err) + + splitCols, err := GetSplitFields(tableInfo, nil) + require.NoError(t, err) + createFakeResultForRandomSplit(mock, 0, testCase.randomValues) + chunks, err := splitRangeByRandom(context.Background(), db, testCase.originChunk, testCase.splitCount, "test", "test", splitCols, "", "") + require.NoError(t, err) + for j, chunk := range chunks { + chunkStr, args := chunk.ToString("") + require.Equal(t, chunkStr, testCase.expectResult[j].chunkStr) + require.Equal(t, args, testCase.expectResult[j].args) + } + } +} + +func TestRandomSpliter(t *testing.T) { + ctx := context.Background() + db, mock, err := sqlmock.New() + require.NoError(t, err) + + testCases := []struct { + createTableSQL string + count int + fields string + IgnoreColumns []string + randomValues [][]string + expectResult []chunkResult + }{ + { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))", + 10, + "", + nil, + [][]string{ + {"1", "2", "3", "4", "5"}, + {"a", "b", "c", "d", "e"}, + }, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"1", "1", "a"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"1", "1", "a", "2", "2", "b"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"2", "2", "b", "3", "3", "c"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"3", "3", "c", "4", "4", "d"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"4", "4", "d", "5", "5", "e"}, + }, { + "(`a` > ?) OR (`a` = ? 
AND `b` > ?)", + []interface{}{"5", "5", "e"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`))", + 10, + "", + nil, + [][]string{ + {"a", "b", "c", "d", "e"}, + }, + []chunkResult{ + { + "(`b` <= ?)", + []interface{}{"a"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"a", "b"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"b", "c"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"c", "d"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"d", "e"}, + }, { + "(`b` > ?)", + []interface{}{"e"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float)", + 10, + "b,c", + nil, + [][]string{ + {"a", "b", "c", "d", "e"}, + {"1.1", "2.2", "3.3", "4.4", "5.5"}, + }, + []chunkResult{ + { + "(`b` < ?) OR (`b` = ? AND `c` <= ?)", + []interface{}{"a", "a", "1.1"}, + }, { + "((`b` > ?) OR (`b` = ? AND `c` > ?)) AND ((`b` < ?) OR (`b` = ? AND `c` <= ?))", + []interface{}{"a", "a", "1.1", "b", "b", "2.2"}, + }, { + "((`b` > ?) OR (`b` = ? AND `c` > ?)) AND ((`b` < ?) OR (`b` = ? AND `c` <= ?))", + []interface{}{"b", "b", "2.2", "c", "c", "3.3"}, + }, { + "((`b` > ?) OR (`b` = ? AND `c` > ?)) AND ((`b` < ?) OR (`b` = ? AND `c` <= ?))", + []interface{}{"c", "c", "3.3", "d", "d", "4.4"}, + }, { + "((`b` > ?) OR (`b` = ? AND `c` > ?)) AND ((`b` < ?) OR (`b` = ? AND `c` <= ?))", + []interface{}{"d", "d", "4.4", "e", "e", "5.5"}, + }, { + "(`b` > ?) OR (`b` = ? AND `c` > ?)", + []interface{}{"e", "e", "5.5"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float)", + 10, + "", + []string{"a"}, + [][]string{ + {"a", "b", "c", "d", "e"}, + }, + []chunkResult{ + { + "(`b` <= ?)", + []interface{}{"a"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"a", "b"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"b", "c"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"c", "d"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"d", "e"}, + }, { + "(`b` > ?)", + []interface{}{"e"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float)", + 10, + "", + nil, + [][]string{ + {"1", "2", "3", "4", "5"}, + }, + []chunkResult{ + { + "(`a` <= ?)", + []interface{}{"1"}, + }, { + "((`a` > ?)) AND ((`a` <= ?))", + []interface{}{"1", "2"}, + }, { + "((`a` > ?)) AND ((`a` <= ?))", + []interface{}{"2", "3"}, + }, { + "((`a` > ?)) AND ((`a` <= ?))", + []interface{}{"3", "4"}, + }, { + "((`a` > ?)) AND ((`a` <= ?))", + []interface{}{"4", "5"}, + }, { + "(`a` > ?)", + []interface{}{"5"}, + }, + }, + }, + } + + for _, testCase := range testCases { + tableInfo, err := dbutil.GetTableInfoBySQL(testCase.createTableSQL, parser.New()) + require.NoError(t, err) + + info, needUnifiedTimeStamp := utils.ResetColumns(tableInfo, testCase.IgnoreColumns) + tableDiff := &common.TableDiff{ + Schema: "test", + Table: "test", + Info: info, + IgnoreColumns: testCase.IgnoreColumns, + NeedUnifiedTimeZone: needUnifiedTimeStamp, + Fields: testCase.fields, + ChunkSize: 5, + } + + createFakeResultForRandomSplit(mock, testCase.count, testCase.randomValues) + + iter, err := NewRandomIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + + j := 0 + for { + chunk, err := iter.Next() + require.NoError(t, err) + if chunk == nil { + break + } + chunkStr, args := chunk.ToString("") + require.Equal(t, chunkStr, testCase.expectResult[j].chunkStr) + require.Equal(t, args, testCase.expectResult[j].args) + j = j + 1 + } + } + + // Test 
Checkpoint + stopJ := 3 + tableInfo, err := dbutil.GetTableInfoBySQL(testCases[0].createTableSQL, parser.New()) + require.NoError(t, err) + + tableDiff := &common.TableDiff{ + Schema: "test", + Table: "test", + Info: tableInfo, + //IgnoreColumns: []string{"c"}, + //Fields: "a,b", + ChunkSize: 5, + } + + createFakeResultForRandomSplit(mock, testCases[0].count, testCases[0].randomValues) + + iter, err := NewRandomIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + + var chunk *chunk.Range + for j := 0; j < stopJ; j++ { + chunk, err = iter.Next() + require.NoError(t, err) + } + + bounds1 := chunk.Bounds + chunkID1 := chunk.Index + + rangeInfo := &RangeInfo{ + ChunkRange: chunk, + } + + createFakeResultForRandomSplit(mock, testCases[0].count, testCases[0].randomValues) + + iter, err = NewRandomIteratorWithCheckpoint(ctx, "", tableDiff, db, rangeInfo) + require.NoError(t, err) + + chunk, err = iter.Next() + require.NoError(t, err) + + for i, bound := range chunk.Bounds { + require.Equal(t, bounds1[i].Upper, bound.Lower) + } + + require.Equal(t, chunk.Index.ChunkCnt, chunkID1.ChunkCnt) + require.Equal(t, chunk.Index.ChunkIndex, chunkID1.ChunkIndex+1) + +} + +func createFakeResultForRandomSplit(mock sqlmock.Sqlmock, count int, randomValues [][]string) { + createFakeResultForCount(mock, count) + if randomValues == nil { + return + } + // generate fake result for get random value for column a + columns := []string{"a", "b", "c", "d", "e", "f"} + rowsNames := make([]string, 0, len(randomValues)) + for i := 0; i < len(randomValues); i++ { + rowsNames = append(rowsNames, columns[i]) + } + randomRows := sqlmock.NewRows(rowsNames) + for i := 0; i < len(randomValues[0]); i++ { + row := make([]driver.Value, 0, len(randomValues)) + for j := 0; j < len(randomValues); j++ { + row = append(row, randomValues[j][i]) + } + randomRows.AddRow(row...) + } + mock.ExpectQuery("ORDER BY rand_value").WillReturnRows(randomRows) + +} + +func TestBucketSpliter(t *testing.T) { + ctx := context.Background() + db, mock, err := sqlmock.New() + require.NoError(t, err) + + createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + testCases := []struct { + chunkSize int64 + aRandomValues []interface{} + bRandomValues []interface{} + expectResult []chunkResult + }{ + { + // chunk size less than the count of bucket 64, and the bucket's count 64 >= 32, so will split by random in every bucket + 32, + []interface{}{32, 32 * 3, 32 * 5, 32 * 7, 32 * 9}, + []interface{}{6, 6 * 3, 6 * 5, 6 * 7, 6 * 9}, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"32", "32", "6"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"32", "32", "6", "63", "63", "11"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"63", "63", "11", "96", "96", "18"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"96", "96", "18", "127", "127", "23"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"127", "127", "23", "160", "160", "30"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"160", "160", "30", "191", "191", "35"}, + }, { + "((`a` > ?) OR (`a` = ? 
AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"191", "191", "35", "224", "224", "42"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"224", "224", "42", "255", "255", "47"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"255", "255", "47", "288", "288", "54"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"288", "288", "54", "319", "319", "59"}, + }, { + "(`a` > ?) OR (`a` = ? AND `b` > ?)", + []interface{}{"319", "319", "59"}, + }, + }, + }, { + // chunk size less than the count of bucket 64, but 64 is less than 2*50, so will not split every bucket + 50, + nil, + nil, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"63", "63", "11"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"63", "63", "11", "127", "127", "23"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"127", "127", "23", "191", "191", "35"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"191", "191", "35", "255", "255", "47"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"255", "255", "47", "319", "319", "59"}, + }, { + "(`a` > ?) OR (`a` = ? AND `b` > ?)", + []interface{}{"319", "319", "59"}, + }, + }, + }, { + // chunk size is equal to the count of bucket 64, so every becket will generate a chunk + 64, + nil, + nil, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"63", "63", "11"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"63", "63", "11", "127", "127", "23"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"127", "127", "23", "191", "191", "35"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"191", "191", "35", "255", "255", "47"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"255", "255", "47", "319", "319", "59"}, + }, { + "(`a` > ?) OR (`a` = ? AND `b` > ?)", + []interface{}{"319", "319", "59"}, + }, + }, + }, { + // chunk size is greater than the count of bucket 64, will combine two bucket into chunk + 127, + nil, + nil, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"127", "127", "23"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"127", "127", "23", "255", "255", "47"}, + }, { + "(`a` > ?) OR (`a` = ? AND `b` > ?)", + []interface{}{"255", "255", "47"}, + }, + }, + }, { + // chunk size is equal to the double count of bucket 64, will combine two bucket into one chunk + 128, + nil, + nil, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"127", "127", "23"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"127", "127", "23", "255", "255", "47"}, + }, { + "(`a` > ?) OR (`a` = ? 
AND `b` > ?)", + []interface{}{"255", "255", "47"}, + }, + }, + }, { + // chunk size is greater than the double count of bucket 64, will combine three bucket into one chunk + 129, + nil, + nil, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"191", "191", "35"}, + }, { + "(`a` > ?) OR (`a` = ? AND `b` > ?)", + []interface{}{"191", "191", "35"}, + }, + }, + }, { + // chunk size is greater than the total count, only generate one chunk + 400, + nil, + nil, + []chunkResult{ + { + "TRUE", + nil, + }, + }, + }, + } + + tableDiff := &common.TableDiff{ + Schema: "test", + Table: "test", + Info: tableInfo, + } + + for i, testCase := range testCases { + fmt.Printf("%d", i) + createFakeResultForBucketSplit(mock, testCase.aRandomValues, testCase.bRandomValues) + tableDiff.ChunkSize = testCase.chunkSize + iter, err := NewBucketIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + + obtainChunks := make([]chunkResult, 0, len(testCase.expectResult)) + nextBeginBucket := 0 + for { + chunk, err := iter.Next() + require.NoError(t, err) + if chunk == nil { + break + } + chunkStr, _ := chunk.ToString("") + if nextBeginBucket == 0 { + require.Equal(t, chunk.Index.BucketIndexLeft, 0) + } else { + require.Equal(t, chunk.Index.BucketIndexLeft, nextBeginBucket) + } + if chunk.Index.ChunkIndex+1 == chunk.Index.ChunkCnt { + nextBeginBucket = chunk.Index.BucketIndexRight + 1 + } + obtainChunks = append(obtainChunks, chunkResult{chunkStr, chunk.Args}) + + } + sort.Slice(obtainChunks, func(i, j int) bool { + totalIndex := len(obtainChunks[i].args) + if totalIndex > len(obtainChunks[j].args) { + totalIndex = len(obtainChunks[j].args) + } + for index := 0; index < totalIndex; index++ { + a1, _ := strconv.Atoi(obtainChunks[i].args[index].(string)) + a2, _ := strconv.Atoi(obtainChunks[j].args[index].(string)) + if a1 < a2 { + return true + } else if a1 > a2 { + return false + } + } + if len(obtainChunks[i].args) == len(obtainChunks[j].args) { + // hack way for test case 6 + return len(obtainChunks[i].chunkStr) > len(obtainChunks[j].chunkStr) + } + return len(obtainChunks[i].args) < len(obtainChunks[j].args) + }) + // we expect chunk count is same after we generate chunk concurrently + require.Equal(t, len(obtainChunks), len(testCase.expectResult)) + for i, e := range testCase.expectResult { + require.Equal(t, obtainChunks[i].args, e.args) + require.Equal(t, obtainChunks[i].chunkStr, e.chunkStr) + } + } + + // Test Checkpoint + stopJ := 3 + createFakeResultForBucketSplit(mock, testCases[0].aRandomValues, testCases[0].bRandomValues) + tableDiff.ChunkSize = testCases[0].chunkSize + iter, err := NewBucketIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + j := 0 + var chunk *chunk.Range + for ; j < stopJ; j++ { + chunk, err = iter.Next() + require.NoError(t, err) + } + for { + c, err := iter.Next() + require.NoError(t, err) + if c == nil { + break + } + } + bounds1 := chunk.Bounds + + rangeInfo := &RangeInfo{ + ChunkRange: chunk, + IndexID: iter.GetIndexID(), + } + + // drop the origin db since we cannot ensure order of mock string after we concurrent produce chunks. 
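+	// Rebuild the mock expectations for the resumed iterator: bucket stats again, a
+	// row count, and only the random values that come after the checkpoint chunk.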
+ db, mock, err = sqlmock.New() + require.NoError(t, err) + createFakeResultForBucketSplit(mock, nil, nil) + createFakeResultForCount(mock, 64) + createFakeResultForRandom(mock, testCases[0].aRandomValues[stopJ:], testCases[0].bRandomValues[stopJ:]) + iter, err = NewBucketIteratorWithCheckpoint(ctx, "", tableDiff, db, rangeInfo, utils.NewWorkerPool(1, "bucketIter")) + require.NoError(t, err) + chunk, err = iter.Next() + require.NoError(t, err) + + for i, bound := range chunk.Bounds { + require.Equal(t, bounds1[i].Upper, bound.Lower) + } +} + +func createFakeResultForBucketSplit(mock sqlmock.Sqlmock, aRandomValues, bRandomValues []interface{}) { + /* + +---------+------------+-------------+----------+-----------+-------+---------+-------------+-------------+ + | Db_name | Table_name | Column_name | Is_index | Bucket_id | Count | Repeats | Lower_Bound | Upper_Bound | + +---------+------------+-------------+----------+-----------+-------+---------+-------------+-------------+ + | test | test | PRIMARY | 1 | 0 | 64 | 1 | (0, 0) | (63, 11) | + | test | test | PRIMARY | 1 | 1 | 128 | 1 | (64, 12) | (127, 23) | + | test | test | PRIMARY | 1 | 2 | 192 | 1 | (128, 24) | (191, 35) | + | test | test | PRIMARY | 1 | 3 | 256 | 1 | (192, 36) | (255, 47) | + | test | test | PRIMARY | 1 | 4 | 320 | 1 | (256, 48) | (319, 59) | + +---------+------------+-------------+----------+-----------+-------+---------+-------------+-------------+ + */ + + statsRows := sqlmock.NewRows([]string{"Db_name", "Table_name", "Column_name", "Is_index", "Bucket_id", "Count", "Repeats", "Lower_Bound", "Upper_Bound"}) + for i := 0; i < 5; i++ { + statsRows.AddRow("test", "test", "PRIMARY", 1, (i+1)*64, (i+1)*64, 1, fmt.Sprintf("(%d, %d)", i*64, i*12), fmt.Sprintf("(%d, %d)", (i+1)*64-1, (i+1)*12-1)) + } + mock.ExpectQuery("SHOW STATS_BUCKETS").WillReturnRows(statsRows) + + createFakeResultForRandom(mock, aRandomValues, bRandomValues) +} + +func createFakeResultForCount(mock sqlmock.Sqlmock, count int) { + if count > 0 { + // generate fake result for get the row count of this table + countRows := sqlmock.NewRows([]string{"cnt"}).AddRow(count) + mock.ExpectQuery("SELECT COUNT.*").WillReturnRows(countRows) + } +} + +func createFakeResultForRandom(mock sqlmock.Sqlmock, aRandomValues, bRandomValues []interface{}) { + for i := 0; i < len(aRandomValues); i++ { + aRandomRows := sqlmock.NewRows([]string{"a", "b"}) + aRandomRows.AddRow(aRandomValues[i], bRandomValues[i]) + mock.ExpectQuery("ORDER BY rand_value").WillReturnRows(aRandomRows) + } +} + +func TestLimitSpliter(t *testing.T) { + ctx := context.Background() + db, mock, err := sqlmock.New() + require.NoError(t, err) + + createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + testCases := []struct { + limitAValues []string + limitBValues []string + expectResult []chunkResult + }{ + { + []string{"1000", "2000", "3000", "4000"}, + []string{"a", "b", "c", "d"}, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"1000", "1000", "a"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"1000", "1000", "a", "2000", "2000", "b"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"2000", "2000", "b", "3000", "3000", "c"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) 
OR (`a` = ? AND `b` <= ?))", + []interface{}{"3000", "3000", "c", "4000", "4000", "d"}, + }, { + "(`a` > ?) OR (`a` = ? AND `b` > ?)", + []interface{}{"4000", "4000", "d"}, + }, + }, + }, + } + + tableDiff := &common.TableDiff{ + Schema: "test", + Table: "test", + Info: tableInfo, + ChunkSize: 1000, + } + + for _, testCase := range testCases { + createFakeResultForLimitSplit(mock, testCase.limitAValues, testCase.limitBValues, true) + + iter, err := NewLimitIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + + j := 0 + for { + chunk, err := iter.Next() + require.NoError(t, err) + if chunk == nil { + break + } + chunkStr, args := chunk.ToString("") + require.Equal(t, chunkStr, testCase.expectResult[j].chunkStr) + require.Equal(t, args, testCase.expectResult[j].args) + j = j + 1 + } + } + + // Test Checkpoint + stopJ := 2 + createFakeResultForLimitSplit(mock, testCases[0].limitAValues[:stopJ], testCases[0].limitBValues[:stopJ], true) + iter, err := NewLimitIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + j := 0 + var chunk *chunk.Range + for ; j < stopJ; j++ { + chunk, err = iter.Next() + require.NoError(t, err) + } + bounds1 := chunk.Bounds + + rangeInfo := &RangeInfo{ + ChunkRange: chunk, + IndexID: iter.GetIndexID(), + } + + createFakeResultForLimitSplit(mock, testCases[0].limitAValues[stopJ:], testCases[0].limitBValues[stopJ:], true) + iter, err = NewLimitIteratorWithCheckpoint(ctx, "", tableDiff, db, rangeInfo) + require.NoError(t, err) + chunk, err = iter.Next() + require.NoError(t, err) + + for i, bound := range chunk.Bounds { + require.Equal(t, bounds1[i].Upper, bound.Lower) + } +} + +func createFakeResultForLimitSplit(mock sqlmock.Sqlmock, aValues []string, bValues []string, needEnd bool) { + for i, a := range aValues { + limitRows := sqlmock.NewRows([]string{"a", "b"}) + limitRows.AddRow(a, bValues[i]) + mock.ExpectQuery("SELECT `a`,.*").WillReturnRows(limitRows) + } + + if needEnd { + mock.ExpectQuery("SELECT `a`,.*").WillReturnRows(sqlmock.NewRows([]string{"a", "b"})) + } +} + +func TestRangeInfo(t *testing.T) { + rangeInfo := &RangeInfo{ + ChunkRange: chunk.NewChunkRange(), + IndexID: 2, + ProgressID: "324312", + } + rangeInfo.Update("a", "1", "2", true, true, "[23]", "[sdg]") + rangeInfo.ChunkRange.Index.TableIndex = 1 + chunkRange := rangeInfo.GetChunk() + require.Equal(t, chunkRange.Where, "((((`a` COLLATE '[23]' > ?)) AND ((`a` COLLATE '[23]' <= ?))) AND ([sdg]))") + require.Equal(t, chunkRange.Args, []interface{}{"1", "2"}) + + require.Equal(t, rangeInfo.GetTableIndex(), 1) + + rangeInfo2 := FromNode(rangeInfo.ToNode()) + + chunkRange = rangeInfo2.GetChunk() + require.Equal(t, chunkRange.Where, "((((`a` COLLATE '[23]' > ?)) AND ((`a` COLLATE '[23]' <= ?))) AND ([sdg]))") + require.Equal(t, chunkRange.Args, []interface{}{"1", "2"}) + + require.Equal(t, rangeInfo2.GetTableIndex(), 1) + +} + +func TestChunkSize(t *testing.T) { + ctx := context.Background() + db, mock, err := sqlmock.New() + require.NoError(t, err) + + createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + tableDiff := &common.TableDiff{ + Schema: "test", + Table: "test", + Info: tableInfo, + ChunkSize: 0, + } + + // test bucket splitter chunksize + statsRows := sqlmock.NewRows([]string{"Db_name", "Table_name", "Column_name", "Is_index", "Bucket_id", "Count", "Repeats", "Lower_Bound", "Upper_Bound"}) + // Notice, use wrong 
Bound to kill bucket producer + statsRows.AddRow("test", "test", "PRIMARY", 1, 0, 1000000000, 1, "(1, 2, wrong!)", "(2, 3, wrong!)") + mock.ExpectQuery("SHOW STATS_BUCKETS").WillReturnRows(statsRows) + + bucketIter, err := NewBucketIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + require.Equal(t, bucketIter.chunkSize, int64(100000)) + + createFakeResultForBucketSplit(mock, nil, nil) + bucketIter, err = NewBucketIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + require.Equal(t, bucketIter.chunkSize, int64(50000)) + + // test random splitter chunksize + // chunkNum is only 1, so don't need randomValues + createFakeResultForRandomSplit(mock, 1000, nil) + randomIter, err := NewRandomIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + require.Equal(t, randomIter.chunkSize, int64(50000)) + + createFakeResultForRandomSplit(mock, 1000000000, [][]string{ + {"1", "2", "3", "4", "5"}, + {"a", "b", "c", "d", "e"}, + }) + randomIter, err = NewRandomIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + require.Equal(t, randomIter.chunkSize, int64(100000)) + + createTableSQL = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime)" + tableInfo, err = dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + tableDiff_noindex := &common.TableDiff{ + Schema: "test", + Table: "test", + Info: tableInfo, + ChunkSize: 0, + } + // no index + createFakeResultForRandomSplit(mock, 1000, nil) + randomIter, err = NewRandomIterator(ctx, "", tableDiff_noindex, db) + require.NoError(t, err) + require.Equal(t, randomIter.chunkSize, int64(1001)) + + // test limit splitter chunksize + createFakeResultForCount(mock, 1000) + mock.ExpectQuery("SELECT `a`,.*limit 50000.*").WillReturnRows(sqlmock.NewRows([]string{"a", "b"})) + _, err = NewLimitIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + +} diff --git a/sync_diff_inspector/utils/pd.go b/sync_diff_inspector/utils/pd.go new file mode 100644 index 00000000000..b9604f81129 --- /dev/null +++ b/sync_diff_inspector/utils/pd.go @@ -0,0 +1,288 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package utils + +import ( + "context" + "database/sql" + "fmt" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/coreos/go-semver/semver" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/util/dbutil" + pd "github.com/tikv/pd/client" + clientv3 "go.etcd.io/etcd/client/v3" + "go.uber.org/zap" +) + +const ( + tidbServerInformationPath = "/tidb/server/info" + defaultEtcdDialTimeOut = 3 * time.Second + + defaultGCSafePointTTL = 5 * 60 +) + +var ( + tidbVersionRegex = regexp.MustCompile(`-[v]?\d+\.\d+\.\d+([0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?`) + autoGCSafePointVersion = semver.New("4.0.0") +) + +func getPDDDLIDs(pCtx context.Context, cli *clientv3.Client) ([]string, error) { + ctx, cancel := context.WithTimeout(pCtx, 10*time.Second) + defer cancel() + + resp, err := cli.Get(ctx, tidbServerInformationPath, clientv3.WithPrefix()) + if err != nil { + return nil, errors.Trace(err) + } + pdDDLIds := make([]string, len(resp.Kvs)) + for i, kv := range resp.Kvs { + items := strings.Split(string(kv.Key), "/") + pdDDLIds[i] = items[len(items)-1] + } + return pdDDLIds, nil +} + +// getTiDBDDLIDs gets DDL IDs from TiDB +func getTiDBDDLIDs(ctx context.Context, db *sql.DB) ([]string, error) { + query := "SELECT * FROM information_schema.tidb_servers_info;" + rows, err := db.QueryContext(ctx, query) + if err != nil { + return []string{}, errors.Annotatef(err, "sql: %s", query) + } + return GetSpecifiedColumnValueAndClose(rows, "DDL_ID") +} + +func checkSameCluster(ctx context.Context, db *sql.DB, pdAddrs []string) (bool, error) { + cli, err := clientv3.New(clientv3.Config{ + Endpoints: pdAddrs, + DialTimeout: defaultEtcdDialTimeOut, + }) + if err != nil { + return false, errors.Trace(err) + } + tidbDDLIDs, err := getTiDBDDLIDs(ctx, db) + if err != nil { + return false, err + } + pdDDLIDs, err := getPDDDLIDs(ctx, cli) + if err != nil { + return false, err + } + sort.Strings(tidbDDLIDs) + sort.Strings(pdDDLIDs) + + return sameStringArray(tidbDDLIDs, pdDDLIDs), nil +} + +func sameStringArray(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + +// GetPDClientForGC is an initialization step. 
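+// It returns a PD client only when the target database is TiDB and the PD endpoints
+// reported by the cluster belong to the same cluster as the connected instance;
+// otherwise the returned client is nil. A hypothetical call site (ctx, db and
+// snapshot stand in for the caller's values):
+//
+//	pdCli, err := GetPDClientForGC(ctx, db)
+//	if err == nil && pdCli != nil {
+//		err = StartGCSavepointUpdateService(ctx, pdCli, db, snapshot)
+//	}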
+func GetPDClientForGC(ctx context.Context, db *sql.DB) (pd.Client, error) { + if ok, _ := dbutil.IsTiDB(ctx, db); ok { + pdAddrs, err := GetPDAddrs(ctx, db) + if err != nil { + return nil, err + } + if len(pdAddrs) > 0 { + if same, err := checkSameCluster(ctx, db, pdAddrs); err != nil { + log.Info("[automatically GC] check whether fetched pd addr and TiDB belong to one cluster failed", zap.Strings("pd address", pdAddrs), zap.Error(err)) + } else if same { + pdClient, err := pd.NewClientWithContext(ctx, pdAddrs, pd.SecurityOption{}) + if err != nil { + log.Info("[automatically GC] create pd client to control GC failed", zap.Strings("pd address", pdAddrs), zap.Error(err)) + return nil, err + } + return pdClient, nil + } + } + } + return nil, nil +} + +// GetPDAddrs gets PD address from TiDB +func GetPDAddrs(ctx context.Context, db *sql.DB) ([]string, error) { + query := "SELECT * FROM information_schema.cluster_info where type = 'pd';" + rows, err := db.QueryContext(ctx, query) + if err != nil { + return []string{}, errors.Annotatef(err, "sql: %s", query) + } + return GetSpecifiedColumnValueAndClose(rows, "STATUS_ADDRESS") +} + +// GetSpecifiedColumnValueAndClose get columns' values whose name is equal to columnName and close the given rows +func GetSpecifiedColumnValueAndClose(rows *sql.Rows, columnName string) ([]string, error) { + if rows == nil { + return []string{}, nil + } + defer rows.Close() + columnName = strings.ToUpper(columnName) + var strs []string + columns, _ := rows.Columns() + addr := make([]interface{}, len(columns)) + oneRow := make([]sql.NullString, len(columns)) + fieldIndex := -1 + for i, col := range columns { + if strings.ToUpper(col) == columnName { + fieldIndex = i + } + addr[i] = &oneRow[i] + } + if fieldIndex == -1 { + return strs, nil + } + for rows.Next() { + err := rows.Scan(addr...) + if err != nil { + return strs, errors.Trace(err) + } + if oneRow[fieldIndex].Valid { + strs = append(strs, oneRow[fieldIndex].String) + } + } + return strs, errors.Trace(rows.Err()) +} + +// parse versino string to semver.Version +func parseVersion(versionStr string) (*semver.Version, error) { + versionStr = tidbVersionRegex.FindString(versionStr)[1:] + versionStr = strings.TrimPrefix(versionStr, "v") + return semver.NewVersion(versionStr) +} + +// It's OK to failed to get db version +func TryToGetVersion(ctx context.Context, db *sql.DB) *semver.Version { + versionStr, err := dbutil.GetDBVersion(ctx, db) + if err != nil { + return nil + } + if !strings.Contains(strings.ToLower(versionStr), "tidb") { + return nil + } + version, err := parseVersion(versionStr) + if err != nil { + // It's OK when parse version failed + version = nil + } + return version +} + +// StartGCSavepointUpdateService keeps GC safePoint stop moving forward. 
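+// For TiDB versions newer than v4.0.0 it resolves the configured snapshot to a TSO and
+// starts a background goroutine that keeps re-registering a service-level GC safe point
+// (TTL defaultGCSafePointTTL) at that TSO, so the data being compared is not garbage
+// collected while the check runs; on older versions it only logs that the automatic GC
+// safe point is unsupported.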
+func StartGCSavepointUpdateService(ctx context.Context, pdCli pd.Client, db *sql.DB, snapshot string) error { + versionStr, err := selectVersion(db) + if err != nil { + log.Info("detect version of tidb failed") + return nil + } + tidbVersion, err := parseVersion(versionStr) + if err != nil { + log.Info("parse version of tidb failed") + return nil + } + // get latest snapshot + snapshotTS, err := parseSnapshotToTSO(db, snapshot) + if tidbVersion.Compare(*autoGCSafePointVersion) > 0 { + log.Info("tidb support auto gc safepoint", zap.Stringer("version", tidbVersion)) + if err != nil { + return err + } + go updateServiceSafePoint(ctx, pdCli, snapshotTS) + } else { + log.Info("tidb doesn't support auto gc safepoint", zap.Stringer("version", tidbVersion)) + } + return nil +} + +func updateServiceSafePoint(ctx context.Context, pdClient pd.Client, snapshotTS uint64) { + updateInterval := time.Duration(defaultGCSafePointTTL/2) * time.Second + tick := time.NewTicker(updateInterval) + DiffServiceSafePointID := fmt.Sprintf("Sync_diff_%d", time.Now().UnixNano()) + log.Info("generate dumpling gc safePoint id", zap.String("id", DiffServiceSafePointID)) + for { + log.Debug("update PD safePoint limit with ttl", + zap.Uint64("safePoint", snapshotTS), + zap.Duration("updateInterval", updateInterval)) + for retryCnt := 0; retryCnt <= 10; retryCnt++ { + _, err := pdClient.UpdateServiceGCSafePoint(ctx, DiffServiceSafePointID, defaultGCSafePointTTL, snapshotTS) + if err == nil { + break + } + log.Debug("update PD safePoint failed", zap.Error(err), zap.Int("retryTime", retryCnt)) + select { + case <-ctx.Done(): + return + case <-time.After(time.Second): + } + } + select { + case <-ctx.Done(): + return + case <-tick.C: + } + } +} + +func parseSnapshotToTSO(pool *sql.DB, snapshot string) (uint64, error) { + snapshotTS, err := strconv.ParseUint(snapshot, 10, 64) + if err == nil { + return snapshotTS, nil + } + var tso sql.NullInt64 + query := "SELECT unix_timestamp(?)" + row := pool.QueryRow(query, snapshot) + err = row.Scan(&tso) + if err != nil { + return 0, errors.Annotatef(err, "sql: %s", strings.ReplaceAll(query, "?", fmt.Sprintf(`"%s"`, snapshot))) + } + if !tso.Valid { + return 0, errors.Errorf("snapshot %s format not supported. please use tso or '2006-01-02 15:04:05' format time", snapshot) + } + return uint64(tso.Int64*1000) << 18, nil +} + +func GetSnapshot(ctx context.Context, db *sql.DB) ([]string, error) { + query := "SHOW MASTER STATUS;" + rows, err := db.QueryContext(ctx, query) + if err != nil { + return []string{}, errors.Annotatef(err, "sql: %s", query) + } + return GetSpecifiedColumnValueAndClose(rows, "Position") +} + +func selectVersion(db *sql.DB) (string, error) { + var versionInfo string + const query = "SELECT version()" + row := db.QueryRow(query) + err := row.Scan(&versionInfo) + if err != nil { + return "", errors.Annotatef(err, "sql: %s", query) + } + return versionInfo, nil +} diff --git a/sync_diff_inspector/utils/table.go b/sync_diff_inspector/utils/table.go new file mode 100644 index 00000000000..1c99dcb5d74 --- /dev/null +++ b/sync_diff_inspector/utils/table.go @@ -0,0 +1,187 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "context" + "fmt" + "regexp" + "strings" + "time" + + "github.com/coreos/go-semver/semver" + "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/ddl" + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/sessionctx" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/collate" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" + "github.com/pingcap/tidb/pkg/util/mock" +) + +const ( + AnnotationClusteredReplaceString = "${1} /*T![clustered_index] CLUSTERED */${2}\n" + AnnotationNonClusteredReplaceString = "${1} /*T![clustered_index] NONCLUSTERED */${2}\n" +) + +func init() { + collate.SetNewCollationEnabledForTest(false) +} + +// addClusteredAnnotation add the `/*T![clustered_index] NONCLUSTERED */` for primary key of create table info +// In the older version, the create table info hasn't `/*T![clustered_index] NONCLUSTERED */`, +// which lead the issue https://github.com/pingcap/tidb-tools/issues/678 +// +// Before Get Create Table Info: +// mysql> SHOW CREATE TABLE `test`.`itest`; +// +// +-------+--------------------------------------------------------------------+ +// | Table | Create Table | +// +-------+--------------------------------------------------------------------+ +// | itest | CREATE TABLE `itest` ( +// `id` int(11) DEFAULT NULL, +// `name` varchar(24) DEFAULT NULL, +// PRIMARY KEY (`id`) +// ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin | +// +-------+--------------------------------------------------------------------+ +// +// After Add the annotation: +// +// +-------+--------------------------------------------------------------------+ +// | Table | Create Table | +// +-------+--------------------------------------------------------------------+ +// | itest | CREATE TABLE `itest` ( +// `id` int(11) DEFAULT NULL, +// `name` varchar(24) DEFAULT NULL, +// PRIMARY KEY (`id`) /*T![clustered_index] CLUSTERED */ +// ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin | +// +-------+--------------------------------------------------------------------+ +func addClusteredAnnotationForPrimaryKey(raw string, replace string) (string, error) { + reg, regErr := regexp.Compile(`(PRIMARY\sKEY.*\))(\s*,?)\s*\n`) + if reg == nil || regErr != nil { + return raw, errors.Annotate(regErr, "failed to compile regex for add clustered annotation, err: %s") + } + return reg.ReplaceAllString(raw, replace), nil +} + +func getTableInfoBySQL(ctx sessionctx.Context, createTableSQL string, parser2 *parser.Parser) (table *model.TableInfo, err error) { + stmt, err := parser2.ParseOneStmt(createTableSQL, "", "") + if err != nil { + return nil, errors.Trace(err) + } + + s, ok := stmt.(*ast.CreateTableStmt) + if ok { + table, err := ddl.BuildTableInfoWithStmt(ctx, s, mysql.DefaultCharset, "", nil) + if err != nil { + return nil, errors.Trace(err) + } + + // put primary key in indices + if table.PKIsHandle { + pkIndex := &model.IndexInfo{ + Name: model.NewCIStr("PRIMARY"), + Primary: true, + State: 
model.StatePublic, + Unique: true, + Tp: model.IndexTypeBtree, + Columns: []*model.IndexColumn{ + { + Name: table.GetPkName(), + Length: types.UnspecifiedLength, + }, + }, + } + + table.Indices = append(table.Indices, pkIndex) + } + + return table, nil + } + + return nil, errors.Errorf("get table info from sql %s failed!", createTableSQL) +} + +func isPKISHandle( + ctx context.Context, + db dbutil.QueryExecutor, + schemaName, tableName string, +) bool { + query := fmt.Sprintf("SELECT _tidb_rowid FROM %s LIMIT 0;", dbutil.TableName(schemaName, tableName)) + rows, err := db.QueryContext(ctx, query) + if err != nil && strings.Contains(err.Error(), "Unknown column") { + return true + } + if rows != nil { + rows.Close() + } + return false +} + +func GetTableInfoWithVersion( + ctx context.Context, + db dbutil.QueryExecutor, + schemaName, tableName string, + version *semver.Version, +) (*model.TableInfo, error) { + createTableSQL, err := dbutil.GetCreateTableSQL(ctx, db, schemaName, tableName) + if err != nil { + return nil, errors.Trace(err) + } + + if version != nil && version.Major <= 4 { + var replaceString string + if isPKISHandle(ctx, db, schemaName, tableName) { + replaceString = AnnotationClusteredReplaceString + } else { + replaceString = AnnotationNonClusteredReplaceString + } + createTableSQL, err = addClusteredAnnotationForPrimaryKey(createTableSQL, replaceString) + if err != nil { + return nil, errors.Trace(err) + } + } + parser2, err := dbutil.GetParserForDB(ctx, db) + if err != nil { + return nil, errors.Trace(err) + } + sctx := mock.NewContext() + // unify the timezone to UTC +0:00 + sctx.GetSessionVars().TimeZone = time.UTC + sctx.GetSessionVars().SQLMode = mysql.DelSQLMode(sctx.GetSessionVars().SQLMode, mysql.ModeStrictTransTables) + sctx.GetSessionVars().SQLMode = mysql.DelSQLMode(sctx.GetSessionVars().SQLMode, mysql.ModeStrictAllTables) + return getTableInfoBySQL(sctx, createTableSQL, parser2) +} + +// GetTableInfo returns table information. +func GetTableInfo( + ctx context.Context, db dbutil.QueryExecutor, + schemaName, tableName string, +) (*model.TableInfo, error) { + createTableSQL, err := dbutil.GetCreateTableSQL(ctx, db, schemaName, tableName) + if err != nil { + return nil, errors.Trace(err) + } + + parser2, err := dbutil.GetParserForDB(ctx, db) + if err != nil { + return nil, errors.Trace(err) + } + return dbutiltest.GetTableInfoBySQL(createTableSQL, parser2) +} diff --git a/sync_diff_inspector/utils/utils.go b/sync_diff_inspector/utils/utils.go new file mode 100644 index 00000000000..d80bc669123 --- /dev/null +++ b/sync_diff_inspector/utils/utils.go @@ -0,0 +1,1059 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package utils + +import ( + "context" + "database/sql" + "encoding/json" + "fmt" + "math" + "reflect" + "sort" + "strconv" + "strings" + "sync" + + "github.com/olekukonko/tablewriter" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "go.uber.org/zap" +) + +// SecretString is a wrapper for sensitive strings like password, +// which yields redacted string when being marshaled. +type SecretString string + +func (s SecretString) MarshalJSON() ([]byte, error) { + return []byte(`"******"`), nil +} + +func (s SecretString) String() string { + return "******" +} + +// Plain unwraps the secret string. +func (s SecretString) Plain() string { + return string(s) +} + +// IsBlobType returns true if tp is Blob type +func IsBlobType(tp byte) bool { + switch tp { + case mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeBlob, mysql.TypeLongBlob: + return true + } + + return false +} + +// WorkerPool contains a pool of workers. +// The number of workers in the channel represents how many goruntines +// can be created to execute the task. +// After the task is done, worker will be sent back to the channel. +type WorkerPool struct { + limit uint + workers chan *Worker + name string + wg sync.WaitGroup +} + +// Worker identified by ID. +type Worker struct { + ID uint64 +} + +type taskFunc func() + +// NewWorkerPool returns a WorkerPool with `limit` workers in the channel. +func NewWorkerPool(limit uint, name string) *WorkerPool { + workers := make(chan *Worker, limit) + for i := uint(0); i < limit; i++ { + workers <- &Worker{ID: uint64(i + 1)} + } + return &WorkerPool{ + limit: limit, + workers: workers, + name: name, + } +} + +// Apply wait for an idle worker to run `taskFunc`. +// Notice: function `Apply` and `WaitFinished` cannot be called in parallel +func (pool *WorkerPool) Apply(fn taskFunc) { + worker := pool.apply() + pool.wg.Add(1) + go func() { + defer pool.wg.Done() + defer pool.recycle(worker) + fn() + }() +} + +// apply waits for an idle worker from the channel and return it +func (pool *WorkerPool) apply() *Worker { + var worker *Worker + select { + case worker = <-pool.workers: + default: + log.Debug("wait for workers", zap.String("pool", pool.name)) + worker = <-pool.workers + } + return worker +} + +// recycle sends an idle worker back to the channel +func (pool *WorkerPool) recycle(worker *Worker) { + if worker == nil { + panic("invalid restore worker") + } + pool.workers <- worker +} + +// HasWorker checks if the pool has unallocated workers. +func (pool *WorkerPool) HasWorker() bool { + return len(pool.workers) > 0 +} + +// WaitFinished waits till the pool finishs all the tasks. +func (pool *WorkerPool) WaitFinished() { + pool.wg.Wait() +} + +// GetColumnsFromIndex returns `ColumnInfo`s of the specified index. +func GetColumnsFromIndex(index *model.IndexInfo, tableInfo *model.TableInfo) []*model.ColumnInfo { + indexColumns := make([]*model.ColumnInfo, 0, len(index.Columns)) + for _, indexColumn := range index.Columns { + indexColumns = append(indexColumns, tableInfo.Columns[indexColumn.Offset]) + } + + return indexColumns +} + +// GetTableRowsQueryFormat returns a rowsQuerySQL template for the specific table. +// +// e.g. SELECT /*!40001 SQL_NO_CACHE */ `a`, `b` FROM `schema`.`table` WHERE %s ORDER BY `a`. 
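+//
+// A minimal usage sketch (the range condition here is hypothetical; callers
+// substitute a chunk's generated range for the %s placeholder):
+//
+//	query, orderKeyCols := GetTableRowsQueryFormat("test", "t", tableInfo, "")
+//	rowsSQL := fmt.Sprintf(query, "`a` > 1 AND `a` <= 100")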
+func GetTableRowsQueryFormat(schema, table string, tableInfo *model.TableInfo, collation string) (string, []*model.ColumnInfo) { + orderKeys, orderKeyCols := dbutil.SelectUniqueOrderKey(tableInfo) + + columnNames := make([]string, 0, len(tableInfo.Columns)) + for _, col := range tableInfo.Columns { + if col.Hidden { + continue + } + + name := dbutil.ColumnName(col.Name.O) + // When col value is 0, the result is NULL. + // But we can use ISNULL to distinguish between null and 0. + if col.FieldType.GetType() == mysql.TypeFloat { + name = fmt.Sprintf("round(%s, 5-floor(log10(abs(%s)))) as %s", name, name, name) + } else if col.FieldType.GetType() == mysql.TypeDouble { + name = fmt.Sprintf("round(%s, 14-floor(log10(abs(%s)))) as %s", name, name, name) + } + columnNames = append(columnNames, name) + } + columns := strings.Join(columnNames, ", ") + if collation != "" { + collation = fmt.Sprintf(" COLLATE '%s'", collation) + } + + for i, key := range orderKeys { + orderKeys[i] = dbutil.ColumnName(key) + } + + query := fmt.Sprintf("SELECT /*!40001 SQL_NO_CACHE */ %s FROM %s WHERE %%s ORDER BY %s%s", + columns, dbutil.TableName(schema, table), strings.Join(orderKeys, ","), collation) + + return query, orderKeyCols +} + +// GenerateReplaceDML returns the insert SQL for the specific row values. +func GenerateReplaceDML(data map[string]*dbutil.ColumnData, table *model.TableInfo, schema string) string { + colNames := make([]string, 0, len(table.Columns)) + values := make([]string, 0, len(table.Columns)) + for _, col := range table.Columns { + if col.IsGenerated() { + continue + } + + colNames = append(colNames, dbutil.ColumnName(col.Name.O)) + if data[col.Name.O].IsNull { + values = append(values, "NULL") + continue + } + + if NeedQuotes(col.FieldType.GetType()) { + if IsBlobType(col.FieldType.GetType()) || IsBinaryColumn(col) { + values = append(values, fmt.Sprintf("x'%x'", data[col.Name.O].Data)) + } else { + values = append(values, fmt.Sprintf("'%s'", strings.Replace(string(data[col.Name.O].Data), "'", "\\'", -1))) + } + } else { + values = append(values, string(data[col.Name.O].Data)) + } + } + + return fmt.Sprintf("REPLACE INTO %s(%s) VALUES (%s);", dbutil.TableName(schema, table.Name.O), strings.Join(colNames, ","), strings.Join(values, ",")) +} + +// GerateReplaceDMLWithAnnotation returns the replace SQL for the specific 2 rows. +// And add Annotations to show the different columns. 
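+// The annotation is rendered with tablewriter as a block comment listing only
+// the columns that differ, with one row for the source values and one for the
+// target values, followed by a REPLACE statement built from the source row.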
+func GenerateReplaceDMLWithAnnotation(source, target map[string]*dbutil.ColumnData, table *model.TableInfo, schema string) string { + sqlColNames := make([]string, 0, len(table.Columns)) + sqlValues := make([]string, 0, len(table.Columns)) + colNames := append(make([]string, 0, len(table.Columns)+1), "diff columns") + values1 := append(make([]string, 0, len(table.Columns)+1), "source data") + values2 := append(make([]string, 0, len(table.Columns)+1), "target data") + tableString := &strings.Builder{} + diffTable := tablewriter.NewWriter(tableString) + for _, col := range table.Columns { + if col.IsGenerated() { + continue + } + + var data1, data2 *dbutil.ColumnData + var value1 string + data1 = source[col.Name.O] + data2 = target[col.Name.O] + + if data1.IsNull { + value1 = "NULL" + } else { + if NeedQuotes(col.FieldType.GetType()) { + if IsBlobType(col.FieldType.GetType()) || IsBinaryColumn(col) { + value1 = fmt.Sprintf("x'%x'", data1.Data) + } else { + value1 = fmt.Sprintf("'%s'", strings.Replace(string(data1.Data), "'", "\\'", -1)) + } + } else { + value1 = string(data1.Data) + } + } + colName := dbutil.ColumnName(col.Name.O) + sqlColNames = append(sqlColNames, colName) + sqlValues = append(sqlValues, value1) + + // Only show different columns in annotations. + if (string(data1.Data) == string(data2.Data)) && (data1.IsNull == data2.IsNull) { + continue + } + + colNames = append(colNames, colName) + values1 = append(values1, value1) + + if data2.IsNull { + values2 = append(values2, "NULL") + } else { + if NeedQuotes(col.FieldType.GetType()) { + if IsBlobType(col.FieldType.GetType()) || IsBinaryColumn(col) { + values2 = append(values2, fmt.Sprintf("x'%x'", data1.Data)) + } else { + values2 = append(values2, fmt.Sprintf("'%s'", strings.Replace(string(data2.Data), "'", "\\'", -1))) + } + } else { + values2 = append(values2, string(data2.Data)) + } + } + + } + + diffTable.SetRowLine(true) + diffTable.SetHeader(colNames) + diffTable.Append(values1) + diffTable.Append(values2) + diffTable.SetCenterSeparator("╋") + diffTable.SetColumnSeparator("╏") + diffTable.SetRowSeparator("╍") + diffTable.SetAlignment(tablewriter.ALIGN_LEFT) + diffTable.SetBorder(false) + diffTable.Render() + + return fmt.Sprintf("/*\n%s*/\nREPLACE INTO %s(%s) VALUES (%s);", tableString.String(), dbutil.TableName(schema, table.Name.O), strings.Join(sqlColNames, ","), strings.Join(sqlValues, ",")) +} + +// GerateReplaceDMLWithAnnotation returns the delete SQL for the specific row. +func GenerateDeleteDML(data map[string]*dbutil.ColumnData, table *model.TableInfo, schema string) string { + kvs := make([]string, 0, len(table.Columns)) + for _, col := range table.Columns { + if col.IsGenerated() { + continue + } + + if data[col.Name.O].IsNull { + kvs = append(kvs, fmt.Sprintf("%s is NULL", dbutil.ColumnName(col.Name.O))) + continue + } + + if NeedQuotes(col.FieldType.GetType()) { + if IsBlobType(col.FieldType.GetType()) || IsBinaryColumn(col) { + kvs = append(kvs, fmt.Sprintf("%s = x'%x'", dbutil.ColumnName(col.Name.O), data[col.Name.O].Data)) + } else { + kvs = append(kvs, fmt.Sprintf("%s = '%s'", dbutil.ColumnName(col.Name.O), strings.Replace(string(data[col.Name.O].Data), "'", "\\'", -1))) + } + } else { + kvs = append(kvs, fmt.Sprintf("%s = %s", dbutil.ColumnName(col.Name.O), string(data[col.Name.O].Data))) + } + } + return fmt.Sprintf("DELETE FROM %s WHERE %s LIMIT 1;", dbutil.TableName(schema, table.Name.O), strings.Join(kvs, " AND ")) + +} + +// isCompatible checks whether 2 column types are compatible. +// e.g. 
char and vachar. +func isCompatible(tp1, tp2 byte) bool { + if tp1 == tp2 { + return true + } + + log.Warn("column type different, check compatibility.") + var t1, t2 int + switch tp1 { + case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeLonglong, mysql.TypeInt24: + t1 = 1 + case mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob: + t1 = 2 + case mysql.TypeVarString, mysql.TypeString, mysql.TypeVarchar: + t1 = 3 + default: + return false + } + + switch tp2 { + case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeLonglong, mysql.TypeInt24: + t2 = 1 + case mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob: + t2 = 2 + case mysql.TypeVarString, mysql.TypeString, mysql.TypeVarchar: + t2 = 3 + default: + return false + } + + return t1 == t2 +} + +func sameProperties(c1, c2 *model.ColumnInfo) bool { + switch c1.GetType() { + case mysql.TypeVarString, mysql.TypeString, mysql.TypeVarchar: + if c1.FieldType.GetCharset() != c2.FieldType.GetCharset() { + log.Warn("Ignoring character set differences", + zap.String("column name", c1.Name.O), + zap.String("charset source", c1.FieldType.GetCharset()), + zap.String("charset target", c2.FieldType.GetCharset()), + ) + } + if c1.FieldType.GetCollate() != c2.FieldType.GetCollate() { + log.Warn("Ignoring collation differences", + zap.String("column name", c1.Name.O), + zap.String("collation source", c1.FieldType.GetCollate()), + zap.String("collation target", c2.FieldType.GetCollate()), + ) + } + return c1.FieldType.GetFlen() == c2.FieldType.GetFlen() + default: + return true + } +} + +// CompareStruct compare tables' columns and indices from upstream and downstream. +// There are 2 return values: +// +// isEqual : result of comparing tables' columns and indices +// isPanic : the differences of tables' struct can not be ignored. Need to skip data comparing. +func CompareStruct(upstreamTableInfos []*model.TableInfo, downstreamTableInfo *model.TableInfo) (isEqual bool, isPanic bool) { + // compare columns + for _, upstreamTableInfo := range upstreamTableInfos { + if len(upstreamTableInfo.Columns) != len(downstreamTableInfo.Columns) { + // the numbers of each columns are different, don't compare data + log.Error("column num not equal", + zap.String("upstream table", upstreamTableInfo.Name.O), + zap.Int("column num", len(upstreamTableInfo.Columns)), + zap.String("downstream table", downstreamTableInfo.Name.O), + zap.Int("column num", len(downstreamTableInfo.Columns)), + ) + return false, true + } + + for i, column := range upstreamTableInfo.Columns { + if column.Name.O != downstreamTableInfo.Columns[i].Name.O { + // names are different, panic! + log.Error("column name not equal", + zap.String("upstream table", upstreamTableInfo.Name.O), + zap.String("column name", column.Name.O), + zap.String("downstream table", downstreamTableInfo.Name.O), + zap.String("column name", downstreamTableInfo.Columns[i].Name.O), + ) + return false, true + } + + if !isCompatible(column.GetType(), downstreamTableInfo.Columns[i].GetType()) { + // column types are different, panic! 
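+				// (for example an integer column against a string column), so the
+				// data comparison for this table is skipped entirely.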
+ log.Error("column type not compatible", + zap.String("upstream table", upstreamTableInfo.Name.O), + zap.String("column name", column.Name.O), + zap.Uint8("column type", column.GetType()), + zap.String("downstream table", downstreamTableInfo.Name.O), + zap.String("column name", downstreamTableInfo.Columns[i].Name.O), + zap.Uint8("column type", downstreamTableInfo.Columns[i].GetType()), + ) + return false, true + } + + if !sameProperties(column, downstreamTableInfo.Columns[i]) { + // column properties are different, panic! + log.Error("column properties not compatible", + zap.String("upstream table", upstreamTableInfo.Name.O), + zap.String("column name", column.Name.O), + zap.Uint8("column type", column.GetType()), + zap.String("downstream table", downstreamTableInfo.Name.O), + zap.String("column name", downstreamTableInfo.Columns[i].Name.O), + zap.Uint8("column type", downstreamTableInfo.Columns[i].GetType()), + ) + return false, true + } + } + } + + // compare indices + deleteIndicesSet := make(map[string]struct{}) + unilateralIndicesSet := make(map[string]struct{}) + downstreamIndicesMap := make(map[string]*struct { + index *model.IndexInfo + cnt int + }) + for _, index := range downstreamTableInfo.Indices { + downstreamIndicesMap[index.Name.O] = &struct { + index *model.IndexInfo + cnt int + }{index, 0} + } + for _, upstreamTableInfo := range upstreamTableInfos { + + NextIndex: + for _, upstreamIndex := range upstreamTableInfo.Indices { + if _, ok := deleteIndicesSet[upstreamIndex.Name.O]; ok { + continue NextIndex + } + + indexU, ok := downstreamIndicesMap[upstreamIndex.Name.O] + if ok { + if len(indexU.index.Columns) != len(upstreamIndex.Columns) { + // different index, should be removed + deleteIndicesSet[upstreamIndex.Name.O] = struct{}{} + continue NextIndex + } + + for i, indexColumn := range upstreamIndex.Columns { + if indexColumn.Offset != indexU.index.Columns[i].Offset || indexColumn.Name.O != indexU.index.Columns[i].Name.O { + // different index, should be removed + deleteIndicesSet[upstreamIndex.Name.O] = struct{}{} + continue NextIndex + } + } + indexU.cnt = indexU.cnt + 1 + } else { + unilateralIndicesSet[upstreamIndex.Name.O] = struct{}{} + } + } + } + + existBilateralIndex := false + for _, indexU := range downstreamIndicesMap { + if _, ok := deleteIndicesSet[indexU.index.Name.O]; ok { + continue + } + if indexU.cnt < len(upstreamTableInfos) { + // Some upstreamInfos don't have this index. + unilateralIndicesSet[indexU.index.Name.O] = struct{}{} + } else { + // there is an index the whole tables have, + // so unilateral indices can be deleted. + existBilateralIndex = true + } + } + + // delete indices + // If there exist bilateral index, unilateral indices can be deleted. 
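+	// Otherwise every index is kept and only a warning is logged.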
+ if existBilateralIndex { + for indexName := range unilateralIndicesSet { + deleteIndicesSet[indexName] = struct{}{} + } + } else { + log.Warn("no index exists in both upstream and downstream", zap.String("table", downstreamTableInfo.Name.O)) + } + if len(deleteIndicesSet) > 0 { + newDownstreamIndices := make([]*model.IndexInfo, 0, len(downstreamTableInfo.Indices)) + for _, index := range downstreamTableInfo.Indices { + if _, ok := deleteIndicesSet[index.Name.O]; !ok { + newDownstreamIndices = append(newDownstreamIndices, index) + } else { + log.Debug("delete downstream index", zap.String("name", downstreamTableInfo.Name.O), zap.String("index", index.Name.O)) + } + } + downstreamTableInfo.Indices = newDownstreamIndices + + for _, upstreamTableInfo := range upstreamTableInfos { + newUpstreamIndices := make([]*model.IndexInfo, 0, len(upstreamTableInfo.Indices)) + for _, index := range upstreamTableInfo.Indices { + if _, ok := deleteIndicesSet[index.Name.O]; !ok { + newUpstreamIndices = append(newUpstreamIndices, index) + } else { + log.Debug("delete upstream index", zap.String("name", upstreamTableInfo.Name.O), zap.String("index", index.Name.O)) + } + } + upstreamTableInfo.Indices = newUpstreamIndices + } + + } + + return len(deleteIndicesSet) == 0, false +} + +// NeedQuotes determines whether an escape character is required for `'`. +func NeedQuotes(tp byte) bool { + return !(dbutil.IsNumberType(tp) || dbutil.IsFloatType(tp)) +} + +// CompareData compare two row datas. +// equal = true: map1 = map2 +// equal = false: +// 1. cmp = 0: map1 and map2 have the same orderkeycolumns, but other columns are in difference. +// 2. cmp = -1: map1 < map2 (by comparing the orderkeycolumns) +// 3. cmp = 1: map1 > map2 +func CompareData(map1, map2 map[string]*dbutil.ColumnData, orderKeyCols, columns []*model.ColumnInfo) (equal bool, cmp int32, err error) { + var ( + data1, data2 *dbutil.ColumnData + str1, str2 string + key string + ok bool + ) + + equal = true + + defer func() { + if equal || err != nil { + return + } + + if cmp == 0 { + log.Warn("find different row", zap.String("column", key), zap.String("row1", rowToString(map1)), zap.String("row2", rowToString(map2))) + } else if cmp > 0 { + log.Warn("target had superfluous data", zap.String("row", rowToString(map2))) + } else { + log.Warn("target lack data", zap.String("row", rowToString(map1))) + } + }() + + for _, column := range columns { + if data1, ok = map1[column.Name.O]; !ok { + return false, 0, errors.Errorf("upstream don't have key %s", column.Name.O) + } + if data2, ok = map2[column.Name.O]; !ok { + return false, 0, errors.Errorf("downstream don't have key %s", column.Name.O) + } + str1 = string(data1.Data) + str2 = string(data2.Data) + if column.FieldType.GetType() == mysql.TypeFloat || column.FieldType.GetType() == mysql.TypeDouble { + if data1.IsNull && data2.IsNull { + continue + } else if !data1.IsNull && !data2.IsNull { + num1, err1 := strconv.ParseFloat(str1, 64) + num2, err2 := strconv.ParseFloat(str2, 64) + if err1 != nil || err2 != nil { + err = errors.Errorf("convert %s, %s to float failed, err1: %v, err2: %v", str1, str2, err1, err2) + return + } + if math.Abs(num1-num2) <= 1e-6 { + continue + } + } + } else if column.FieldType.GetType() == mysql.TypeJSON { + if (str1 == str2) || (data1.IsNull && data2.IsNull) { + continue + } + if !data1.IsNull && !data2.IsNull { + var v1, v2 any + err := json.Unmarshal(data1.Data, &v1) + if err != nil { + return false, 0, errors.Errorf("unmarshal json %s failed, error %v", str1, err) + } + err 
= json.Unmarshal(data2.Data, &v2) + if err != nil { + return false, 0, errors.Errorf("unmarshal json %s failed, error %v", str2, err) + } + if reflect.DeepEqual(v1, v2) { + continue + } + } + } else { + if (str1 == str2) && (data1.IsNull == data2.IsNull) { + continue + } + } + + equal = false + key = column.Name.O + break + + } + if equal { + return + } + + // Not Equal. Compare orderkeycolumns. + for _, col := range orderKeyCols { + if data1, ok = map1[col.Name.O]; !ok { + err = errors.Errorf("don't have key %s", col.Name.O) + return + } + if data2, ok = map2[col.Name.O]; !ok { + err = errors.Errorf("don't have key %s", col.Name.O) + return + } + + if NeedQuotes(col.FieldType.GetType()) { + strData1 := string(data1.Data) + strData2 := string(data2.Data) + + if len(strData1) == len(strData2) && strData1 == strData2 { + continue + } + + if strData1 < strData2 { + cmp = -1 + } else { + cmp = 1 + } + break + } else if data1.IsNull || data2.IsNull { + if data1.IsNull && data2.IsNull { + continue + } + + if data1.IsNull { + cmp = -1 + } else { + cmp = 1 + } + break + } else { + num1, err1 := strconv.ParseFloat(string(data1.Data), 64) + num2, err2 := strconv.ParseFloat(string(data2.Data), 64) + if err1 != nil || err2 != nil { + err = errors.Errorf("convert %s, %s to float failed, err1: %v, err2: %v", string(data1.Data), string(data2.Data), err1, err2) + return + } + + if num1 == num2 { + continue + } + + if num1 < num2 { + cmp = -1 + } else { + cmp = 1 + } + break + } + } + + return +} + +// rowtoString covert rowData to String +func rowToString(row map[string]*dbutil.ColumnData) string { + var s strings.Builder + s.WriteString("{ ") + for key, val := range row { + if val.IsNull { + s.WriteString(fmt.Sprintf("%s: IsNull, ", key)) + } else { + s.WriteString(fmt.Sprintf("%s: %s, ", key, val.Data)) + } + } + s.WriteString(" }") + + return s.String() +} + +// MinLenInSlices returns the smallest length among slices. +func MinLenInSlices(slices [][]string) int { + min := 0 + for i, slice := range slices { + if i == 0 || len(slice) < min { + min = len(slice) + } + } + + return min +} + +// SliceToMap converts Slice to Set +func SliceToMap(slice []string) map[string]interface{} { + sMap := make(map[string]interface{}) + for _, str := range slice { + sMap[str] = struct{}{} + } + return sMap +} + +// GetApproximateMidBySize return the `count`th row in rows that meet the `limitRange`. +func GetApproximateMidBySize(ctx context.Context, db *sql.DB, schema, table string, indexColumns []*model.ColumnInfo, limitRange string, args []interface{}, count int64) (map[string]string, error) { + /* + example + mysql> select i_id, i_im_id, i_name from item where i_id > 0 order by i_id, i_im_id, i_name collate limit 5000,1; + +------+---------+-----------------+ + | i_id | i_im_id | i_name | + +------+---------+-----------------+ + | 5001 | 3494 | S66WiWB3t1FUG02 | + +------+---------+-----------------+ + 1 row in set (0.09 sec) + */ + columnNames := make([]string, 0, len(indexColumns)) + for _, col := range indexColumns { + columnNames = append(columnNames, dbutil.ColumnName(col.Name.O)) + } + + // Note: add collation after order by will largely reduce the speed. + query := fmt.Sprintf("SELECT %s FROM %s WHERE %s ORDER BY %s LIMIT 1 OFFSET %d", + strings.Join(columnNames, ", "), + dbutil.TableName(schema, table), + limitRange, + strings.Join(columnNames, ", "), + count/2) + log.Debug("get mid by size", zap.String("sql", query), zap.Reflect("args", args)) + rows, err := db.QueryContext(ctx, query, args...) 
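+	// An empty result set here is not an error: it simply means the range has
+	// fewer rows than the requested offset, and (nil, nil) is returned below.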
+ if err != nil { + return nil, errors.Trace(err) + } + defer rows.Close() + columns := make([]interface{}, len(indexColumns)) + for i := range columns { + columns[i] = new(string) + } + if !rows.Next() { + if err := rows.Err(); err != nil { + return nil, errors.Trace(err) + } + log.Error("there is no row in result set") + return nil, nil + } + err = rows.Scan(columns...) + if err != nil { + return nil, errors.Trace(err) + } + columnValues := make(map[string]string) + for i, column := range columns { + columnValues[indexColumns[i].Name.O] = *column.(*string) + } + return columnValues, nil +} + +// GetTableSize loads the TableSize from `information_schema`.`tables`. +func GetTableSize(ctx context.Context, db *sql.DB, schemaName, tableName string) (int64, error) { + query := "select sum(data_length) as data from `information_schema`.`tables` where table_schema=? and table_name=? GROUP BY data_length;" + var dataSize sql.NullInt64 + err := db.QueryRowContext(ctx, query, schemaName, tableName).Scan(&dataSize) + if err != nil { + return int64(0), errors.Trace(err) + } + return dataSize.Int64, nil +} + +// GetCountAndMd5Checksum returns checksum code and count of some data by given condition +func GetCountAndMd5Checksum(ctx context.Context, db *sql.DB, schemaName, tableName string, tbInfo *model.TableInfo, limitRange string, args []interface{}) (int64, uint64, error) { + /* + calculate MD5 checksum and count example: + mysql> SELECT COUNT(*) as CNT, BIT_XOR(CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `name`, CONCAT(ISNULL(`id`), ISNULL(`name`)))), 1, 16), 16, 10) AS UNSIGNED) ^ CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `name`, CONCAT(ISNULL(`id`), ISNULL(`name`)))), 17, 16), 16, 10) AS UNSIGNED)) as CHECKSUM FROM `a`.`t`; + +--------+---------------------- + | CNT | CHECKSUM | + +--------+---------------------- + | 100000 | 3462532621352132810 | + +--------+---------------------- + 1 row in set (0.46 sec) + */ + columnNames := make([]string, 0, len(tbInfo.Columns)) + columnIsNull := make([]string, 0, len(tbInfo.Columns)) + log.Debug("table columns", zap.Any("columns", tbInfo.Columns)) + for _, col := range tbInfo.Columns { + if col.Hidden { + continue + } + name := dbutil.ColumnName(col.Name.O) + // When col value is 0, the result is NULL. + // But we can use ISNULL to distinguish between null and 0. 
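+		// Float and double values are rounded to a fixed number of significant
+		// digits so both sides feed the same textual value into the checksum.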
+ if col.FieldType.GetType() == mysql.TypeFloat { + name = fmt.Sprintf("round(%s, 5-floor(log10(abs(%s))))", name, name) + } else if col.FieldType.GetType() == mysql.TypeDouble { + name = fmt.Sprintf("round(%s, 14-floor(log10(abs(%s))))", name, name) + } + columnNames = append(columnNames, name) + columnIsNull = append(columnIsNull, fmt.Sprintf("ISNULL(%s)", name)) + } + + query := fmt.Sprintf("SELECT COUNT(*) as CNT, BIT_XOR(CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', %s, CONCAT(%s))), 1, 16), 16, 10) AS UNSIGNED) ^ CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', %s, CONCAT(%s))), 17, 16), 16, 10) AS UNSIGNED)) as CHECKSUM FROM %s WHERE %s;", + strings.Join(columnNames, ", "), strings.Join(columnIsNull, ", "), strings.Join(columnNames, ", "), strings.Join(columnIsNull, ", "), dbutil.TableName(schemaName, tableName), limitRange) + log.Debug("count and checksum", zap.String("sql", query), zap.Reflect("args", args)) + + var count sql.NullInt64 + var checksum uint64 + err := db.QueryRowContext(ctx, query, args...).Scan(&count, &checksum) + if err != nil { + log.Warn("execute checksum query fail", zap.String("query", query), zap.Reflect("args", args), zap.Error(err)) + return -1, 0, errors.Trace(err) + } + if !count.Valid { + // if don't have any data, the checksum will be `NULL` + log.Warn("get empty count", zap.String("sql", query), zap.Reflect("args", args)) + return 0, 0, nil + } + return count.Int64, checksum, nil +} + +// GetRandomValues returns some random values. Different from /pkg/dbutil.GetRandomValues, it returns multi-columns at the same time. +func GetRandomValues(ctx context.Context, db *sql.DB, schema, table string, columns []*model.ColumnInfo, num int, limitRange string, limitArgs []interface{}, collation string) ([][]string, error) { + /* + example: there is one index consists of `id`, `a`, `b`. + mysql> SELECT `id`, `a`, `b` FROM (SELECT `id`, `a`, `b`, rand() rand_value FROM `test`.`test` WHERE `id` COLLATE "latin1_bin" > 0 AND `id` COLLATE "latin1_bin" < 100 ORDER BY rand_value LIMIT 5) rand_tmp ORDER BY `id` COLLATE "latin1_bin"; + +------+------+------+ + | id | a | b | + +------+------+------+ + | 1 | 2 | 3 | + | 2 | 3 | 4 | + | 3 | 4 | 5 | + +------+------+------+ + */ + + if limitRange == "" { + limitRange = "TRUE" + } + + if collation != "" { + collation = fmt.Sprintf(" COLLATE '%s'", collation) + } + + columnNames := make([]string, 0, len(columns)) + for _, col := range columns { + columnNames = append(columnNames, dbutil.ColumnName(col.Name.O)) + } + + query := fmt.Sprintf("SELECT %[1]s FROM (SELECT %[1]s, rand() rand_value FROM %[2]s WHERE %[3]s ORDER BY rand_value LIMIT %[4]d)rand_tmp ORDER BY %[1]s%[5]s", + strings.Join(columnNames, ", "), dbutil.TableName(schema, table), limitRange, num, collation) + log.Debug("get random values", zap.String("sql", query), zap.Reflect("args", limitArgs)) + + rows, err := db.QueryContext(ctx, query, limitArgs...) + if err != nil { + return nil, errors.Trace(err) + } + defer rows.Close() + + randomValues := make([][]string, 0, num) +NEXTROW: + for rows.Next() { + colVals := make([][]byte, len(columns)) + colValsI := make([]interface{}, len(colVals)) + for i := range colValsI { + colValsI[i] = &colVals[i] + } + err = rows.Scan(colValsI...) 
+ if err != nil { + return nil, errors.Trace(err) + } + + randomValue := make([]string, len(columns)) + + for i, col := range colVals { + if col == nil { + continue NEXTROW + } + randomValue[i] = string(col) + } + randomValues = append(randomValues, randomValue) + } + + return randomValues, errors.Trace(rows.Err()) +} + +// ResetColumns removes index from `tableInfo.Indices`, whose columns appear in `columns`. +// And removes column from `tableInfo.Columns`, which appears in `columns`. +// And initializes the offset of the column of each index to new `tableInfo.Columns`. +// +// Return the new tableInfo and the flag whether the columns have timestamp type. +func ResetColumns(tableInfo *model.TableInfo, columns []string) (*model.TableInfo, bool) { + // Although columns is empty, need to initialize indices' offset mapping to column. + + hasTimeStampType := false + // Remove all index from `tableInfo.Indices`, whose columns are involved of any column in `columns`. + removeColMap := SliceToMap(columns) + for i := 0; i < len(tableInfo.Indices); i++ { + index := tableInfo.Indices[i] + for j := 0; j < len(index.Columns); j++ { + col := index.Columns[j] + if _, ok := removeColMap[col.Name.O]; ok { + tableInfo.Indices = append(tableInfo.Indices[:i], tableInfo.Indices[i+1:]...) + i-- + break + } + } + } + + // Remove column from `tableInfo.Columns`, which appears in `columns`. + for j := 0; j < len(tableInfo.Columns); j++ { + col := tableInfo.Columns[j] + if _, ok := removeColMap[col.Name.O]; ok { + tableInfo.Columns = append(tableInfo.Columns[:j], tableInfo.Columns[j+1:]...) + j-- + } + } + + // calculate column offset + colMap := make(map[string]int, len(tableInfo.Columns)) + for i, col := range tableInfo.Columns { + col.Offset = i + colMap[col.Name.O] = i + hasTimeStampType = hasTimeStampType || (col.FieldType.GetType() == mysql.TypeTimestamp) + } + + // Initialize the offset of the column of each index to new `tableInfo.Columns`. + for _, index := range tableInfo.Indices { + for _, col := range index.Columns { + offset, ok := colMap[col.Name.O] + if !ok { + // this should never happened + log.Fatal("column not exists", zap.String("column", col.Name.O)) + } + col.Offset = offset + } + } + + return tableInfo, hasTimeStampType +} + +// UniqueID returns `schema`.`table` +func UniqueID(schema string, table string) string { + // QuoteSchema quotes a full table name + return fmt.Sprintf("`%s`.`%s`", EscapeName(schema), EscapeName(table)) +} + +// EscapeName replaces all "`" in name with "“" +func EscapeName(name string) string { + return strings.Replace(name, "`", "``", -1) +} + +// GetBetterIndex returns the index more dinstict. +// If the index is primary key or unique, it can be return directly. +// Otherwise select the index which has higher value of `COUNT(DISTINCT a)/COUNT(*)`. 
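+// The selectivity used for ranking is estimated from the first column of each
+// index only, via GetSelectivity.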
+func GetBetterIndex(ctx context.Context, db *sql.DB, schema, table string, tableInfo *model.TableInfo) ([]*model.IndexInfo, error) { + // SELECT COUNT(DISTINCT city)/COUNT(*) FROM `schema`.`table`; + indices := dbutil.FindAllIndex(tableInfo) + for _, index := range indices { + if index.Primary || index.Unique { + return []*model.IndexInfo{index}, nil + } + } + sels := make([]float64, len(indices)) + for _, index := range indices { + column := GetColumnsFromIndex(index, tableInfo)[0] + selectivity, err := GetSelectivity(ctx, db, schema, table, column.Name.O, tableInfo) + if err != nil { + return indices, errors.Trace(err) + } + log.Debug("index selectivity", zap.String("table", dbutil.TableName(schema, table)), zap.Float64("selectivity", selectivity)) + sels = append(sels, selectivity) + } + sort.Slice(indices, func(i, j int) bool { + return sels[i] > sels[j] + }) + return indices, nil +} + +// GetSelectivity returns the value of `COUNT(DISTINCT col)/COUNT(1)` SQL. +func GetSelectivity(ctx context.Context, db *sql.DB, schemaName, tableName, columnName string, tbInfo *model.TableInfo) (float64, error) { + query := fmt.Sprintf("SELECT COUNT(DISTINCT %s)/COUNT(1) as SEL FROM %s;", dbutil.ColumnName(columnName), dbutil.TableName(schemaName, tableName)) + var selectivity sql.NullFloat64 + args := []interface{}{} + err := db.QueryRowContext(ctx, query, args...).Scan(&selectivity) + if err != nil { + log.Warn("execute get selectivity query fail", zap.String("query", query)) + return 0.0, errors.Trace(err) + } + if !selectivity.Valid { + // if don't have any data, the checksum will be `NULL` + log.Warn("get empty count or checksum", zap.String("sql", query)) + return 0.0, nil + } + return selectivity.Float64, nil +} + +// CalculateChunkSize returns chunkSize according to table rows count. +func CalculateChunkSize(rowCount int64) int64 { + // we assume chunkSize is 50000 for any cluster. + chunkSize := int64(50000) + if rowCount > int64(chunkSize)*10000 { + // we assume we only need 10k chunks for any table. + chunkSize = rowCount / 10000 + } + return chunkSize +} + +// AnalyzeTable do 'ANALYZE TABLE `table`' SQL. +func AnalyzeTable(ctx context.Context, db *sql.DB, tableName string) error { + _, err := db.ExecContext(ctx, "ANALYZE TABLE "+tableName) + return err +} + +// GetSQLFileName returns filename of fix-SQL identified by chunk's `Index`. +func GetSQLFileName(index *chunk.ChunkID) string { + return fmt.Sprintf("%d:%d-%d:%d", index.TableIndex, index.BucketIndexLeft, index.BucketIndexRight, index.ChunkIndex) +} + +// GetChunkIDFromSQLFileName convert the filename to chunk's `Index`. +func GetChunkIDFromSQLFileName(fileIDStr string) (int, int, int, int, error) { + ids := strings.Split(fileIDStr, ":") + tableIndex, err := strconv.Atoi(ids[0]) + if err != nil { + return 0, 0, 0, 0, errors.Trace(err) + } + bucketIndex := strings.Split(ids[1], "-") + bucketIndexLeft, err := strconv.Atoi(bucketIndex[0]) + if err != nil { + return 0, 0, 0, 0, errors.Trace(err) + } + bucketIndexRight, err := strconv.Atoi(bucketIndex[1]) + if err != nil { + return 0, 0, 0, 0, errors.Trace(err) + } + chunkIndex, err := strconv.Atoi(ids[2]) + if err != nil { + return 0, 0, 0, 0, errors.Trace(err) + } + return tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, nil +} + +// IsRangeTrivial checks if a user configured Range is empty or `TRUE`. 
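+// The check is case-insensitive, so both "TRUE" and "true" count as trivial.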
+func IsRangeTrivial(rangeCond string) bool { + if rangeCond == "" { + return true + } + return strings.ToLower(rangeCond) == "true" +} + +func IsBinaryColumn(col *model.ColumnInfo) bool { + // varbinary or binary + return (col.GetType() == mysql.TypeVarchar || col.GetType() == mysql.TypeString) && mysql.HasBinaryFlag(col.GetFlag()) +} diff --git a/sync_diff_inspector/utils/utils_test.go b/sync_diff_inspector/utils/utils_test.go new file mode 100644 index 00000000000..969add2c0f6 --- /dev/null +++ b/sync_diff_inspector/utils/utils_test.go @@ -0,0 +1,688 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "context" + "database/sql/driver" + "fmt" + "testing" + "time" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/stretchr/testify/require" +) + +type tableCaseType struct { + schema string + table string + createTableSQL string + rowColumns []string + rows [][]driver.Value + indices []string + sels []float64 + selected string +} + +func TestWorkerPool(t *testing.T) { + pool := NewWorkerPool(2, "test") + infoCh := make(chan uint64) + doneCh := make(chan struct{}) + var v uint64 = 0 + pool.Apply(func() { + infoCh <- 2 + }) + pool.Apply(func() { + new_v := <-infoCh + v = new_v + doneCh <- struct{}{} + }) + <-doneCh + require.Equal(t, v, uint64(2)) + require.True(t, pool.HasWorker()) + pool.WaitFinished() +} + +func TestStringsToInterface(t *testing.T) { + res := []interface{}{"1", "2", "3"} + require.Equal(t, res[0], "1") + require.Equal(t, res[1], "2") + require.Equal(t, res[2], "3") + + require.Equal(t, MinLenInSlices([][]string{{"123", "324", "r32"}, {"32", "23"}}), 2) + + expectSlice := []string{"2", "3", "4"} + sliceMap := SliceToMap(expectSlice) + for _, expect := range expectSlice { + _, ok := sliceMap[expect] + require.True(t, ok) + } + require.Equal(t, len(sliceMap), len(expectSlice)) + + require.Equal(t, UniqueID("123", "456"), "`123`.`456`") + +} + +func TestBasicTableUtilOperation(t *testing.T) { + createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + query, orderKeyCols := GetTableRowsQueryFormat("test", "test", tableInfo, "123") + require.Equal(t, query, "SELECT /*!40001 SQL_NO_CACHE */ `a`, `b`, round(`c`, 5-floor(log10(abs(`c`)))) as `c`, `d` FROM `test`.`test` WHERE %s ORDER BY `a`,`b` COLLATE '123'") + expectName := []string{"a", "b"} + for i, col := range orderKeyCols { + require.Equal(t, col.Name.O, expectName[i]) + } + + data1 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte("a"), IsNull: false}, + "c": {Data: []byte("1.22"), IsNull: false}, + "d": {Data: []byte("sdf"), IsNull: false}, + } + data2 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": 
{Data: []byte("b"), IsNull: false}, + "c": {Data: []byte("2.22"), IsNull: false}, + "d": {Data: []byte("sdf"), IsNull: false}, + } + data3 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("2"), IsNull: false}, + "b": {Data: []byte("a"), IsNull: false}, + "c": {Data: []byte("0.22"), IsNull: false}, + "d": {Data: []byte("asdf"), IsNull: false}, + } + data4 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte("a"), IsNull: true}, + "c": {Data: []byte("0.221"), IsNull: false}, + "d": {Data: []byte("asdf"), IsNull: false}, + } + data5 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("2"), IsNull: false}, + "b": {Data: []byte("a"), IsNull: true}, + "c": {Data: []byte("0.222"), IsNull: false}, + "d": {Data: []byte("asdf"), IsNull: false}, + } + data6 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: true}, + "b": {Data: []byte("a"), IsNull: false}, + "c": {Data: []byte("0.2221"), IsNull: false}, + "d": {Data: []byte("asdf"), IsNull: false}, + } + data7 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: true}, + "b": {Data: []byte("a"), IsNull: false}, + "c": {Data: []byte("0.2221"), IsNull: false}, + "d": {Data: []byte("asdf"), IsNull: false}, + } + data8 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte("a"), IsNull: false}, + "c": {Data: []byte(""), IsNull: true}, + "d": {Data: []byte("sdf"), IsNull: false}, + } + data9 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte("a"), IsNull: false}, + "c": {Data: []byte("0"), IsNull: false}, + "d": {Data: []byte("sdf"), IsNull: false}, + } + + columns := tableInfo.Columns + + require.Equal(t, GenerateReplaceDML(data1, tableInfo, "schema"), "REPLACE INTO `schema`.`test`(`a`,`b`,`c`,`d`) VALUES (1,'a',1.22,'sdf');") + require.Equal(t, GenerateDeleteDML(data8, tableInfo, "schema"), "DELETE FROM `schema`.`test` WHERE `a` = 1 AND `b` = 'a' AND `c` is NULL AND `d` = 'sdf' LIMIT 1;") + require.Equal(t, GenerateDeleteDML(data9, tableInfo, "schema"), "DELETE FROM `schema`.`test` WHERE `a` = 1 AND `b` = 'a' AND `c` = 0 AND `d` = 'sdf' LIMIT 1;") + require.Equal(t, GenerateReplaceDMLWithAnnotation(data1, data2, tableInfo, "schema"), + "/*\n"+ + " DIFF COLUMNS ╏ `B` ╏ `C` \n"+ + "╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╍╍\n"+ + " source data ╏ 'a' ╏ 1.22 \n"+ + "╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╍╍\n"+ + " target data ╏ 'b' ╏ 2.22 \n"+ + "╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╍╍\n"+ + "*/\n"+ + "REPLACE INTO `schema`.`test`(`a`,`b`,`c`,`d`) VALUES (1,'a',1.22,'sdf');") + require.Equal(t, GenerateDeleteDML(data1, tableInfo, "schema"), "DELETE FROM `schema`.`test` WHERE `a` = 1 AND `b` = 'a' AND `c` = 1.22 AND `d` = 'sdf' LIMIT 1;") + + // same + equal, cmp, err := CompareData(data1, data1, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.True(t, equal) + + // orderkey same but other column different + equal, cmp, err = CompareData(data1, data3, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(-1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data3, data1, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(1)) + require.False(t, equal) + + // orderKey different + equal, cmp, err = CompareData(data1, data2, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(-1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data2, data1, orderKeyCols, columns) + 
require.NoError(t, err) + require.Equal(t, cmp, int32(1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data4, data1, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.False(t, equal) + + equal, cmp, err = CompareData(data1, data4, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.False(t, equal) + + equal, cmp, err = CompareData(data5, data4, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data4, data5, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(-1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data4, data6, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data6, data4, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(-1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data6, data7, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.True(t, equal) + + equal, cmp, err = CompareData(data1, data8, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.False(t, equal) + + equal, cmp, err = CompareData(data8, data1, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.False(t, equal) + + equal, cmp, err = CompareData(data8, data9, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.False(t, equal) + + // Test ignore columns + createTableSQL = "create table `test`.`test`(`a` int, `c` float, `b` varchar(10), `d` datetime, `e` timestamp, primary key(`a`, `b`), key(`c`, `d`))" + tableInfo, err = dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + require.Equal(t, len(tableInfo.Indices), 2) + require.Equal(t, len(tableInfo.Columns), 5) + require.Equal(t, tableInfo.Indices[0].Columns[1].Name.O, "b") + require.Equal(t, tableInfo.Indices[0].Columns[1].Offset, 2) + info, hasTimeStampType := ResetColumns(tableInfo, []string{"c"}) + require.True(t, hasTimeStampType) + require.Equal(t, len(info.Indices), 1) + require.Equal(t, len(info.Columns), 4) + require.Equal(t, tableInfo.Indices[0].Columns[1].Name.O, "b") + require.Equal(t, tableInfo.Indices[0].Columns[1].Offset, 1) +} + +func TestGetCountAndMd5Checksum(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + createTableSQL := "create table `test`.`test`(`a` int, `c` float, `b` varchar(10), `d` datetime, primary key(`a`, `b`), key(`c`, `d`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + mock.ExpectQuery("SELECT COUNT.*FROM `test_schema`\\.`test_table` WHERE \\[23 45\\].*").WithArgs("123", "234").WillReturnRows(sqlmock.NewRows([]string{"CNT", "CHECKSUM"}).AddRow(123, 456)) + + count, checksum, err := GetCountAndMd5Checksum(ctx, conn, "test_schema", "test_table", tableInfo, "[23 45]", []interface{}{"123", "234"}) + require.NoError(t, err) + require.Equal(t, count, int64(123)) + require.Equal(t, checksum, uint64(0x1c8)) +} + +func TestGetApproximateMid(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + conn, mock, err := 
sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + rows := sqlmock.NewRows([]string{"a", "b"}).AddRow("5", "10") + mock.ExpectQuery("SELECT `a`, `b` FROM `test`.`test_utils` WHERE 2222 ORDER BY `a`, `b` LIMIT 1 OFFSET 10").WithArgs("aaaa").WillReturnRows(rows) + + data, err := GetApproximateMidBySize(ctx, conn, "test", "test_utils", tableInfo.Columns, "2222", []interface{}{"aaaa"}, 20) + require.NoError(t, err) + require.Equal(t, data["a"], "5") + require.Equal(t, data["b"], "10") + + // no data + rows = sqlmock.NewRows([]string{"a", "b"}) + mock.ExpectQuery("SELECT `a`, `b` FROM `test`\\.`test_utils` WHERE 2222.* LIMIT 1 OFFSET 10*").WithArgs("aaaa").WillReturnRows(rows) + + data, err = GetApproximateMidBySize(ctx, conn, "test", "test_utils", tableInfo.Columns, "2222", []interface{}{"aaaa"}, 20) + require.NoError(t, err) + require.Nil(t, data) +} + +func TestGenerateSQLs(t *testing.T) { + createTableSQL := "CREATE TABLE `diff_test`.`atest` (`id` int(24), `name` varchar(24), `birthday` datetime, `update_time` time, `money` decimal(20,2), `id_gen` int(11) GENERATED ALWAYS AS ((`id` + 1)) VIRTUAL, primary key(`id`, `name`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + rowsData := map[string]*dbutil.ColumnData{ + "id": {Data: []byte("1"), IsNull: false}, + "name": {Data: []byte("xxx"), IsNull: false}, + "birthday": {Data: []byte("2018-01-01 00:00:00"), IsNull: false}, + "update_time": {Data: []byte("10:10:10"), IsNull: false}, + "money": {Data: []byte("11.1111"), IsNull: false}, + "id_gen": {Data: []byte("2"), IsNull: false}, // generated column should not be contained in fix sql + } + + replaceSQL := GenerateReplaceDML(rowsData, tableInfo, "diff_test") + deleteSQL := GenerateDeleteDML(rowsData, tableInfo, "diff_test") + require.Equal(t, replaceSQL, "REPLACE INTO `diff_test`.`atest`(`id`,`name`,`birthday`,`update_time`,`money`) VALUES (1,'xxx','2018-01-01 00:00:00','10:10:10',11.1111);") + require.Equal(t, deleteSQL, "DELETE FROM `diff_test`.`atest` WHERE `id` = 1 AND `name` = 'xxx' AND `birthday` = '2018-01-01 00:00:00' AND `update_time` = '10:10:10' AND `money` = 11.1111 LIMIT 1;") + + // test the unique key + createTableSQL2 := "CREATE TABLE `diff_test`.`atest` (`id` int(24), `name` varchar(24), `birthday` datetime, `update_time` time, `money` decimal(20,2), unique key(`id`, `name`))" + tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + replaceSQL = GenerateReplaceDML(rowsData, tableInfo2, "diff_test") + deleteSQL = GenerateDeleteDML(rowsData, tableInfo2, "diff_test") + require.Equal(t, replaceSQL, "REPLACE INTO `diff_test`.`atest`(`id`,`name`,`birthday`,`update_time`,`money`) VALUES (1,'xxx','2018-01-01 00:00:00','10:10:10',11.1111);") + require.Equal(t, deleteSQL, "DELETE FROM `diff_test`.`atest` WHERE `id` = 1 AND `name` = 'xxx' AND `birthday` = '2018-01-01 00:00:00' AND `update_time` = '10:10:10' AND `money` = 11.1111 LIMIT 1;") + + // test value is nil + rowsData["name"] = &dbutil.ColumnData{Data: []byte(""), IsNull: true} + replaceSQL = GenerateReplaceDML(rowsData, tableInfo, "diff_test") + deleteSQL = GenerateDeleteDML(rowsData, tableInfo, "diff_test") + require.Equal(t, replaceSQL, "REPLACE INTO `diff_test`.`atest`(`id`,`name`,`birthday`,`update_time`,`money`) 
VALUES (1,NULL,'2018-01-01 00:00:00','10:10:10',11.1111);") + require.Equal(t, deleteSQL, "DELETE FROM `diff_test`.`atest` WHERE `id` = 1 AND `name` is NULL AND `birthday` = '2018-01-01 00:00:00' AND `update_time` = '10:10:10' AND `money` = 11.1111 LIMIT 1;") + + rowsData["id"] = &dbutil.ColumnData{Data: []byte(""), IsNull: true} + replaceSQL = GenerateReplaceDML(rowsData, tableInfo, "diff_test") + deleteSQL = GenerateDeleteDML(rowsData, tableInfo, "diff_test") + require.Equal(t, replaceSQL, "REPLACE INTO `diff_test`.`atest`(`id`,`name`,`birthday`,`update_time`,`money`) VALUES (NULL,NULL,'2018-01-01 00:00:00','10:10:10',11.1111);") + require.Equal(t, deleteSQL, "DELETE FROM `diff_test`.`atest` WHERE `id` is NULL AND `name` is NULL AND `birthday` = '2018-01-01 00:00:00' AND `update_time` = '10:10:10' AND `money` = 11.1111 LIMIT 1;") + + // test value with "'" + rowsData["name"] = &dbutil.ColumnData{Data: []byte("a'a"), IsNull: false} + replaceSQL = GenerateReplaceDML(rowsData, tableInfo, "diff_test") + deleteSQL = GenerateDeleteDML(rowsData, tableInfo, "diff_test") + require.Equal(t, replaceSQL, "REPLACE INTO `diff_test`.`atest`(`id`,`name`,`birthday`,`update_time`,`money`) VALUES (NULL,'a\\'a','2018-01-01 00:00:00','10:10:10',11.1111);") + require.Equal(t, deleteSQL, "DELETE FROM `diff_test`.`atest` WHERE `id` is NULL AND `name` = 'a\\'a' AND `birthday` = '2018-01-01 00:00:00' AND `update_time` = '10:10:10' AND `money` = 11.1111 LIMIT 1;") +} + +func TestResetColumns(t *testing.T) { + createTableSQL1 := "CREATE TABLE `test`.`atest` (`a` int, `b` int, `c` int, `d` int, primary key(`a`))" + tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + tbInfo, hasTimeStampType := ResetColumns(tableInfo1, []string{"a"}) + require.Equal(t, len(tbInfo.Columns), 3) + require.Equal(t, len(tbInfo.Indices), 0) + require.Equal(t, tbInfo.Columns[2].Offset, 2) + require.False(t, hasTimeStampType) + + createTableSQL2 := "CREATE TABLE `test`.`atest` (`a` int, `b` int, `c` int, `d` int, primary key(`a`), index idx(`b`, `c`))" + tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + tbInfo, _ = ResetColumns(tableInfo2, []string{"a", "b"}) + require.Equal(t, len(tbInfo.Columns), 2) + require.Equal(t, len(tbInfo.Indices), 0) + + createTableSQL3 := "CREATE TABLE `test`.`atest` (`a` int, `b` int, `c` int, `d` int, primary key(`a`), index idx(`b`, `c`))" + tableInfo3, err := dbutil.GetTableInfoBySQL(createTableSQL3, parser.New()) + require.NoError(t, err) + tbInfo, _ = ResetColumns(tableInfo3, []string{"b", "c"}) + require.Equal(t, len(tbInfo.Columns), 2) + require.Equal(t, len(tbInfo.Indices), 1) +} + +func TestGetTableSize(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + dataRows := sqlmock.NewRows([]string{"a", "b"}) + rowNums := 1000 + for k := 0; k < rowNums; k++ { + str := fmt.Sprintf("%d", k) + dataRows.AddRow(str, str) + } + sizeRows := sqlmock.NewRows([]string{"data"}) + sizeRows.AddRow("8000") + mock.ExpectQuery("data").WillReturnRows(sizeRows) + size, err := GetTableSize(ctx, conn, "test", "test") + require.NoError(t, err) + require.Equal(t, size, int64(8000)) +} + +func TestGetBetterIndex(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer 
conn.Close() + tableCases := []*tableCaseType{ + { + schema: "single_index", + table: "test1", + createTableSQL: "CREATE TABLE `single_index`.`test1` (`a` int, `b` char, primary key(`a`), index(`b`))", + rowColumns: []string{"a", "b"}, + rows: [][]driver.Value{ + {"1", "a"}, + {"2", "a"}, + {"3", "b"}, + {"4", "b"}, + {"5", "c"}, + {"6", "c"}, + {"7", "d"}, + {"8", "d"}, + {"9", "e"}, + {"A", "e"}, + {"B", "f"}, + {"C", "f"}, + }, + indices: []string{"PRIMARY", "b"}, + sels: []float64{1.0, 0.5}, + selected: "PRIMARY", + }, { + schema: "single_index", + table: "test1", + createTableSQL: "CREATE TABLE `single_index`.`test1` (`a` int, `b` char, index(a), index(b))", + rowColumns: []string{"a", "b"}, + rows: [][]driver.Value{ + {"1", "a"}, + {"2", "a"}, + {"3", "b"}, + {"4", "b"}, + {"5", "c"}, + {"6", "c"}, + {"7", "d"}, + {"8", "d"}, + {"9", "e"}, + {"A", "e"}, + {"B", "f"}, + {"C", "f"}, + }, + indices: []string{"a", "b"}, + sels: []float64{1.0, 0.5}, + selected: "a", + }, + } + tableCase := tableCases[0] + tableInfo, err := dbutil.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) + require.NoError(t, err) + indices := dbutil.FindAllIndex(tableInfo) + for i, index := range indices { + require.Equal(t, index.Name.O, tableCase.indices[i]) + } + for i, col := range tableCase.rowColumns { + retRows := sqlmock.NewRows([]string{"SEL"}) + retRows.AddRow(tableCase.sels[i]) + mock.ExpectQuery("SELECT").WillReturnRows(retRows) + sel, err := GetSelectivity(ctx, conn, tableCase.schema, tableCase.table, col, tableInfo) + require.NoError(t, err) + require.Equal(t, sel, tableCase.sels[i]) + } + indices, err = GetBetterIndex(ctx, conn, "single_index", "test1", tableInfo) + require.NoError(t, err) + require.Equal(t, indices[0].Name.O, tableCase.selected) + + tableCase = tableCases[1] + tableInfo, err = dbutil.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) + require.NoError(t, err) + indices = dbutil.FindAllIndex(tableInfo) + for i, index := range indices { + require.Equal(t, index.Name.O, tableCase.indices[i]) + } + for i, col := range tableCase.rowColumns { + retRows := sqlmock.NewRows([]string{"SEL"}) + retRows.AddRow(tableCase.sels[i]) + mock.ExpectQuery("SELECT").WillReturnRows(retRows) + sel, err := GetSelectivity(ctx, conn, tableCase.schema, tableCase.table, col, tableInfo) + require.NoError(t, err) + require.Equal(t, sel, tableCase.sels[i]) + } + mock.ExpectQuery("SELECT COUNT\\(DISTINCT `a.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("2")) + mock.ExpectQuery("SELECT COUNT\\(DISTINCT `b.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("5")) + indices, err = GetBetterIndex(ctx, conn, "single_index", "test1", tableInfo) + require.NoError(t, err) + require.Equal(t, indices[0].Name.O, tableCase.selected) + +} + +func TestCalculateChunkSize(t *testing.T) { + require.Equal(t, CalculateChunkSize(1000), int64(50000)) + require.Equal(t, CalculateChunkSize(1000000000), int64(100000)) +} + +func TestGetSQLFileName(t *testing.T) { + index := &chunk.ChunkID{ + TableIndex: 1, + BucketIndexLeft: 2, + BucketIndexRight: 3, + ChunkIndex: 4, + ChunkCnt: 10, + } + require.Equal(t, GetSQLFileName(index), "1:2-3:4") +} + +func TestGetChunkIDFromSQLFileName(t *testing.T) { + tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, err := GetChunkIDFromSQLFileName("11:12-13:14") + require.NoError(t, err) + require.Equal(t, tableIndex, 11) + require.Equal(t, bucketIndexLeft, 12) + require.Equal(t, bucketIndexRight, 13) + require.Equal(t, chunkIndex, 14) +} + +func 
TestCompareStruct(t *testing.T) { + createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + var isEqual bool + var isPanic bool + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo}, tableInfo) + require.True(t, isEqual) + require.False(t, isPanic) + + // column length different + createTableSQL2 := "create table `test`(`a` int, `b` varchar(10), `c` float, primary key(`a`, `b`), index(`c`))" + tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.False(t, isEqual) + require.True(t, isPanic) + + // column name differernt + createTableSQL2 = "create table `test`(`aa` int, `b` varchar(10), `c` float, `d` datetime, primary key(`aa`, `b`), index(`c`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.False(t, isEqual) + require.True(t, isPanic) + + // column type compatible + createTableSQL2 = "create table `test`(`a` int, `b` char(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.True(t, isEqual) + require.False(t, isPanic) + + createTableSQL2 = "create table `test`(`a` int(11), `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.True(t, isEqual) + require.False(t, isPanic) + + // column type not compatible + createTableSQL2 = "create table `test`(`a` int, `b` varchar(10), `c` int, `d` datetime, primary key(`a`, `b`), index(`c`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.False(t, isEqual) + require.True(t, isPanic) + + // column properties not compatible + createTableSQL2 = "create table `test`(`a` int, `b` varchar(11), `c` int, `d` datetime, primary key(`a`, `b`), index(`c`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.False(t, isEqual) + require.True(t, isPanic) + + // index check + + // index different + createTableSQL2 = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.False(t, isEqual) + require.False(t, isPanic) + require.Equal(t, len(tableInfo.Indices), 1) + require.Equal(t, tableInfo.Indices[0].Name.O, "PRIMARY") + + // index column different + createTableSQL = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" + 
tableInfo, err = dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + createTableSQL2 = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `c`), index(`c`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.False(t, isEqual) + require.False(t, isPanic) + require.Equal(t, len(tableInfo.Indices), 1) + require.Equal(t, tableInfo.Indices[0].Name.O, "c") + +} + +func TestGenerateSQLBlob(t *testing.T) { + rowsData := map[string]*dbutil.ColumnData{ + "id": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte("foo"), IsNull: false}, + } + + cases := []struct { + createTableSql string + }{ + {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` tinyblob)"}, + {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` blob)"}, + {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` mediumblob)"}, + {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` longblob)"}, + } + + for _, c := range cases { + tableInfo, err := dbutil.GetTableInfoBySQL(c.createTableSql, parser.New()) + require.NoError(t, err) + + replaceSQL := GenerateReplaceDML(rowsData, tableInfo, "diff_test") + deleteSQL := GenerateDeleteDML(rowsData, tableInfo, "diff_test") + require.Equal(t, replaceSQL, "REPLACE INTO `diff_test`.`atest`(`id`,`b`) VALUES (1,x'666f6f');") + require.Equal(t, deleteSQL, "DELETE FROM `diff_test`.`atest` WHERE `id` = 1 AND `b` = x'666f6f' LIMIT 1;") + } +} + +func TestCompareBlob(t *testing.T) { + createTableSQL := "create table `test`.`test`(`a` int primary key, `b` blob)" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + _, orderKeyCols := GetTableRowsQueryFormat("test", "test", tableInfo, "123") + + data1 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte{0xff, 0xfe}, IsNull: false}, + } + data2 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte{0xfe, 0xff}, IsNull: false}, + } + data3 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte("foobar"), IsNull: false}, + } + + columns := tableInfo.Columns + + cases := []struct { + data1 map[string]*dbutil.ColumnData + dataOthers []map[string]*dbutil.ColumnData + }{ + {data1, []map[string]*dbutil.ColumnData{data2, data3}}, + {data2, []map[string]*dbutil.ColumnData{data1, data3}}, + {data3, []map[string]*dbutil.ColumnData{data1, data2}}, + } + + for _, c := range cases { + equal, cmp, err := CompareData(c.data1, c.data1, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.True(t, equal) + + for _, data := range c.dataOthers { + equal, cmp, err = CompareData(c.data1, data, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.False(t, equal) + } + } +} From 4437f90d455420adc6bc2be95597cf0774c18f30 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 13:58:20 +0800 Subject: [PATCH 02/22] Update --- sync_diff_inspector/checkpoints/checkpoints.go | 8 +++----- sync_diff_inspector/config/config.go | 2 +- sync_diff_inspector/diff/diff.go | 2 +- sync_diff_inspector/source/common/rows.go | 2 +- sync_diff_inspector/source/common/table_diff.go | 2 +- 
sync_diff_inspector/source/mysql_shard.go | 5 ++--- sync_diff_inspector/source/source.go | 2 +- sync_diff_inspector/source/source_test.go | 8 ++++---- sync_diff_inspector/source/tidb.go | 5 ++--- sync_diff_inspector/splitter/bucket.go | 2 +- sync_diff_inspector/splitter/index_fields.go | 2 +- sync_diff_inspector/splitter/limit.go | 2 +- sync_diff_inspector/splitter/random.go | 2 +- sync_diff_inspector/utils/table.go | 14 ++++++++------ sync_diff_inspector/utils/utils.go | 2 +- 15 files changed, 29 insertions(+), 31 deletions(-) diff --git a/sync_diff_inspector/checkpoints/checkpoints.go b/sync_diff_inspector/checkpoints/checkpoints.go index 010fdd2a9c2..8863cad1c51 100644 --- a/sync_diff_inspector/checkpoints/checkpoints.go +++ b/sync_diff_inspector/checkpoints/checkpoints.go @@ -20,14 +20,12 @@ import ( "os" "sync" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/config" "github.com/pingcap/tiflow/sync_diff_inspector/report" - - "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/siddontang/go/ioutil2" - - "github.com/pingcap/errors" - "github.com/pingcap/log" "go.uber.org/zap" ) diff --git a/sync_diff_inspector/config/config.go b/sync_diff_inspector/config/config.go index 3ab749bc890..789176ed37d 100644 --- a/sync_diff_inspector/config/config.go +++ b/sync_diff_inspector/config/config.go @@ -32,7 +32,7 @@ import ( "github.com/google/uuid" "github.com/pingcap/errors" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" tidbutil "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/dbutil" filter "github.com/pingcap/tidb/pkg/util/table-filter" diff --git a/sync_diff_inspector/diff/diff.go b/sync_diff_inspector/diff/diff.go index 6ebc69f65cd..239e7a84f81 100644 --- a/sync_diff_inspector/diff/diff.go +++ b/sync_diff_inspector/diff/diff.go @@ -30,7 +30,7 @@ import ( "github.com/pingcap/failpoint" "github.com/pingcap/log" tidbconfig "github.com/pingcap/tidb/pkg/config" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tiflow/sync_diff_inspector/checkpoints" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" diff --git a/sync_diff_inspector/source/common/rows.go b/sync_diff_inspector/source/common/rows.go index a97204881f2..27470c15042 100644 --- a/sync_diff_inspector/source/common/rows.go +++ b/sync_diff_inspector/source/common/rows.go @@ -17,7 +17,7 @@ import ( "strconv" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tiflow/sync_diff_inspector/utils" "go.uber.org/zap" diff --git a/sync_diff_inspector/source/common/table_diff.go b/sync_diff_inspector/source/common/table_diff.go index 1d8befb7a11..2960f0ba7cb 100644 --- a/sync_diff_inspector/source/common/table_diff.go +++ b/sync_diff_inspector/source/common/table_diff.go @@ -16,7 +16,7 @@ package common import ( "database/sql" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" ) // TableShardSource represents the origin schema and table and DB connection before router. 
diff --git a/sync_diff_inspector/source/mysql_shard.go b/sync_diff_inspector/source/mysql_shard.go index 2a43f48081f..ff0edd9e8c9 100644 --- a/sync_diff_inspector/source/mysql_shard.go +++ b/sync_diff_inspector/source/mysql_shard.go @@ -20,13 +20,12 @@ import ( "fmt" "time" - tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" - "github.com/pingcap/errors" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tidb/pkg/util/filter" + tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" "github.com/pingcap/tiflow/sync_diff_inspector/config" "github.com/pingcap/tiflow/sync_diff_inspector/source/common" "github.com/pingcap/tiflow/sync_diff_inspector/splitter" diff --git a/sync_diff_inspector/source/source.go b/sync_diff_inspector/source/source.go index 5e615488886..a3f7500a019 100644 --- a/sync_diff_inspector/source/source.go +++ b/sync_diff_inspector/source/source.go @@ -23,7 +23,7 @@ import ( "github.com/go-sql-driver/mysql" "github.com/pingcap/errors" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tidb/pkg/util/filter" tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" diff --git a/sync_diff_inspector/source/source_test.go b/sync_diff_inspector/source/source_test.go index bb3ea1b02ab..58bbe853eca 100644 --- a/sync_diff_inspector/source/source_test.go +++ b/sync_diff_inspector/source/source_test.go @@ -25,8 +25,10 @@ import ( "time" "github.com/DATA-DOG/go-sqlmock" + _ "github.com/go-sql-driver/mysql" "github.com/pingcap/tidb/pkg/parser" "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" filter "github.com/pingcap/tidb/pkg/util/table-filter" router "github.com/pingcap/tidb/pkg/util/table-router" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" @@ -35,8 +37,6 @@ import ( "github.com/pingcap/tiflow/sync_diff_inspector/splitter" "github.com/pingcap/tiflow/sync_diff_inspector/utils" "github.com/stretchr/testify/require" - - _ "github.com/go-sql-driver/mysql" ) type tableCaseType struct { @@ -296,7 +296,7 @@ func TestFallbackToRandomIfRangeIsSet(t *testing.T) { "`c` char(120) NOT NULL DEFAULT '', " + "PRIMARY KEY (`id`), KEY `k_1` (`k`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) table1 := &common.TableDiff{ @@ -631,7 +631,7 @@ func TestTiDBRouter(t *testing.T) { func prepareTiDBTables(t *testing.T, tableCases []*tableCaseType) []*common.TableDiff { tableDiffs := make([]*common.TableDiff, 0, len(tableCases)) for n, tableCase := range tableCases { - tableInfo, err := dbutil.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) require.NoError(t, err) tableDiffs = append(tableDiffs, &common.TableDiff{ Schema: "source_test", diff --git a/sync_diff_inspector/source/tidb.go b/sync_diff_inspector/source/tidb.go index 0af78384cc1..a021275ee6e 100644 --- a/sync_diff_inspector/source/tidb.go +++ b/sync_diff_inspector/source/tidb.go @@ -20,13 +20,12 @@ import ( "time" "github.com/coreos/go-semver/semver" - tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" - "github.com/pingcap/errors" "github.com/pingcap/log" - 
"github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tidb/pkg/util/filter" + tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" "github.com/pingcap/tiflow/sync_diff_inspector/config" "github.com/pingcap/tiflow/sync_diff_inspector/source/common" "github.com/pingcap/tiflow/sync_diff_inspector/splitter" diff --git a/sync_diff_inspector/splitter/bucket.go b/sync_diff_inspector/splitter/bucket.go index b64b3ae68bd..46a53f99d28 100644 --- a/sync_diff_inspector/splitter/bucket.go +++ b/sync_diff_inspector/splitter/bucket.go @@ -21,7 +21,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/progress" diff --git a/sync_diff_inspector/splitter/index_fields.go b/sync_diff_inspector/splitter/index_fields.go index 1508d59075d..9c35164aa9d 100644 --- a/sync_diff_inspector/splitter/index_fields.go +++ b/sync_diff_inspector/splitter/index_fields.go @@ -19,7 +19,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tiflow/sync_diff_inspector/utils" "go.uber.org/zap" ) diff --git a/sync_diff_inspector/splitter/limit.go b/sync_diff_inspector/splitter/limit.go index 0075506a673..3138d53181f 100644 --- a/sync_diff_inspector/splitter/limit.go +++ b/sync_diff_inspector/splitter/limit.go @@ -21,7 +21,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/progress" diff --git a/sync_diff_inspector/splitter/random.go b/sync_diff_inspector/splitter/random.go index 470c33c6a24..839073a48b7 100644 --- a/sync_diff_inspector/splitter/random.go +++ b/sync_diff_inspector/splitter/random.go @@ -22,7 +22,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/progress" diff --git a/sync_diff_inspector/utils/table.go b/sync_diff_inspector/utils/table.go index 1c99dcb5d74..10f73e588d3 100644 --- a/sync_diff_inspector/utils/table.go +++ b/sync_diff_inspector/utils/table.go @@ -23,11 +23,12 @@ import ( "github.com/coreos/go-semver/semver" "github.com/pingcap/errors" "github.com/pingcap/tidb/pkg/ddl" + "github.com/pingcap/tidb/pkg/meta/metabuild" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/parser" "github.com/pingcap/tidb/pkg/parser/ast" - "github.com/pingcap/tidb/pkg/parser/model" + pmodel "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/parser/mysql" - "github.com/pingcap/tidb/pkg/sessionctx" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/collate" "github.com/pingcap/tidb/pkg/util/dbutil" @@ -80,7 +81,7 @@ func addClusteredAnnotationForPrimaryKey(raw string, replace string) (string, er return reg.ReplaceAllString(raw, replace), nil } -func 
getTableInfoBySQL(ctx sessionctx.Context, createTableSQL string, parser2 *parser.Parser) (table *model.TableInfo, err error) { +func getTableInfoBySQL(ctx *metabuild.Context, createTableSQL string, parser2 *parser.Parser) (table *model.TableInfo, err error) { stmt, err := parser2.ParseOneStmt(createTableSQL, "", "") if err != nil { return nil, errors.Trace(err) @@ -96,11 +97,11 @@ func getTableInfoBySQL(ctx sessionctx.Context, createTableSQL string, parser2 *p // put primary key in indices if table.PKIsHandle { pkIndex := &model.IndexInfo{ - Name: model.NewCIStr("PRIMARY"), + Name: pmodel.NewCIStr("PRIMARY"), Primary: true, State: model.StatePublic, Unique: true, - Tp: model.IndexTypeBtree, + Tp: pmodel.IndexTypeBtree, Columns: []*model.IndexColumn{ { Name: table.GetPkName(), @@ -166,7 +167,8 @@ func GetTableInfoWithVersion( sctx.GetSessionVars().TimeZone = time.UTC sctx.GetSessionVars().SQLMode = mysql.DelSQLMode(sctx.GetSessionVars().SQLMode, mysql.ModeStrictTransTables) sctx.GetSessionVars().SQLMode = mysql.DelSQLMode(sctx.GetSessionVars().SQLMode, mysql.ModeStrictAllTables) - return getTableInfoBySQL(sctx, createTableSQL, parser2) + metaBuildCtx := ddl.NewMetaBuildContextWithSctx(sctx) + return getTableInfoBySQL(metaBuildCtx, createTableSQL, parser2) } // GetTableInfo returns table information. diff --git a/sync_diff_inspector/utils/utils.go b/sync_diff_inspector/utils/utils.go index d80bc669123..a22fa3ac971 100644 --- a/sync_diff_inspector/utils/utils.go +++ b/sync_diff_inspector/utils/utils.go @@ -28,7 +28,7 @@ import ( "github.com/olekukonko/tablewriter" "github.com/pingcap/errors" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" From 6463065280ebcfb77e3b36e5e9d660b359fc21c3 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 14:11:59 +0800 Subject: [PATCH 03/22] Update build --- Makefile | 2 +- scripts/download-integration-test-binaries.sh | 4 +--- tests/integration_tests/README.md | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index f4e3d4d7615..82b2501c711 100644 --- a/Makefile +++ b/Makefile @@ -228,7 +228,7 @@ check_third_party_binary: @which bin/minio @which bin/bin/schema-registry-start -integration_test_build: check_failpoint_ctl storage_consumer kafka_consumer pulsar_consumer oauth2_server +integration_test_build: check_failpoint_ctl storage_consumer kafka_consumer pulsar_consumer oauth2_server sync_diff_inspector $(FAILPOINT_ENABLE) $(GOTEST) -ldflags '$(LDFLAGS)' -c -cover -covermode=atomic \ -coverpkg=github.com/pingcap/tiflow/... 
\ diff --git a/scripts/download-integration-test-binaries.sh b/scripts/download-integration-test-binaries.sh index 765d848aede..ec0d8849438 100755 --- a/scripts/download-integration-test-binaries.sh +++ b/scripts/download-integration-test-binaries.sh @@ -91,7 +91,7 @@ download_community_binaries() { mv ${THIRD_BIN_DIR}/tiflash ${THIRD_BIN_DIR}/_tiflash mv ${THIRD_BIN_DIR}/_tiflash/* ${THIRD_BIN_DIR} && rm -rf ${THIRD_BIN_DIR}/_tiflash tar -xz -C ${THIRD_BIN_DIR} pd-ctl -f ${TMP_DIR}/$tidb_file_name/ctl-${dist}.tar.gz - tar -xz -C ${THIRD_BIN_DIR} $toolkit_file_name/etcdctl $toolkit_file_name/sync_diff_inspector -f ${TMP_DIR}/$toolkit_tar_name + tar -xz -C ${THIRD_BIN_DIR} $toolkit_file_name/etcdctl -f ${TMP_DIR}/$toolkit_tar_name mv ${THIRD_BIN_DIR}/$toolkit_file_name/* ${THIRD_BIN_DIR} && rm -rf ${THIRD_BIN_DIR}/$toolkit_file_name # Download additional tools @@ -147,7 +147,6 @@ download_binaries() { local minio_download_url="${FILE_SERVER_URL}/download/minio.tar.gz" local go_ycsb_download_url="${FILE_SERVER_URL}/download/builds/pingcap/go-ycsb/test-br/go-ycsb" local etcd_download_url="${FILE_SERVER_URL}/download/builds/pingcap/cdc/etcd-v3.4.7-linux-amd64.tar.gz" - local sync_diff_inspector_url="${FILE_SERVER_URL}/download/builds/pingcap/cdc/sync_diff_inspector_hash-a129f096_linux-amd64.tar.gz" local jq_download_url="${FILE_SERVER_URL}/download/builds/pingcap/test/jq-1.6/jq-linux64" local schema_registry_url="${FILE_SERVER_URL}/download/builds/pingcap/cdc/schema-registry.tar.gz" @@ -158,7 +157,6 @@ download_binaries() { download_and_extract "$tiflash_download_url" "tiflash.tar.gz" download_and_extract "$minio_download_url" "minio.tar.gz" download_and_extract "$etcd_download_url" "etcd.tar.gz" "etcd-v3.4.7-linux-amd64/etcdctl" - download_and_extract "$sync_diff_inspector_url" "sync_diff_inspector.tar.gz" download_and_extract "$schema_registry_url" "schema-registry.tar.gz" download_file "$go_ycsb_download_url" "go-ycsb" "${THIRD_BIN_DIR}/go-ycsb" diff --git a/tests/integration_tests/README.md b/tests/integration_tests/README.md index 483f697338f..b2c3d471f85 100644 --- a/tests/integration_tests/README.md +++ b/tests/integration_tests/README.md @@ -14,7 +14,6 @@ If you need to specify a version, os or arch, you can use, for example: `make pr * `pd-ctl` # version >= 6.0.0-rc.1 * `tiflash` # tiflash binary * `libc++.so, libc++abi.so, libgmssl.so, libtiflash_proxy.so` # some necessary so files related to tiflash - * `sync_diff_inspector` * [go-ycsb](https://github.com/pingcap/go-ycsb) * [etcdctl](https://github.com/etcd-io/etcd/tree/master/etcdctl) * [jq](https://stedolan.github.io/jq/) From fa78f45de60e05c3dbf6361d61b841995290ff07 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 14:26:37 +0800 Subject: [PATCH 04/22] Fix format --- sync_diff_inspector/chunk/chunk_test.go | 2 - sync_diff_inspector/config/config_test.go | 1 - sync_diff_inspector/config/dm.go | 2 +- sync_diff_inspector/progress/progress.go | 1 - sync_diff_inspector/source/source_test.go | 14 ++--- sync_diff_inspector/source/tidb.go | 3 +- sync_diff_inspector/splitter/random.go | 3 -- sync_diff_inspector/splitter/splitter_test.go | 33 ++++++------ sync_diff_inspector/utils/utils.go | 2 - sync_diff_inspector/utils/utils_test.go | 52 +++++++++---------- 10 files changed, 50 insertions(+), 63 deletions(-) diff --git a/sync_diff_inspector/chunk/chunk_test.go b/sync_diff_inspector/chunk/chunk_test.go index b5d62dd9449..694bea6f949 100644 --- a/sync_diff_inspector/chunk/chunk_test.go +++ 
b/sync_diff_inspector/chunk/chunk_test.go @@ -419,7 +419,6 @@ func TestChunkToString(t *testing.T) { } require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"1","has-lower":true,"has-upper":true},{"column":"b","lower":"3","upper":"3","has-lower":true,"has-upper":true},{"column":"c","lower":"6","upper":"6","has-lower":true,"has-upper":true}],"is-first":false,"is-last":false,"where":"","args":null}`) require.Equal(t, chunk.ToMeta(), "range in sequence: (1,3,6) < (a,b,c) <= (1,3,6)") - } func TestChunkInit(t *testing.T) { @@ -599,7 +598,6 @@ func TestChunkID(t *testing.T) { chunkIDtmp.FromString(str) require.Equal(t, chunkIDLarge.Compare(chunkIDtmp), 0) } - } func TestChunkIndex(t *testing.T) { diff --git a/sync_diff_inspector/config/config_test.go b/sync_diff_inspector/config/config_test.go index 7c12c260bb2..32d2a9f40ce 100644 --- a/sync_diff_inspector/config/config_test.go +++ b/sync_diff_inspector/config/config_test.go @@ -58,7 +58,6 @@ func TestParseConfig(t *testing.T) { require.True(t, cfg.TableConfigs["config1"].Valid()) require.NoError(t, os.RemoveAll(cfg.Task.OutputDir)) - } func TestError(t *testing.T) { diff --git a/sync_diff_inspector/config/dm.go b/sync_diff_inspector/config/dm.go index 68564910e4b..4591b74f79e 100644 --- a/sync_diff_inspector/config/dm.go +++ b/sync_diff_inspector/config/dm.go @@ -50,7 +50,7 @@ func getDMTaskCfgURL(dmAddr, task string) string { func getDMTaskCfg(dmAddr, task string) ([]*SubTaskConfig, error) { tr := &http.Transport{ // TODO: support tls - //TLSClientConfig: tlsCfg, + // TLSClientConfig: tlsCfg, } client := &http.Client{Transport: tr} req, err := http.NewRequest("GET", getDMTaskCfgURL(dmAddr, task), nil) diff --git a/sync_diff_inspector/progress/progress.go b/sync_diff_inspector/progress/progress.go index b559ac85937..3fed728a8d1 100644 --- a/sync_diff_inspector/progress/progress.go +++ b/sync_diff_inspector/progress/progress.go @@ -217,7 +217,6 @@ func (tpp *TableProgressPrinter) PrintSummary() { } fmt.Fprintf(tpp.output, "%s%s\n", cleanStr, fixStr) - } func (tpp *TableProgressPrinter) Error(err error) { diff --git a/sync_diff_inspector/source/source_test.go b/sync_diff_inspector/source/source_test.go index 58bbe853eca..692452af4b7 100644 --- a/sync_diff_inspector/source/source_test.go +++ b/sync_diff_inspector/source/source_test.go @@ -59,8 +59,10 @@ type MockChunkIterator struct { index *chunk.ChunkID } -const CHUNKS = 5 -const BUCKETS = 1 +const ( + CHUNKS = 5 + BUCKETS = 1 +) func (m *MockChunkIterator) Next() (*chunk.Range, error) { if m.index.ChunkIndex == m.index.ChunkCnt-1 { @@ -79,11 +81,9 @@ func (m *MockChunkIterator) Next() (*chunk.Range, error) { } func (m *MockChunkIterator) Close() { - } -type MockAnalyzer struct { -} +type MockAnalyzer struct{} func (m *MockAnalyzer) AnalyzeSplitter(ctx context.Context, tableDiff *common.TableDiff, rangeInfo *splitter.RangeInfo) (splitter.ChunkIterator, error) { i := &chunk.ChunkID{ @@ -430,7 +430,7 @@ func TestMysqlShardSources(t *testing.T) { break } for j, value := range tableCase.rows[i] { - //c.Log(j) + // c.Log(j) require.Equal(t, columns[tableCase.rowColumns[j]].IsNull, false) require.Equal(t, columns[tableCase.rowColumns[j]].Data, []byte(value.(string))) } @@ -763,7 +763,7 @@ func TestRouterRules(t *testing.T) { } portStr, isExist := os.LookupEnv("MYSQL_PORT") if portStr == "" || !isExist { - //return + // return } port, err := strconv.Atoi(portStr) require.NoError(t, err) diff --git a/sync_diff_inspector/source/tidb.go 
b/sync_diff_inspector/source/tidb.go index a021275ee6e..92d80c5d34b 100644 --- a/sync_diff_inspector/source/tidb.go +++ b/sync_diff_inspector/source/tidb.go @@ -63,7 +63,6 @@ func (a *TiDBTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common.T return nil, errors.Trace(err) } return randIter, nil - } type TiDBRowsIterator struct { @@ -119,6 +118,7 @@ func (s *TiDBSource) GetRangeIterator(ctx context.Context, r *splitter.RangeInfo func (s *TiDBSource) Close() { s.dbConn.Close() } + func (s *TiDBSource) GetCountAndMd5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { beginTime := time.Now() table := s.tableDiffs[tableRange.GetTableIndex()] @@ -222,7 +222,6 @@ func NewTiDBSource( // instance -> db -> table allTablesMap := make(map[string]map[string]interface{}) sourceSchemas, err := dbutil.GetSchemas(ctx, ds.Conn) - if err != nil { return nil, errors.Annotatef(err, "get schemas from database") } diff --git a/sync_diff_inspector/splitter/random.go b/sync_diff_inspector/splitter/random.go index 839073a48b7..98771d0eb28 100644 --- a/sync_diff_inspector/splitter/random.go +++ b/sync_diff_inspector/splitter/random.go @@ -139,7 +139,6 @@ func NewRandomIteratorWithCheckpoint(ctx context.Context, progressID string, tab nextChunk: 0, dbConn: dbConn, }, nil - } func (s *RandomIterator) Next() (*chunk.Range, error) { @@ -161,7 +160,6 @@ func (s *RandomIterator) Next() (*chunk.Range, error) { } func (s *RandomIterator) Close() { - } // GetSplitFields returns fields to split chunks, order by pk, uk, index, columns. @@ -173,7 +171,6 @@ func GetSplitFields(table *model.TableInfo, splitFields []string) ([]*model.Colu col := dbutil.FindColumnByName(table.Columns, splitField) if col == nil { return nil, errors.NotFoundf("column %s in table %s", splitField, table.Name) - } splitCols = append(splitCols, col) } diff --git a/sync_diff_inspector/splitter/splitter_test.go b/sync_diff_inspector/splitter/splitter_test.go index 3f641d100ee..9b16fc5c0ea 100644 --- a/sync_diff_inspector/splitter/splitter_test.go +++ b/sync_diff_inspector/splitter/splitter_test.go @@ -23,7 +23,7 @@ import ( sqlmock "github.com/DATA-DOG/go-sqlmock" "github.com/pingcap/tidb/pkg/parser" - "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/source/common" "github.com/pingcap/tiflow/sync_diff_inspector/utils" @@ -66,7 +66,8 @@ func TestSplitRangeByRandom(t *testing.T) { []interface{}{"7", "7", "n", "10", "10", "z"}, }, }, - }, { + }, + { "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`, `a`))", 3, chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true).CopyAndUpdate("a", "0", "10", true, true), @@ -106,7 +107,8 @@ func TestSplitRangeByRandom(t *testing.T) { []interface{}{"n", "z"}, }, }, - }, { + }, + { "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`))", 2, chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true), @@ -122,7 +124,8 @@ func TestSplitRangeByRandom(t *testing.T) { []interface{}{"g", "z"}, }, }, - }, { + }, + { "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`))", 3, chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true), @@ -139,7 +142,7 @@ func TestSplitRangeByRandom(t *testing.T) { } for _, testCase := range testCases { - tableInfo, err := dbutil.GetTableInfoBySQL(testCase.createTableSQL, 
parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(testCase.createTableSQL, parser.New()) require.NoError(t, err) splitCols, err := GetSplitFields(tableInfo, nil) @@ -319,7 +322,7 @@ func TestRandomSpliter(t *testing.T) { } for _, testCase := range testCases { - tableInfo, err := dbutil.GetTableInfoBySQL(testCase.createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(testCase.createTableSQL, parser.New()) require.NoError(t, err) info, needUnifiedTimeStamp := utils.ResetColumns(tableInfo, testCase.IgnoreColumns) @@ -354,15 +357,15 @@ func TestRandomSpliter(t *testing.T) { // Test Checkpoint stopJ := 3 - tableInfo, err := dbutil.GetTableInfoBySQL(testCases[0].createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(testCases[0].createTableSQL, parser.New()) require.NoError(t, err) tableDiff := &common.TableDiff{ Schema: "test", Table: "test", Info: tableInfo, - //IgnoreColumns: []string{"c"}, - //Fields: "a,b", + // IgnoreColumns: []string{"c"}, + // Fields: "a,b", ChunkSize: 5, } @@ -398,7 +401,6 @@ func TestRandomSpliter(t *testing.T) { require.Equal(t, chunk.Index.ChunkCnt, chunkID1.ChunkCnt) require.Equal(t, chunk.Index.ChunkIndex, chunkID1.ChunkIndex+1) - } func createFakeResultForRandomSplit(mock sqlmock.Sqlmock, count int, randomValues [][]string) { @@ -421,7 +423,6 @@ func createFakeResultForRandomSplit(mock sqlmock.Sqlmock, count int, randomValue randomRows.AddRow(row...) } mock.ExpectQuery("ORDER BY rand_value").WillReturnRows(randomRows) - } func TestBucketSpliter(t *testing.T) { @@ -430,7 +431,7 @@ func TestBucketSpliter(t *testing.T) { require.NoError(t, err) createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) testCases := []struct { @@ -741,7 +742,7 @@ func TestLimitSpliter(t *testing.T) { require.NoError(t, err) createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) testCases := []struct { @@ -862,7 +863,6 @@ func TestRangeInfo(t *testing.T) { require.Equal(t, chunkRange.Args, []interface{}{"1", "2"}) require.Equal(t, rangeInfo2.GetTableIndex(), 1) - } func TestChunkSize(t *testing.T) { @@ -871,7 +871,7 @@ func TestChunkSize(t *testing.T) { require.NoError(t, err) createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) tableDiff := &common.TableDiff{ @@ -912,7 +912,7 @@ func TestChunkSize(t *testing.T) { require.Equal(t, randomIter.chunkSize, int64(100000)) createTableSQL = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime)" - tableInfo, err = dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err = dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) tableDiff_noindex := &common.TableDiff{ @@ -932,5 +932,4 @@ func TestChunkSize(t *testing.T) { mock.ExpectQuery("SELECT `a`,.*limit 
50000.*").WillReturnRows(sqlmock.NewRows([]string{"a", "b"})) _, err = NewLimitIterator(ctx, "", tableDiff, db) require.NoError(t, err) - } diff --git a/sync_diff_inspector/utils/utils.go b/sync_diff_inspector/utils/utils.go index a22fa3ac971..5e4351c1c7e 100644 --- a/sync_diff_inspector/utils/utils.go +++ b/sync_diff_inspector/utils/utils.go @@ -310,7 +310,6 @@ func GenerateDeleteDML(data map[string]*dbutil.ColumnData, table *model.TableInf } } return fmt.Sprintf("DELETE FROM %s WHERE %s LIMIT 1;", dbutil.TableName(schema, table.Name.O), strings.Join(kvs, " AND ")) - } // isCompatible checks whether 2 column types are compatible. @@ -443,7 +442,6 @@ func CompareStruct(upstreamTableInfos []*model.TableInfo, downstreamTableInfo *m }{index, 0} } for _, upstreamTableInfo := range upstreamTableInfos { - NextIndex: for _, upstreamIndex := range upstreamTableInfo.Indices { if _, ok := deleteIndicesSet[upstreamIndex.Name.O]; ok { diff --git a/sync_diff_inspector/utils/utils_test.go b/sync_diff_inspector/utils/utils_test.go index 969add2c0f6..fde005ebb3c 100644 --- a/sync_diff_inspector/utils/utils_test.go +++ b/sync_diff_inspector/utils/utils_test.go @@ -21,9 +21,10 @@ import ( "time" "github.com/DATA-DOG/go-sqlmock" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/parser" - "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/stretchr/testify/require" ) @@ -75,12 +76,11 @@ func TestStringsToInterface(t *testing.T) { require.Equal(t, len(sliceMap), len(expectSlice)) require.Equal(t, UniqueID("123", "456"), "`123`.`456`") - } func TestBasicTableUtilOperation(t *testing.T) { createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) query, orderKeyCols := GetTableRowsQueryFormat("test", "test", tableInfo, "123") @@ -242,7 +242,7 @@ func TestBasicTableUtilOperation(t *testing.T) { // Test ignore columns createTableSQL = "create table `test`.`test`(`a` int, `c` float, `b` varchar(10), `d` datetime, `e` timestamp, primary key(`a`, `b`), key(`c`, `d`))" - tableInfo, err = dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err = dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) require.Equal(t, len(tableInfo.Indices), 2) @@ -266,7 +266,7 @@ func TestGetCountAndMd5Checksum(t *testing.T) { defer conn.Close() createTableSQL := "create table `test`.`test`(`a` int, `c` float, `b` varchar(10), `d` datetime, primary key(`a`, `b`), key(`c`, `d`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) mock.ExpectQuery("SELECT COUNT.*FROM `test_schema`\\.`test_table` WHERE \\[23 45\\].*").WithArgs("123", "234").WillReturnRows(sqlmock.NewRows([]string{"CNT", "CHECKSUM"}).AddRow(123, 456)) @@ -286,7 +286,7 @@ func TestGetApproximateMid(t *testing.T) { defer conn.Close() createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, 
err) rows := sqlmock.NewRows([]string{"a", "b"}).AddRow("5", "10") @@ -308,7 +308,7 @@ func TestGetApproximateMid(t *testing.T) { func TestGenerateSQLs(t *testing.T) { createTableSQL := "CREATE TABLE `diff_test`.`atest` (`id` int(24), `name` varchar(24), `birthday` datetime, `update_time` time, `money` decimal(20,2), `id_gen` int(11) GENERATED ALWAYS AS ((`id` + 1)) VIRTUAL, primary key(`id`, `name`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) rowsData := map[string]*dbutil.ColumnData{ @@ -327,7 +327,7 @@ func TestGenerateSQLs(t *testing.T) { // test the unique key createTableSQL2 := "CREATE TABLE `diff_test`.`atest` (`id` int(24), `name` varchar(24), `birthday` datetime, `update_time` time, `money` decimal(20,2), unique key(`id`, `name`))" - tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err := dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) replaceSQL = GenerateReplaceDML(rowsData, tableInfo2, "diff_test") deleteSQL = GenerateDeleteDML(rowsData, tableInfo2, "diff_test") @@ -357,7 +357,7 @@ func TestGenerateSQLs(t *testing.T) { func TestResetColumns(t *testing.T) { createTableSQL1 := "CREATE TABLE `test`.`atest` (`a` int, `b` int, `c` int, `d` int, primary key(`a`))" - tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo1, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) tbInfo, hasTimeStampType := ResetColumns(tableInfo1, []string{"a"}) require.Equal(t, len(tbInfo.Columns), 3) @@ -366,14 +366,14 @@ func TestResetColumns(t *testing.T) { require.False(t, hasTimeStampType) createTableSQL2 := "CREATE TABLE `test`.`atest` (`a` int, `b` int, `c` int, `d` int, primary key(`a`), index idx(`b`, `c`))" - tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err := dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) tbInfo, _ = ResetColumns(tableInfo2, []string{"a", "b"}) require.Equal(t, len(tbInfo.Columns), 2) require.Equal(t, len(tbInfo.Indices), 0) createTableSQL3 := "CREATE TABLE `test`.`atest` (`a` int, `b` int, `c` int, `d` int, primary key(`a`), index idx(`b`, `c`))" - tableInfo3, err := dbutil.GetTableInfoBySQL(createTableSQL3, parser.New()) + tableInfo3, err := dbutiltest.GetTableInfoBySQL(createTableSQL3, parser.New()) require.NoError(t, err) tbInfo, _ = ResetColumns(tableInfo3, []string{"b", "c"}) require.Equal(t, len(tbInfo.Columns), 2) @@ -455,7 +455,7 @@ func TestGetBetterIndex(t *testing.T) { }, } tableCase := tableCases[0] - tableInfo, err := dbutil.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) require.NoError(t, err) indices := dbutil.FindAllIndex(tableInfo) for i, index := range indices { @@ -474,7 +474,7 @@ func TestGetBetterIndex(t *testing.T) { require.Equal(t, indices[0].Name.O, tableCase.selected) tableCase = tableCases[1] - tableInfo, err = dbutil.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) + tableInfo, err = dbutiltest.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) require.NoError(t, err) indices = dbutil.FindAllIndex(tableInfo) for i, index := range indices { @@ -493,7 +493,6 @@ func TestGetBetterIndex(t *testing.T) { indices, err = GetBetterIndex(ctx, conn, "single_index", "test1", tableInfo) 
require.NoError(t, err) require.Equal(t, indices[0].Name.O, tableCase.selected) - } func TestCalculateChunkSize(t *testing.T) { @@ -523,7 +522,7 @@ func TestGetChunkIDFromSQLFileName(t *testing.T) { func TestCompareStruct(t *testing.T) { createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) var isEqual bool @@ -534,7 +533,7 @@ func TestCompareStruct(t *testing.T) { // column length different createTableSQL2 := "create table `test`(`a` int, `b` varchar(10), `c` float, primary key(`a`, `b`), index(`c`))" - tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err := dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -543,7 +542,7 @@ func TestCompareStruct(t *testing.T) { // column name differernt createTableSQL2 = "create table `test`(`aa` int, `b` varchar(10), `c` float, `d` datetime, primary key(`aa`, `b`), index(`c`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -552,7 +551,7 @@ func TestCompareStruct(t *testing.T) { // column type compatible createTableSQL2 = "create table `test`(`a` int, `b` char(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -560,7 +559,7 @@ func TestCompareStruct(t *testing.T) { require.False(t, isPanic) createTableSQL2 = "create table `test`(`a` int(11), `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -569,7 +568,7 @@ func TestCompareStruct(t *testing.T) { // column type not compatible createTableSQL2 = "create table `test`(`a` int, `b` varchar(10), `c` int, `d` datetime, primary key(`a`, `b`), index(`c`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -578,7 +577,7 @@ func TestCompareStruct(t *testing.T) { // column properties not compatible createTableSQL2 = "create table `test`(`a` int, `b` varchar(11), `c` int, `d` datetime, primary key(`a`, `b`), index(`c`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -589,7 +588,7 @@ func TestCompareStruct(t *testing.T) { // index different createTableSQL2 = "create table 
`test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -600,11 +599,11 @@ func TestCompareStruct(t *testing.T) { // index column different createTableSQL = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" - tableInfo, err = dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err = dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) createTableSQL2 = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `c`), index(`c`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -612,7 +611,6 @@ func TestCompareStruct(t *testing.T) { require.False(t, isPanic) require.Equal(t, len(tableInfo.Indices), 1) require.Equal(t, tableInfo.Indices[0].Name.O, "c") - } func TestGenerateSQLBlob(t *testing.T) { @@ -631,7 +629,7 @@ func TestGenerateSQLBlob(t *testing.T) { } for _, c := range cases { - tableInfo, err := dbutil.GetTableInfoBySQL(c.createTableSql, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(c.createTableSql, parser.New()) require.NoError(t, err) replaceSQL := GenerateReplaceDML(rowsData, tableInfo, "diff_test") @@ -643,7 +641,7 @@ func TestGenerateSQLBlob(t *testing.T) { func TestCompareBlob(t *testing.T) { createTableSQL := "create table `test`.`test`(`a` int primary key, `b` blob)" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) _, orderKeyCols := GetTableRowsQueryFormat("test", "test", tableInfo, "123") From 19d3529a10a531b453e27bdaa3a7c2848846fee7 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 17:03:45 +0800 Subject: [PATCH 05/22] Fix lint --- go.mod | 2 +- .../checkpoints/checkpoints.go | 40 +-- .../checkpoints/checkpoints_test.go | 6 +- sync_diff_inspector/chunk/chunk.go | 161 +++++++----- sync_diff_inspector/chunk/chunk_test.go | 12 +- sync_diff_inspector/config/config.go | 26 +- sync_diff_inspector/config/config_test.go | 2 +- sync_diff_inspector/config/dm.go | 13 +- sync_diff_inspector/config/template.go | 3 +- sync_diff_inspector/diff/diff.go | 57 ++-- sync_diff_inspector/main.go | 6 +- sync_diff_inspector/progress/progress.go | 243 +++++++++--------- sync_diff_inspector/progress/progress_test.go | 4 +- sync_diff_inspector/report/report.go | 19 +- sync_diff_inspector/report/report_test.go | 118 ++++----- sync_diff_inspector/source/chunks_iter.go | 7 +- .../source/common/common_test.go | 3 +- sync_diff_inspector/source/common/rows.go | 14 +- .../source/common/table_diff.go | 8 +- sync_diff_inspector/source/mysql_shard.go | 49 +++- sync_diff_inspector/source/source.go | 33 ++- sync_diff_inspector/source/source_test.go | 44 ++-- sync_diff_inspector/source/tidb.go | 37 ++- sync_diff_inspector/splitter/bucket.go | 7 + .../splitter/index_fields_test.go | 8 +- sync_diff_inspector/splitter/limit.go | 15 +- sync_diff_inspector/splitter/random.go | 13 +- 
sync_diff_inspector/splitter/splitter.go | 11 +- sync_diff_inspector/splitter/splitter_test.go | 4 +- sync_diff_inspector/utils/pd.go | 2 + sync_diff_inspector/utils/table.go | 21 +- sync_diff_inspector/utils/utils.go | 55 ++-- sync_diff_inspector/utils/utils_test.go | 26 +- 33 files changed, 618 insertions(+), 451 deletions(-) diff --git a/go.mod b/go.mod index 8ce5bf41de4..d0aec8d663a 100644 --- a/go.mod +++ b/go.mod @@ -346,7 +346,7 @@ require ( github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c // indirect github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0 // indirect github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546 // indirect - github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726 // indirect + github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726 github.com/siddontang/go-log v0.0.0-20180807004314-8d05993dda07 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect diff --git a/sync_diff_inspector/checkpoints/checkpoints.go b/sync_diff_inspector/checkpoints/checkpoints.go index 8863cad1c51..82b4def15ec 100644 --- a/sync_diff_inspector/checkpoints/checkpoints.go +++ b/sync_diff_inspector/checkpoints/checkpoints.go @@ -30,22 +30,23 @@ import ( ) const ( - // SuccessState - // for chunk: means this chunk's data is equal + // SuccessState means + // for chunk: this chunk's data is equal // for table: means this all chunk in this table is equal(except ignore chunk) SuccessState = "success" - // FailedState - // for chunk: means this chunk's data is not equal - // for table: means some chunks' data is not equal or some chunk check failed in this table + // FailedState means + // for chunk: this chunk's data is not equal + // for table: some chunks' data is not equal or some chunk check failed in this table FailedState = "failed" - // IgnoreState + // IgnoreState means // for chunk: this chunk is ignored. if it is Empty chunk, will ignore some chunk // for table: don't have this state IgnoreState = "ignore" ) +// Node is the struct for node type Node struct { State string `json:"state"` // indicate the state ("success" or "failed") of the chunk @@ -53,16 +54,22 @@ type Node struct { IndexID int64 `json:"index-id"` } -func (n *Node) GetID() *chunk.ChunkID { return n.ChunkRange.Index } +// GetID returns id from the node +func (n *Node) GetID() *chunk.CID { return n.ChunkRange.Index } +// GetState returns the state from the node func (n *Node) GetState() string { return n.State } +// GetTableIndex returns table index func (n *Node) GetTableIndex() int { return n.ChunkRange.Index.TableIndex } +// GetBucketIndexLeft returns BucketIndexLeft func (n *Node) GetBucketIndexLeft() int { return n.ChunkRange.Index.BucketIndexLeft } +// GetBucketIndexRight returns BucketIndexRight func (n *Node) GetBucketIndexRight() int { return n.ChunkRange.Index.BucketIndexRight } +// GetChunkIndex returns ChunkIndex func (n *Node) GetChunkIndex() int { return n.ChunkRange.Index.ChunkIndex } // IsAdjacent represents whether the next node is adjacent node. 
@@ -121,7 +128,7 @@ type Checkpoint struct { hp *nodeHeap } -// SaveState contains the information of the latest checked chunk and state of `report` +// SavedState contains the information of the latest checked chunk and state of `report` // When sync-diff start from the checkpoint, it will load this information and continue running type SavedState struct { Chunk *Node `json:"chunk-info"` @@ -133,37 +140,39 @@ func (cp *Checkpoint) InitCurrentSavedID(n *Node) { cp.hp.CurrentSavedNode = n } +// GetCurrentSavedID returns the saved id with lock func (cp *Checkpoint) GetCurrentSavedID() *Node { cp.hp.mu.Lock() defer cp.hp.mu.Unlock() return cp.hp.CurrentSavedNode } +// Insert inserts a new node func (cp *Checkpoint) Insert(node *Node) { cp.hp.mu.Lock() heap.Push(cp.hp, node) cp.hp.mu.Unlock() } -// Len - get the length of the heap +// Len gets the length of the heap func (hp *nodeHeap) Len() int { return len(hp.Nodes) } -// Less - determine which is more priority than another +// Less determines which is more priority than another func (hp *nodeHeap) Less(i, j int) bool { return hp.Nodes[i].IsLess(hp.Nodes[j]) } -// Swap - implementation of swap for the heap interface +// Swap implementation of swap for the heap interface func (hp *nodeHeap) Swap(i, j int) { hp.Nodes[i], hp.Nodes[j] = hp.Nodes[j], hp.Nodes[i] } -// Push - implementation of push for the heap interface +// Push implementation of push for the heap interface func (hp *nodeHeap) Push(x interface{}) { hp.Nodes = append(hp.Nodes, x.(*Node)) } -// Pop - implementation of pop for heap interface +// Pop implementation of pop for heap interface func (hp *nodeHeap) Pop() (item interface{}) { if len(hp.Nodes) == 0 { return @@ -173,13 +182,14 @@ func (hp *nodeHeap) Pop() (item interface{}) { return } +// Init initialize the Checkpoint func (cp *Checkpoint) Init() { hp := &nodeHeap{ mu: &sync.Mutex{}, Nodes: make([]*Node, 0), CurrentSavedNode: &Node{ ChunkRange: &chunk.Range{ - Index: chunk.GetInitChunkID(), + Index: chunk.GetInitCID(), IsFirst: true, IsLast: true, }, @@ -202,7 +212,7 @@ func (cp *Checkpoint) GetChunkSnapshot() (cur *Node) { } // SaveChunk saves the chunk to file. 
-func (cp *Checkpoint) SaveChunk(ctx context.Context, fileName string, cur *Node, reportInfo *report.Report) (*chunk.ChunkID, error) { +func (cp *Checkpoint) SaveChunk(ctx context.Context, fileName string, cur *Node, reportInfo *report.Report) (*chunk.CID, error) { if cur == nil { return nil, nil } diff --git a/sync_diff_inspector/checkpoints/checkpoints_test.go b/sync_diff_inspector/checkpoints/checkpoints_test.go index 29b1a76a586..eb73274ae5b 100644 --- a/sync_diff_inspector/checkpoints/checkpoints_test.go +++ b/sync_diff_inspector/checkpoints/checkpoints_test.go @@ -41,7 +41,7 @@ func TestSaveChunk(t *testing.T) { go func(i int) { node := &Node{ ChunkRange: &chunk.Range{ - Index: &chunk.ChunkID{ + Index: &chunk.CID{ TableIndex: 0, BucketIndexLeft: i / 10, BucketIndexRight: i / 10, @@ -74,7 +74,7 @@ func TestSaveChunk(t *testing.T) { require.NotNil(t, cur) id, err = checker.SaveChunk(ctx, "TestSaveChunk", cur, nil) require.NoError(t, err) - require.Equal(t, id.Compare(&chunk.ChunkID{TableIndex: 0, BucketIndexLeft: 9, BucketIndexRight: 9, ChunkIndex: 9}), 0) + require.Equal(t, id.Compare(&chunk.CID{TableIndex: 0, BucketIndexLeft: 9, BucketIndexRight: 9, ChunkIndex: 9}), 0) } func TestLoadChunk(t *testing.T) { @@ -96,7 +96,7 @@ func TestLoadChunk(t *testing.T) { HasUpper: i != rounds, }, }, - Index: &chunk.ChunkID{ + Index: &chunk.CID{ TableIndex: 0, BucketIndexLeft: i / 10, BucketIndexRight: i / 10, diff --git a/sync_diff_inspector/chunk/chunk.go b/sync_diff_inspector/chunk/chunk.go index 6943f413d96..91ef5f27d7a 100644 --- a/sync_diff_inspector/chunk/chunk.go +++ b/sync_diff_inspector/chunk/chunk.go @@ -31,10 +31,12 @@ const ( gt = ">" ) -type ChunkType int +// Type is the type of the chunk +type Type int +// List all chunk types const ( - Bucket ChunkType = iota + 1 + Bucket Type = iota + 1 Random Limit Others @@ -51,8 +53,8 @@ type Bound struct { HasUpper bool `json:"has-upper"` } -// ChunkID is to identify the sequence of chunks -type ChunkID struct { +// CID is to identify the sequence of chunks +type CID struct { TableIndex int `json:"table-index"` // we especially treat random split has only one bucket // which is the whole table @@ -66,8 +68,9 @@ type ChunkID struct { ChunkCnt int `json:"chunk-count"` } -func GetInitChunkID() *ChunkID { - return &ChunkID{ +// GetInitCID return an empty CID +func GetInitCID() *CID { + return &CID{ TableIndex: -1, BucketIndexLeft: -1, BucketIndexRight: -1, @@ -76,7 +79,8 @@ func GetInitChunkID() *ChunkID { } } -func (c *ChunkID) Compare(o *ChunkID) int { +// Compare compare two CIDs +func (c *CID) Compare(o *CID) int { if c.TableIndex < o.TableIndex { return -1 } @@ -101,16 +105,19 @@ func (c *ChunkID) Compare(o *ChunkID) int { return 1 } -func (c *ChunkID) Copy() *ChunkID { +// Copy return a same CID +func (c *CID) Copy() *CID { cp := *c return &cp } -func (c *ChunkID) ToString() string { +// ToString return string for CID +func (c *CID) ToString() string { return fmt.Sprintf("%d:%d-%d:%d:%d", c.TableIndex, c.BucketIndexLeft, c.BucketIndexRight, c.ChunkIndex, c.ChunkCnt) } -func (c *ChunkID) FromString(s string) error { +// FromString get CID from given string +func (c *CID) FromString(s string) error { ids := strings.Split(s, ":") tableIndex, err := strconv.Atoi(ids[0]) if err != nil { @@ -141,11 +148,11 @@ func (c *ChunkID) FromString(s string) error { // Range represents chunk range type Range struct { - Index *ChunkID `json:"index"` - Type ChunkType `json:"type"` - Bounds []*Bound `json:"bounds"` - IsFirst bool `json:"is-first"` - IsLast bool 
`json:"is-last"` + Index *CID `json:"index"` + Type Type `json:"type"` + Bounds []*Bound `json:"bounds"` + IsFirst bool `json:"is-first"` + IsLast bool `json:"is-last"` Where string `json:"where"` Args []interface{} `json:"args"` @@ -153,10 +160,12 @@ type Range struct { columnOffset map[string]int } +// IsFirstChunkForBucket return true if it's the first chunk func (r *Range) IsFirstChunkForBucket() bool { return r.Index.ChunkIndex == 0 } +// IsLastChunkForBucket return true if it's the last chunk func (r *Range) IsLastChunkForBucket() bool { return r.Index.ChunkIndex == r.Index.ChunkCnt-1 } @@ -166,7 +175,7 @@ func NewChunkRange() *Range { return &Range{ Bounds: make([]*Bound, 0, 2), columnOffset: make(map[string]int), - Index: &ChunkID{}, + Index: &CID{}, } } @@ -186,12 +195,13 @@ func NewChunkRangeOffset(columnOffset map[string]int) *Range { } } -func (c *Range) IsLastChunkForTable() bool { - if c.IsLast { +// IsLastChunkForTable return true if it's the last chunk +func (r *Range) IsLastChunkForTable() bool { + if r.IsLast { return true } // calculate from bounds - for _, b := range c.Bounds { + for _, b := range r.Bounds { if b.HasUpper { return false } @@ -199,12 +209,13 @@ func (c *Range) IsLastChunkForTable() bool { return true } -func (c *Range) IsFirstChunkForTable() bool { - if c.IsFirst { +// IsFirstChunkForTable return true if it's the first chunk +func (r *Range) IsFirstChunkForTable() bool { + if r.IsFirst { return true } // calculate from bounds - for _, b := range c.Bounds { + for _, b := range r.Bounds { if b.HasLower { return false } @@ -213,8 +224,8 @@ func (c *Range) IsFirstChunkForTable() bool { } // String returns the string of Range, used for log. -func (c *Range) String() string { - chunkBytes, err := json.Marshal(c) +func (r *Range) String() string { + chunkBytes, err := json.Marshal(r) if err != nil { log.Warn("fail to encode chunk into string", zap.Error(err)) return "" @@ -223,7 +234,8 @@ func (c *Range) String() string { return string(chunkBytes) } -func (c *Range) ToString(collation string) (string, []interface{}) { +// ToString return string for range +func (r *Range) ToString(collation string) (string, []interface{}) { if collation != "" { collation = fmt.Sprintf(" COLLATE '%s'", collation) } @@ -246,8 +258,8 @@ func (c *Range) ToString(collation string) (string, []interface{}) { preConditionArgsForUpper := make([]interface{}, 0, 1) i := 0 - for ; i < len(c.Bounds); i++ { - bound := c.Bounds[i] + for ; i < len(r.Bounds); i++ { + bound := r.Bounds[i] if !(bound.HasLower && bound.HasUpper) { break } @@ -260,16 +272,16 @@ func (c *Range) ToString(collation string) (string, []interface{}) { sameArgs = append(sameArgs, bound.Lower) } - if i == len(c.Bounds) && i > 0 { + if i == len(r.Bounds) && i > 0 { // All the columns are equal in bounds, should return FALSE! return "FALSE", nil } - for ; i < len(c.Bounds); i++ { - bound := c.Bounds[i] + for ; i < len(r.Bounds); i++ { + bound := r.Bounds[i] lowerSymbol := gt upperSymbol := lt - if i == len(c.Bounds)-1 { + if i == len(r.Bounds)-1 { upperSymbol = lte } @@ -312,28 +324,29 @@ func (c *Range) ToString(collation string) (string, []interface{}) { } return fmt.Sprintf("(%s) AND (%s)", strings.Join(lowerCondition, " OR "), strings.Join(upperCondition, " OR ")), append(lowerArgs, upperArgs...) 
- } else { - if len(upperCondition) == 0 && len(lowerCondition) == 0 { - return strings.Join(sameCondition, " AND "), sameArgs - } + } - if len(upperCondition) == 0 { - return fmt.Sprintf("(%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(lowerCondition, " OR ")), append(sameArgs, lowerArgs...) - } + if len(upperCondition) == 0 && len(lowerCondition) == 0 { + return strings.Join(sameCondition, " AND "), sameArgs + } - if len(lowerCondition) == 0 { - return fmt.Sprintf("(%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(upperCondition, " OR ")), append(sameArgs, upperArgs...) - } + if len(upperCondition) == 0 { + return fmt.Sprintf("(%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(lowerCondition, " OR ")), append(sameArgs, lowerArgs...) + } - return fmt.Sprintf("(%s) AND (%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(lowerCondition, " OR "), strings.Join(upperCondition, " OR ")), append(append(sameArgs, lowerArgs...), upperArgs...) + if len(lowerCondition) == 0 { + return fmt.Sprintf("(%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(upperCondition, " OR ")), append(sameArgs, upperArgs...) } + + return fmt.Sprintf("(%s) AND (%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(lowerCondition, " OR "), strings.Join(upperCondition, " OR ")), append(append(sameArgs, lowerArgs...), upperArgs...) } -func (c *Range) ToMeta() string { +// ToMeta return string for range +func (r *Range) ToMeta() string { lowerCondition := make([]string, 0, 1) upperCondition := make([]string, 0, 1) columnName := make([]string, 0, 1) - for _, bound := range c.Bounds { + for _, bound := range r.Bounds { columnName = append(columnName, bound.Column) if bound.HasLower { lowerCondition = append(lowerCondition, bound.Lower) @@ -354,28 +367,29 @@ func (c *Range) ToMeta() string { return fmt.Sprintf("range in sequence: (%s) < (%s) <= (%s)", strings.Join(lowerCondition, ","), strings.Join(columnName, ","), strings.Join(upperCondition, ",")) } -func (c *Range) addBound(bound *Bound) { - c.Bounds = append(c.Bounds, bound) - c.columnOffset[bound.Column] = len(c.Bounds) - 1 +func (r *Range) addBound(bound *Bound) { + r.Bounds = append(r.Bounds, bound) + r.columnOffset[bound.Column] = len(r.Bounds) - 1 } -func (c *Range) Update(column, lower, upper string, updateLower, updateUpper bool) { - if offset, ok := c.columnOffset[column]; ok { +// Update update the range +func (r *Range) Update(column, lower, upper string, updateLower, updateUpper bool) { + if offset, ok := r.columnOffset[column]; ok { // update the bound if updateLower { - c.Bounds[offset].Lower = lower - c.Bounds[offset].HasLower = true + r.Bounds[offset].Lower = lower + r.Bounds[offset].HasLower = true } if updateUpper { - c.Bounds[offset].Upper = upper - c.Bounds[offset].HasUpper = true + r.Bounds[offset].Upper = upper + r.Bounds[offset].HasUpper = true } return } // add a new bound - c.addBound(&Bound{ + r.addBound(&Bound{ Column: column, Lower: lower, Upper: upper, @@ -384,9 +398,10 @@ func (c *Range) Update(column, lower, upper string, updateLower, updateUpper boo }) } -func (c *Range) Copy() *Range { +// Copy return a new range +func (r *Range) Copy() *Range { newChunk := NewChunkRange() - for _, bound := range c.Bounds { + for _, bound := range r.Bounds { newChunk.addBound(&Bound{ Column: bound.Column, Lower: bound.Lower, @@ -399,9 +414,10 @@ func (c *Range) Copy() *Range { return newChunk } -func (c *Range) Clone() *Range { +// Clone return a new range +func (r 
*Range) Clone() *Range { newChunk := NewChunkRange() - for _, bound := range c.Bounds { + for _, bound := range r.Bounds { newChunk.addBound(&Bound{ Column: bound.Column, Lower: bound.Lower, @@ -410,28 +426,30 @@ func (c *Range) Clone() *Range { HasUpper: bound.HasUpper, }) } - newChunk.Type = c.Type - newChunk.Where = c.Where - newChunk.Args = c.Args - for i, v := range c.columnOffset { + newChunk.Type = r.Type + newChunk.Where = r.Where + newChunk.Args = r.Args + for i, v := range r.columnOffset { newChunk.columnOffset[i] = v } - newChunk.Index = c.Index.Copy() - newChunk.IsFirst = c.IsFirst - newChunk.IsLast = c.IsLast + newChunk.Index = r.Index.Copy() + newChunk.IsFirst = r.IsFirst + newChunk.IsLast = r.IsLast return newChunk } -func (c *Range) CopyAndUpdate(column, lower, upper string, updateLower, updateUpper bool) *Range { - newChunk := c.Copy() +// CopyAndUpdate update the range +func (r *Range) CopyAndUpdate(column, lower, upper string, updateLower, updateUpper bool) *Range { + newChunk := r.Copy() newChunk.Update(column, lower, upper, updateLower, updateUpper) return newChunk } +// InitChunks init the given chunks // Notice: chunk may contain not only one bucket, which can be expressed as a range [3, 5], // // And `lastBucketID` means the `5` and `firstBucketID` means the `3`. -func InitChunks(chunks []*Range, t ChunkType, firstBucketID, lastBucketID int, index int, collation, limits string, chunkCnt int) { +func InitChunks(chunks []*Range, t Type, firstBucketID, lastBucketID int, index int, collation, limits string, chunkCnt int) { if chunks == nil { return } @@ -439,7 +457,7 @@ func InitChunks(chunks []*Range, t ChunkType, firstBucketID, lastBucketID int, i conditions, args := chunk.ToString(collation) chunk.Where = fmt.Sprintf("((%s) AND (%s))", conditions, limits) chunk.Args = args - chunk.Index = &ChunkID{ + chunk.Index = &CID{ BucketIndexLeft: firstBucketID, BucketIndexRight: lastBucketID, ChunkIndex: index, @@ -450,11 +468,12 @@ func InitChunks(chunks []*Range, t ChunkType, firstBucketID, lastBucketID int, i } } -func InitChunk(chunk *Range, t ChunkType, firstBucketID, lastBucketID int, collation, limits string) { +// InitChunk initialize the chunk +func InitChunk(chunk *Range, t Type, firstBucketID, lastBucketID int, collation, limits string) { conditions, args := chunk.ToString(collation) chunk.Where = fmt.Sprintf("((%s) AND (%s))", conditions, limits) chunk.Args = args - chunk.Index = &ChunkID{ + chunk.Index = &CID{ BucketIndexLeft: firstBucketID, BucketIndexRight: lastBucketID, ChunkIndex: 0, diff --git a/sync_diff_inspector/chunk/chunk_test.go b/sync_diff_inspector/chunk/chunk_test.go index 694bea6f949..31dca1caa06 100644 --- a/sync_diff_inspector/chunk/chunk_test.go +++ b/sync_diff_inspector/chunk/chunk_test.go @@ -511,7 +511,7 @@ func TestChunkCopyAndUpdate(t *testing.T) { } func TestChunkID(t *testing.T) { - chunkIDBase := &ChunkID{ + chunkIDBase := &CID{ TableIndex: 2, BucketIndexLeft: 2, BucketIndexRight: 2, @@ -521,11 +521,11 @@ func TestChunkID(t *testing.T) { str := chunkIDBase.ToString() require.Equal(t, str, "2:2-2:2:4") - chunkIDtmp := &ChunkID{} + chunkIDtmp := &CID{} chunkIDtmp.FromString(str) require.Equal(t, chunkIDBase.Compare(chunkIDtmp), 0) - chunkIDSmalls := []*ChunkID{ + chunkIDSmalls := []*CID{ { TableIndex: 1, BucketIndexLeft: 3, @@ -557,12 +557,12 @@ func TestChunkID(t *testing.T) { require.Equal(t, chunkIDBase.Compare(chunkIDSmall), 1) str = chunkIDSmall.ToString() require.Equal(t, str, stringRes[i]) - chunkIDtmp = &ChunkID{} + chunkIDtmp = 
&CID{} chunkIDtmp.FromString(str) require.Equal(t, chunkIDSmall.Compare(chunkIDtmp), 0) } - chunkIDLarges := []*ChunkID{ + chunkIDLarges := []*CID{ { TableIndex: 3, BucketIndexLeft: 1, @@ -594,7 +594,7 @@ func TestChunkID(t *testing.T) { require.Equal(t, chunkIDBase.Compare(chunkIDLarge), -1) str = chunkIDLarge.ToString() require.Equal(t, str, stringRes[i]) - chunkIDtmp = &ChunkID{} + chunkIDtmp = &CID{} chunkIDtmp.FromString(str) require.Equal(t, chunkIDLarge.Compare(chunkIDtmp), 0) } diff --git a/sync_diff_inspector/config/config.go b/sync_diff_inspector/config/config.go index 789176ed37d..11f613a299e 100644 --- a/sync_diff_inspector/config/config.go +++ b/sync_diff_inspector/config/config.go @@ -44,13 +44,17 @@ import ( ) const ( - LocalDirPerm os.FileMode = 0o755 + // LocalFilePerm is the permission for local files LocalFilePerm os.FileMode = 0o644 + localDirPerm os.FileMode = 0o755 + + // LogFileName is the filename of the log LogFileName = "sync_diff.log" baseSplitThreadCount = 3 + // UnifiedTimeZone is the time zone UnifiedTimeZone string = "+0:00" ) @@ -92,7 +96,7 @@ func (t *TableConfig) Valid() bool { return true } -// TLS Security wrapper +// Security is the wrapper for TLS Security type Security struct { TLSName string `json:"tls-name"` @@ -112,7 +116,7 @@ type DataSource struct { Port int `toml:"port" json:"port"` User string `toml:"user" json:"user"` Password utils.SecretString `toml:"password" json:"password"` - SqlMode string `toml:"sql-mode" json:"sql-mode"` + SQLMode string `toml:"sql-mode" json:"sql-mode"` Snapshot string `toml:"snapshot" json:"snapshot"` Security *Security `toml:"security" json:"security"` @@ -136,6 +140,7 @@ func (d *DataSource) SetSnapshot(newSnapshot string) { d.Snapshot = newSnapshot } +// ToDBConfig get the current config from data source func (d *DataSource) ToDBConfig() *dbutil.DBConfig { return &dbutil.DBConfig{ Host: d.Host, @@ -146,7 +151,7 @@ func (d *DataSource) ToDBConfig() *dbutil.DBConfig { } } -// register TLS config for driver +// RegisterTLS register TLS config for driver func (d *DataSource) RegisterTLS() error { if d.Security == nil { return nil @@ -173,6 +178,7 @@ func (d *DataSource) RegisterTLS() error { return errors.Trace(err) } +// ToDriverConfig get the driver config func (d *DataSource) ToDriverConfig() *mysql.Config { cfg := mysql.NewConfig() cfg.Params = make(map[string]string) @@ -195,6 +201,7 @@ func (d *DataSource) ToDriverConfig() *mysql.Config { return cfg } +// TaskConfig is the config for sync diff type TaskConfig struct { Source []string `toml:"source-instances" json:"source-instances"` Routes []string `toml:"source-routes" json:"source-routes"` @@ -219,6 +226,7 @@ type TaskConfig struct { HashFile string } +// Init return a new config func (t *TaskConfig) Init( dataSources map[string]*DataSource, tableConfigs map[string]*TableConfig, @@ -310,7 +318,7 @@ func (t *TaskConfig) Init( } if !ok { // not match, raise error - return errors.Errorf("config changes breaking the checkpoint, please use another outputDir and start over again!") + return errors.Errorf("config changes breaking the checkpoint, please use another outputDir and start over again") } } @@ -522,7 +530,7 @@ func (c *Config) adjustConfigByDMSubTasks() (err error) { Port: subTaskCfgs[0].To.Port, User: subTaskCfgs[0].To.User, Password: utils.SecretString(subTaskCfgs[0].To.Password), - SqlMode: sqlMode, + SQLMode: sqlMode, Security: parseTLSFromDMConfig(subTaskCfgs[0].To.Security), } for _, subTaskCfg := range subTaskCfgs { @@ -543,7 +551,7 @@ func (c 
*Config) adjustConfigByDMSubTasks() (err error) { Port: subTaskCfg.From.Port, User: subTaskCfg.From.User, Password: utils.SecretString(subTaskCfg.From.Password), - SqlMode: sqlMode, + SQLMode: sqlMode, Security: parseTLSFromDMConfig(subTaskCfg.From.Security), Router: tableRouter, @@ -561,6 +569,7 @@ func (c *Config) adjustConfigByDMSubTasks() (err error) { return nil } +// Init initialize the config func (c *Config) Init() (err error) { if len(c.DMAddr) > 0 { err := c.adjustConfigByDMSubTasks() @@ -599,6 +608,7 @@ func (c *Config) Init() (err error) { return nil } +// CheckConfig check whether the config is vaild func (c *Config) CheckConfig() bool { if c.CheckThreadCount <= 0 { log.Error("check-thread-count must greater than 0!") @@ -636,7 +646,7 @@ func pathExists(_path string) (bool, error) { func mkdirAll(base string) error { mask := syscall.Umask(0) - err := os.MkdirAll(base, LocalDirPerm) + err := os.MkdirAll(base, localDirPerm) syscall.Umask(mask) return errors.Trace(err) } diff --git a/sync_diff_inspector/config/config_test.go b/sync_diff_inspector/config/config_test.go index 32d2a9f40ce..5a36caa4504 100644 --- a/sync_diff_inspector/config/config_test.go +++ b/sync_diff_inspector/config/config_test.go @@ -91,7 +91,7 @@ func TestNoSecretLeak(t *testing.T) { Port: 5432, User: "postgres", Password: "AVeryV#ryStr0ngP@ssw0rd", - SqlMode: "MYSQL", + SQLMode: "MYSQL", Snapshot: "2022/10/24", } cfg := &Config{} diff --git a/sync_diff_inspector/config/dm.go b/sync_diff_inspector/config/dm.go index 4591b74f79e..5179414036d 100644 --- a/sync_diff_inspector/config/dm.go +++ b/sync_diff_inspector/config/dm.go @@ -15,6 +15,7 @@ package config import ( "bytes" + "context" "crypto/aes" "crypto/cipher" "encoding/base64" @@ -33,7 +34,6 @@ import ( "github.com/pingcap/tiflow/dm/pb" "github.com/pingcap/tiflow/dm/pkg/terror" "github.com/pingcap/tiflow/pkg/column-mapping" - flag "github.com/spf13/pflag" "go.uber.org/zap" ) @@ -53,7 +53,7 @@ func getDMTaskCfg(dmAddr, task string) ([]*SubTaskConfig, error) { // TLSClientConfig: tlsCfg, } client := &http.Client{Transport: tr} - req, err := http.NewRequest("GET", getDMTaskCfgURL(dmAddr, task), nil) + req, err := http.NewRequestWithContext(context.Background(), "GET", getDMTaskCfgURL(dmAddr, task), nil) if err != nil { return nil, err } @@ -95,12 +95,6 @@ func getDMTaskCfg(dmAddr, task string) ([]*SubTaskConfig, error) { // SubTaskConfig is the configuration for SubTask. type SubTaskConfig struct { - // BurntSushi/toml seems have a bug for flag "-" - // when doing encoding, if we use `toml:"-"`, it still try to encode it - // and it will panic because of unsupported type (reflect.Func) - // so we should not export flagSet - flagSet *flag.FlagSet - // when in sharding, multi dm-workers do one task IsSharding bool `toml:"is-sharding" json:"is-sharding"` ShardMode string `toml:"shard-mode" json:"shard-mode"` @@ -173,9 +167,6 @@ type SubTaskConfig struct { // deprecated, will auto discover SQL mode EnableANSIQuotes bool `toml:"ansi-quotes" json:"ansi-quotes"` - // still needed by Syncer / Loader bin - printVersion bool - // which DM worker is running the subtask, this will be injected when the real worker starts running the subtask(StartSubTask). 
WorkerName string `toml:"-" json:"-"` // task experimental configs diff --git a/sync_diff_inspector/config/template.go b/sync_diff_inspector/config/template.go index 0296856520e..189d211a53a 100644 --- a/sync_diff_inspector/config/template.go +++ b/sync_diff_inspector/config/template.go @@ -111,6 +111,7 @@ collation = "" ` ) +// ExportTemplateConfig print the configType func ExportTemplateConfig(configType string) error { switch configType { case "dm", "DM", "Dm", "dM": @@ -118,7 +119,7 @@ func ExportTemplateConfig(configType string) error { case "norm", "normal", "Norm", "Normal": fmt.Print(normConfig) default: - return errors.Errorf("Error: unexpect template name: %s\n-T dm: export a dm config\n-T norm: export a normal config\n", configType) + return errors.Errorf("Error: unexpect template name: %s\n-T dm: export a dm config\n-T norm: export a normal config", configType) } return nil } diff --git a/sync_diff_inspector/diff/diff.go b/sync_diff_inspector/diff/diff.go index 239e7a84f81..b2eec9afd17 100644 --- a/sync_diff_inspector/diff/diff.go +++ b/sync_diff_inspector/diff/diff.go @@ -100,6 +100,7 @@ func NewDiff(ctx context.Context, cfg *config.Config) (diff *Diff, err error) { return diff, nil } +// PrintSummary print the summary and return true if report is passed func (df *Diff) PrintSummary(ctx context.Context) bool { // Stop updating progress bar so that summary won't be flushed. progress.Close() @@ -112,6 +113,7 @@ func (df *Diff) PrintSummary(ctx context.Context) bool { return df.report.Result == report.Pass } +// Close the current struct func (df *Diff) Close() { if df.upstream != nil { df.upstream.Close() @@ -163,15 +165,15 @@ func (df *Diff) initCheckpoint() error { node, reportInfo, err := df.cp.LoadChunk(path) if err != nil { return errors.Annotate(err, "the checkpoint load process failed") - } else { - // this need not be synchronized, because at the moment, the is only one thread access the section - log.Info("load checkpoint", - zap.Any("chunk index", node.GetID()), - zap.Reflect("chunk", node), - zap.String("state", node.GetState())) - df.cp.InitCurrentSavedID(node) } + // this need not be synchronized, because at the moment, the is only one thread access the section + log.Info("load checkpoint", + zap.Any("chunk index", node.GetID()), + zap.Reflect("chunk", node), + zap.String("state", node.GetState())) + df.cp.InitCurrentSavedID(node) + if node != nil { // remove the sql file that ID bigger than node. // cause we will generate these sql again. 
@@ -189,7 +191,7 @@ func (df *Diff) initCheckpoint() error { } } else { log.Info("not found checkpoint file, start from beginning") - id := &chunk.ChunkID{TableIndex: -1, BucketIndexLeft: -1, BucketIndexRight: -1, ChunkIndex: -1, ChunkCnt: 0} + id := &chunk.CID{TableIndex: -1, BucketIndexLeft: -1, BucketIndexRight: -1, ChunkIndex: -1, ChunkCnt: 0} err := df.removeSQLFiles(id) if err != nil { return errors.Trace(err) @@ -199,7 +201,7 @@ func (df *Diff) initCheckpoint() error { return nil } -func encodeReportConfig(config *report.ReportConfig) ([]byte, error) { +func encodeConfig(config *report.Config) ([]byte, error) { buf := new(bytes.Buffer) if err := toml.NewEncoder(buf).Encode(config); err != nil { return nil, errors.Trace(err) @@ -208,35 +210,35 @@ func encodeReportConfig(config *report.ReportConfig) ([]byte, error) { } func getConfigsForReport(cfg *config.Config) ([][]byte, []byte, error) { - sourceConfigs := make([]*report.ReportConfig, len(cfg.Task.SourceInstances)) + sourceConfigs := make([]*report.Config, len(cfg.Task.SourceInstances)) for i := 0; i < len(cfg.Task.SourceInstances); i++ { instance := cfg.Task.SourceInstances[i] - sourceConfigs[i] = &report.ReportConfig{ + sourceConfigs[i] = &report.Config{ Host: instance.Host, Port: instance.Port, User: instance.User, Snapshot: instance.Snapshot, - SqlMode: instance.SqlMode, + SQLMode: instance.SQLMode, } } instance := cfg.Task.TargetInstance - targetConfig := &report.ReportConfig{ + targetConfig := &report.Config{ Host: instance.Host, Port: instance.Port, User: instance.User, Snapshot: instance.Snapshot, - SqlMode: instance.SqlMode, + SQLMode: instance.SQLMode, } sourceBytes := make([][]byte, len(sourceConfigs)) var err error for i := range sourceBytes { - sourceBytes[i], err = encodeReportConfig(sourceConfigs[i]) + sourceBytes[i], err = encodeConfig(sourceConfigs[i]) if err != nil { return nil, nil, errors.Trace(err) } } - targetBytes, err := encodeReportConfig(targetConfig) + targetBytes, err := encodeConfig(targetConfig) if err != nil { return nil, nil, errors.Trace(err) } @@ -290,6 +292,7 @@ func (df *Diff) Equal(ctx context.Context) error { return nil } +// StructEqual compare tables from downstream func (df *Diff) StructEqual(ctx context.Context) error { tables := df.downstream.GetTables() tableIndex := 0 @@ -464,6 +467,7 @@ func (df *Diff) consume(ctx context.Context, rangeInfo *splitter.RangeInfo) bool return isEqual } +// BinGenerate ... 
func (df *Diff) BinGenerate(ctx context.Context, targetSource source.Source, tableRange *splitter.RangeInfo, count int64) (*splitter.RangeInfo, error) { if count <= splitter.SplitThreshold { return tableRange, nil @@ -570,11 +574,11 @@ func (df *Diff) binSearch(ctx context.Context, targetSource source.Source, table return nil, errors.Trace(err) } return c, nil - } else { - // TODO: handle the error to foreground - log.Fatal("the isEqual1 and isEqual2 cannot be both true") - return nil, nil } + + // TODO: handle the error to foreground + log.Fatal("the isEqual1 and isEqual2 cannot be both true") + return nil, nil } func (df *Diff) compareChecksumAndGetCount(ctx context.Context, tableRange *splitter.RangeInfo) (bool, int64, int64, error) { @@ -583,9 +587,9 @@ func (df *Diff) compareChecksumAndGetCount(ctx context.Context, tableRange *spli wg.Add(1) go func() { defer wg.Done() - upstreamInfo = df.upstream.GetCountAndMd5(ctx, tableRange) + upstreamInfo = df.upstream.GetCountAndMD5(ctx, tableRange) }() - downstreamInfo = df.downstream.GetCountAndMd5(ctx, tableRange) + downstreamInfo = df.downstream.GetCountAndMD5(ctx, tableRange) wg.Wait() if upstreamInfo.Err != nil { @@ -743,7 +747,6 @@ func (df *Diff) writeSQLs(ctx context.Context) { fixSQLFile, err := os.Create(fixSQLPath) if err != nil { log.Fatal("write sql failed: cannot create file", zap.Strings("sql", dml.sqls), zap.Error(err)) - continue } // write chunk meta chunkRange := dml.node.ChunkRange @@ -765,7 +768,7 @@ func (df *Diff) writeSQLs(ctx context.Context) { } } -func (df *Diff) removeSQLFiles(checkPointId *chunk.ChunkID) error { +func (df *Diff) removeSQLFiles(checkPointID *chunk.CID) error { ts := time.Now().Format("2006-01-02T15:04:05Z07:00") dirName := fmt.Sprintf(".trash-%s", ts) folderPath := filepath.Join(df.FixSQLDir, dirName) @@ -807,17 +810,17 @@ func (df *Diff) removeSQLFiles(checkPointId *chunk.ChunkID) error { if len(fileIDSubstrs) != 3 { return nil } - tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, err := utils.GetChunkIDFromSQLFileName(fileIDSubstrs[2]) + tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, err := utils.GetCIDFromSQLFileName(fileIDSubstrs[2]) if err != nil { return errors.Trace(err) } - fileID := &chunk.ChunkID{ + fileID := &chunk.CID{ TableIndex: tableIndex, BucketIndexLeft: bucketIndexLeft, BucketIndexRight: bucketIndexRight, ChunkIndex: chunkIndex, ChunkCnt: 0, } if err != nil { return errors.Trace(err) } - if fileID.Compare(checkPointId) > 0 { + if fileID.Compare(checkPointID) > 0 { // move to trash err = os.Rename(oldPath, newPath) if err != nil { diff --git a/sync_diff_inspector/main.go b/sync_diff_inspector/main.go index 761fe3f026e..4867738c34c 100644 --- a/sync_diff_inspector/main.go +++ b/sync_diff_inspector/main.go @@ -117,7 +117,7 @@ func checkSyncState(ctx context.Context, cfg *config.Config) bool { d, err := diff.NewDiff(ctx, cfg) if err != nil { - fmt.Printf("An error occured while initializing diff: %s, please check log info in %s for full details\n", + fmt.Printf("An error occurred while initializing diff: %s, please check log info in %s for full details\n", err, filepath.Join(cfg.Task.OutputDir, config.LogFileName)) log.Fatal("failed to initialize diff process", zap.Error(err)) return false @@ -127,7 +127,7 @@ func checkSyncState(ctx context.Context, cfg *config.Config) bool { if !cfg.CheckDataOnly { err = d.StructEqual(ctx) if err != nil { - fmt.Printf("An error occured while comparing table structure: %s, please check log info in %s for full details\n", + 
fmt.Printf("An error occurred while comparing table structure: %s, please check log info in %s for full details\n", err, filepath.Join(cfg.Task.OutputDir, config.LogFileName)) log.Fatal("failed to check structure difference", zap.Error(err)) return false @@ -138,7 +138,7 @@ func checkSyncState(ctx context.Context, cfg *config.Config) bool { if !cfg.CheckStructOnly { err = d.Equal(ctx) if err != nil { - fmt.Printf("An error occured while comparing table data: %s, please check log info in %s for full details\n", + fmt.Printf("An error occurred while comparing table data: %s, please check log info in %s for full details\n", err, filepath.Join(cfg.Task.OutputDir, config.LogFileName)) log.Fatal("failed to check data difference", zap.Error(err)) return false diff --git a/sync_diff_inspector/progress/progress.go b/sync_diff_inspector/progress/progress.go index 3fed728a8d1..27f9ec7538a 100644 --- a/sync_diff_inspector/progress/progress.go +++ b/sync_diff_inspector/progress/progress.go @@ -24,7 +24,7 @@ import ( "github.com/pingcap/tiflow/sync_diff_inspector/source/common" ) -type TableProgressPrinter struct { +type tableProgressPrinter struct { tableList *list.List tableFailList *list.List tableMap map[string]*list.Element @@ -38,58 +38,59 @@ type TableProgressPrinter struct { progress int total int - optCh chan Operator + optCh chan operator finishCh chan struct{} } -type table_state_t int +type tableState int const ( - TABLE_STATE_REGISTER table_state_t = 0x1 - TABLE_STATE_PRESTART table_state_t = 0x2 - TABLE_STATE_COMPARING table_state_t = 0x4 - TABLE_STATE_FINISH table_state_t = 0x8 - TABLE_STATE_RESULT_OK table_state_t = 0x00 - TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE table_state_t = 0x10 - TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE table_state_t = 0x20 - TABLE_STATE_RESULT_FAIL_STRUCTURE_PASS table_state_t = 0x40 - TABLE_STATE_RESULT_DIFFERENT table_state_t = 0x80 - TABLE_STATE_HEAD table_state_t = 0xff - TABLE_STATE_RESULT_MASK table_state_t = 0xff0 - TABLE_STATE_NOT_EXSIT_UPSTREAM table_state_t = 0x100 - TABLE_STATE_NOT_EXSIT_DOWNSTREAM table_state_t = 0x200 + tableStateRegister tableState = 0x1 + tableStatePrestart tableState = 0x2 + tableStateComparing tableState = 0x4 + tableStateFinish tableState = 0x8 + tableStateResultOK tableState = 0x00 + tableStateResultFailStructureDone tableState = 0x10 + tableStateResultFailStructureContinue tableState = 0x20 + tableStateResultFailStructurePass tableState = 0x40 + tableStateResultDifferent tableState = 0x80 + tableStateHead tableState = 0xff + tableStateResultMask tableState = 0xff0 + tableStateNotExistUpstream tableState = 0x100 + tableStateNotExistDownstream tableState = 0x200 ) +// TableProgress store the progress of one table type TableProgress struct { name string progress int total int - state table_state_t + state tableState totalStopUpdate bool } -type progress_opt_t int +type progressOpt int const ( - PROGRESS_OPT_INC progress_opt_t = iota - PROGRESS_OPT_UPDATE - PROGRESS_OPT_REGISTER - PROGRESS_OPT_START - PROGRESS_OPT_FAIL - PROGRESS_OPT_CLOSE - PROGRESS_OPT_ERROR + progressOptInc progressOpt = iota + progressOptUpdate + progressOptRegister + progressOptStart + progressOptFail + progressOptClose + progressOptError ) -type Operator struct { - optType progress_opt_t +type operator struct { + optType progressOpt name string total int - state table_state_t + state tableState totalStopUpdate bool } -func NewTableProgressPrinter(tableNums int, finishTableNums int) *TableProgressPrinter { - tpp := &TableProgressPrinter{ +func 
newTableProgressPrinter(tableNums int, finishTableNums int) *tableProgressPrinter { + tpp := &tableProgressPrinter{ tableList: list.New(), tableFailList: list.New(), tableMap: make(map[string]*list.Element), @@ -102,7 +103,7 @@ func NewTableProgressPrinter(tableNums int, finishTableNums int) *TableProgressP progress: 0, total: 0, - optCh: make(chan Operator, 16), + optCh: make(chan operator, 16), finishCh: make(chan struct{}), } tpp.init() @@ -111,77 +112,77 @@ func NewTableProgressPrinter(tableNums int, finishTableNums int) *TableProgressP return tpp } -func (tpp *TableProgressPrinter) SetOutput(output io.Writer) { +func (tpp *tableProgressPrinter) SetOutput(output io.Writer) { tpp.output = output } -func (tpp *TableProgressPrinter) Inc(name string) { - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_INC, +func (tpp *tableProgressPrinter) Inc(name string) { + tpp.optCh <- operator{ + optType: progressOptInc, name: name, } } -func (tpp *TableProgressPrinter) UpdateTotal(name string, total int, stopUpdate bool) { - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_UPDATE, +func (tpp *tableProgressPrinter) UpdateTotal(name string, total int, stopUpdate bool) { + tpp.optCh <- operator{ + optType: progressOptUpdate, name: name, total: total, totalStopUpdate: stopUpdate, } } -func (tpp *TableProgressPrinter) RegisterTable(name string, isFailed bool, isDone bool, isExist int) { - var state table_state_t +func (tpp *tableProgressPrinter) RegisterTable(name string, isFailed bool, isDone bool, isExist int) { + var state tableState if isFailed { if isDone { switch isExist { case common.UpstreamTableLackFlag: - state = TABLE_STATE_NOT_EXSIT_UPSTREAM | TABLE_STATE_REGISTER + state = tableStateNotExistUpstream | tableStateRegister case common.DownstreamTableLackFlag: - state = TABLE_STATE_NOT_EXSIT_DOWNSTREAM | TABLE_STATE_REGISTER + state = tableStateNotExistDownstream | tableStateRegister default: - state = TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE | TABLE_STATE_REGISTER + state = tableStateResultFailStructureDone | tableStateRegister } } else { - state = TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE | TABLE_STATE_REGISTER + state = tableStateResultFailStructureContinue | tableStateRegister } } else { - state = TABLE_STATE_REGISTER + state = tableStateRegister } - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_REGISTER, + tpp.optCh <- operator{ + optType: progressOptRegister, name: name, state: state, } } -func (tpp *TableProgressPrinter) StartTable(name string, total int, stopUpdate bool) { - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_START, +func (tpp *tableProgressPrinter) StartTable(name string, total int, stopUpdate bool) { + tpp.optCh <- operator{ + optType: progressOptStart, name: name, total: total, - state: TABLE_STATE_PRESTART, + state: tableStatePrestart, totalStopUpdate: stopUpdate, } } -func (tpp *TableProgressPrinter) FailTable(name string) { - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_FAIL, +func (tpp *tableProgressPrinter) FailTable(name string) { + tpp.optCh <- operator{ + optType: progressOptFail, name: name, - state: TABLE_STATE_RESULT_DIFFERENT, + state: tableStateResultDifferent, } } -func (tpp *TableProgressPrinter) Close() { - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_CLOSE, +func (tpp *tableProgressPrinter) Close() { + tpp.optCh <- operator{ + optType: progressOptClose, } <-tpp.finishCh } -func (tpp *TableProgressPrinter) PrintSummary() { +func (tpp *tableProgressPrinter) PrintSummary() { var cleanStr, fixStr string cleanStr = "\x1b[1A\x1b[J" fixStr = "\nSummary:\n\n" @@ 
-195,17 +196,17 @@ func (tpp *TableProgressPrinter) PrintSummary() { SkippedNum := 0 for p := tpp.tableFailList.Front(); p != nil; p = p.Next() { tp := p.Value.(*TableProgress) - if tp.state&(TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE|TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE) != 0 { + if tp.state&(tableStateResultFailStructureDone|tableStateResultFailStructureContinue) != 0 { fixStr = fmt.Sprintf("%sThe structure of %s is not equal.\n", fixStr, tp.name) } - if tp.state&(TABLE_STATE_RESULT_DIFFERENT) != 0 { + if tp.state&(tableStateResultDifferent) != 0 { fixStr = fmt.Sprintf("%sThe data of %s is not equal.\n", fixStr, tp.name) } - if tp.state&(TABLE_STATE_NOT_EXSIT_DOWNSTREAM) != 0 { + if tp.state&(tableStateNotExistDownstream) != 0 { fixStr = fmt.Sprintf("%sThe data of %s does not exist in downstream database.\n", fixStr, tp.name) SkippedNum++ } - if tp.state&(TABLE_STATE_NOT_EXSIT_UPSTREAM) != 0 { + if tp.state&(tableStateNotExistUpstream) != 0 { fixStr = fmt.Sprintf("%sThe data of %s does not exist in upstream database.\n", fixStr, tp.name) SkippedNum++ } @@ -219,9 +220,9 @@ func (tpp *TableProgressPrinter) PrintSummary() { fmt.Fprintf(tpp.output, "%s%s\n", cleanStr, fixStr) } -func (tpp *TableProgressPrinter) Error(err error) { - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_ERROR, +func (tpp *tableProgressPrinter) Error(err error) { + tpp.optCh <- operator{ + optType: progressOptError, } <-tpp.finishCh var cleanStr, fixStr string @@ -230,15 +231,15 @@ func (tpp *TableProgressPrinter) Error(err error) { fmt.Fprintf(tpp.output, "%s%s", cleanStr, fixStr) } -func (tpp *TableProgressPrinter) init() { +func (tpp *tableProgressPrinter) init() { tpp.tableList.PushBack(&TableProgress{ - state: TABLE_STATE_HEAD, + state: tableStateHead, }) tpp.output = os.Stdout } -func (tpp *TableProgressPrinter) serve() { +func (tpp *tableProgressPrinter) serve() { tick := time.NewTicker(200 * time.Millisecond) for { @@ -247,27 +248,27 @@ func (tpp *TableProgressPrinter) serve() { tpp.flush(false) case opt := <-tpp.optCh: switch opt.optType { - case PROGRESS_OPT_CLOSE: + case progressOptClose: tpp.flush(false) tpp.finishCh <- struct{}{} return - case PROGRESS_OPT_ERROR: + case progressOptError: tpp.finishCh <- struct{}{} return - case PROGRESS_OPT_INC: + case progressOptInc: if e, ok := tpp.tableMap[opt.name]; ok { tp := e.Value.(*TableProgress) tp.progress++ tpp.progress++ if tp.progress >= tp.total && tp.totalStopUpdate { - tp.state = (tp.state & TABLE_STATE_RESULT_MASK) | TABLE_STATE_FINISH + tp.state = (tp.state & tableStateResultMask) | tableStateFinish tpp.progress -= tp.progress tpp.total -= tp.total delete(tpp.tableMap, opt.name) tpp.flush(true) } } - case PROGRESS_OPT_REGISTER: + case progressOptRegister: if _, ok := tpp.tableMap[opt.name]; !ok { e := tpp.tableList.PushBack(&TableProgress{ name: opt.name, @@ -278,38 +279,38 @@ func (tpp *TableProgressPrinter) serve() { }) tpp.tableMap[opt.name] = e } - case PROGRESS_OPT_START: + case progressOptStart: e, ok := tpp.tableMap[opt.name] if !ok { e = tpp.tableList.PushBack(&TableProgress{ name: opt.name, progress: 0, total: opt.total, - state: opt.state | TABLE_STATE_RESULT_FAIL_STRUCTURE_PASS, + state: opt.state | tableStateResultFailStructurePass, totalStopUpdate: opt.totalStopUpdate, }) tpp.tableMap[opt.name] = e } else { tp := e.Value.(*TableProgress) - tp.state ^= TABLE_STATE_REGISTER | opt.state + tp.state ^= tableStateRegister | opt.state tp.progress = 0 tp.total = opt.total tp.totalStopUpdate = opt.totalStopUpdate } - if 
e.Value.(*TableProgress).state&TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE == 0 { + if e.Value.(*TableProgress).state&tableStateResultFailStructureDone == 0 { tpp.total += opt.total } else { delete(tpp.tableMap, opt.name) } tpp.flush(true) - case PROGRESS_OPT_UPDATE: + case progressOptUpdate: if e, ok := tpp.tableMap[opt.name]; ok { tp := e.Value.(*TableProgress) tpp.total += opt.total tp.total += opt.total tp.totalStopUpdate = opt.totalStopUpdate } - case PROGRESS_OPT_FAIL: + case progressOptFail: if e, ok := tpp.tableMap[opt.name]; ok { tp := e.Value.(*TableProgress) tp.state |= opt.state @@ -320,8 +321,8 @@ func (tpp *TableProgressPrinter) serve() { } } -// flush flush info -func (tpp *TableProgressPrinter) flush(stateIsChanged bool) { +// flush info +func (tpp *tableProgressPrinter) flush(stateIsChanged bool) { /* * A total of 15 tables need to be compared * @@ -348,22 +349,22 @@ func (tpp *TableProgressPrinter) flush(stateIsChanged bool) { // 4. structure is different and data is same // 5. structure is different and data is different switch tp.state & 0xf { - case TABLE_STATE_PRESTART: - switch tp.state & TABLE_STATE_RESULT_MASK { - case TABLE_STATE_RESULT_OK: + case tableStatePrestart: + switch tp.state & tableStateResultMask { + case tableStateResultOK: fixStr = fmt.Sprintf("%sComparing the table structure of %s ... equivalent\n", fixStr, tp.name) dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) tpp.lines++ tpp.progressTableNums++ - tp.state = TABLE_STATE_COMPARING - case TABLE_STATE_NOT_EXSIT_UPSTREAM, TABLE_STATE_NOT_EXSIT_DOWNSTREAM: + tp.state = tableStateComparing + case tableStateNotExistUpstream, tableStateNotExistDownstream: dynStr = fmt.Sprintf("%sComparing the table data of %s ...skipped\n", dynStr, tp.name) tpp.tableFailList.PushBack(tp) preNode := p.Prev() tpp.tableList.Remove(p) p = preNode tpp.finishTableNums++ - case TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE: + case tableStateResultFailStructureDone: fixStr = fmt.Sprintf("%sComparing the table structure of %s ... failure\n", fixStr, tp.name) tpp.tableFailList.PushBack(tp) // we have empty node as list head, so p is not nil @@ -371,29 +372,29 @@ func (tpp *TableProgressPrinter) flush(stateIsChanged bool) { tpp.tableList.Remove(p) p = preNode tpp.finishTableNums++ - case TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE: + case tableStateResultFailStructureContinue: fixStr = fmt.Sprintf("%sComparing the table structure of %s ... failure\n", fixStr, tp.name) dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) tpp.lines++ tpp.progressTableNums++ - tp.state ^= TABLE_STATE_COMPARING | TABLE_STATE_PRESTART - case TABLE_STATE_RESULT_FAIL_STRUCTURE_PASS: + tp.state ^= tableStateComparing | tableStatePrestart + case tableStateResultFailStructurePass: fixStr = fmt.Sprintf("%sComparing the table structure of %s ... skip\n", fixStr, tp.name) dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) tpp.lines++ tpp.progressTableNums++ - tp.state ^= TABLE_STATE_COMPARING | TABLE_STATE_PRESTART + tp.state ^= tableStateComparing | tableStatePrestart } - case TABLE_STATE_COMPARING: + case tableStateComparing: dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) tpp.lines++ - case TABLE_STATE_FINISH: - if tp.state&TABLE_STATE_RESULT_DIFFERENT == 0 { + case tableStateFinish: + if tp.state&tableStateResultDifferent == 0 { fixStr = fmt.Sprintf("%sComparing the table data of %s ... 
equivalent\n", fixStr, tp.name) } else { fixStr = fmt.Sprintf("%sComparing the table data of %s ... failure\n", fixStr, tp.name) } - if tp.state&TABLE_STATE_RESULT_MASK != 0 { + if tp.state&tableStateResultMask != 0 { tpp.tableFailList.PushBack(tp) } // we have empty node as list head, so p is not nil @@ -418,62 +419,72 @@ func (tpp *TableProgressPrinter) flush(stateIsChanged bool) { fmt.Fprintf(tpp.output, "Progress [%s>%s] %d%% %d/%d\n", strings.Repeat("=", numLeft), strings.Repeat("-", 60-numLeft), percent, tpp.progress, tpp.total) } -var progress_ *TableProgressPrinter = nil +var progress *tableProgressPrinter = nil +// Init initialize the printer func Init(tableNums, finishTableNums int) { - progress_ = NewTableProgressPrinter(tableNums, finishTableNums) + progress = newTableProgressPrinter(tableNums, finishTableNums) } +// Inc update the progress of one table func Inc(name string) { - if progress_ != nil { - progress_.Inc(name) + if progress != nil { + progress.Inc(name) } } +// UpdateTotal the total for given table func UpdateTotal(name string, total int, stopUpdate bool) { - if progress_ != nil { - progress_.UpdateTotal(name, total, stopUpdate) + if progress != nil { + progress.UpdateTotal(name, total, stopUpdate) } } +// RegisterTable register a new table func RegisterTable(name string, isFailed bool, isDone bool, isExist int) { - if progress_ != nil { - progress_.RegisterTable(name, isFailed, isDone, isExist) + if progress != nil { + progress.RegisterTable(name, isFailed, isDone, isExist) } } +// StartTable start a table func StartTable(name string, total int, stopUpdate bool) { - if progress_ != nil { - progress_.StartTable(name, total, stopUpdate) + if progress != nil { + progress.StartTable(name, total, stopUpdate) } } +// FailTable stop a table func FailTable(name string) { - if progress_ != nil { - progress_.FailTable(name) + if progress != nil { + progress.FailTable(name) } } +// Close close the progress printer func Close() { - if progress_ != nil { - progress_.Close() + if progress != nil { + progress.Close() } } +// PrintSummary print the summary func PrintSummary() { - if progress_ != nil { - progress_.PrintSummary() + if progress != nil { + progress.PrintSummary() } } +// Error pass the error into progress printer func Error(err error) { - if progress_ != nil { - progress_.Error(err) + if progress != nil { + progress.Error(err) } } +// SetOutput set the output for progress printer func SetOutput(output io.Writer) { - if progress_ != nil { - progress_.SetOutput(output) + if progress != nil { + progress.SetOutput(output) } } diff --git a/sync_diff_inspector/progress/progress_test.go b/sync_diff_inspector/progress/progress_test.go index 7393f93c022..3a12bc1122c 100644 --- a/sync_diff_inspector/progress/progress_test.go +++ b/sync_diff_inspector/progress/progress_test.go @@ -24,7 +24,7 @@ import ( ) func TestProgress(t *testing.T) { - p := NewTableProgressPrinter(6, 0) + p := newTableProgressPrinter(6, 0) p.RegisterTable("1", true, true, common.AllTableExistFlag) p.StartTable("1", 50, true) p.RegisterTable("2", true, false, common.AllTableExistFlag) @@ -59,7 +59,7 @@ func TestProgress(t *testing.T) { } func TestTableError(t *testing.T) { - p := NewTableProgressPrinter(4, 0) + p := newTableProgressPrinter(4, 0) p.RegisterTable("1", true, true, common.AllTableExistFlag) p.StartTable("1", 50, true) p.RegisterTable("2", true, true, common.AllTableExistFlag) diff --git a/sync_diff_inspector/report/report.go b/sync_diff_inspector/report/report.go index ba58878e8a5..cb74b3b0393 
100644 --- a/sync_diff_inspector/report/report.go +++ b/sync_diff_inspector/report/report.go @@ -41,17 +41,18 @@ const ( // Pass means all data and struct of tables are equal Pass = "pass" // Fail means not all data or struct of tables are equal - Fail = "fail" + Fail = "fail" + // Error means we meet an error Error = "error" ) -// ReportConfig stores the config information for the user -type ReportConfig struct { +// Config stores the config information for the user +type Config struct { Host string `toml:"host"` Port int `toml:"port"` User string `toml:"user"` Snapshot string `toml:"snapshot,omitempty"` - SqlMode string `toml:"sql-mode,omitempty"` + SQLMode string `toml:"sql-mode,omitempty"` } // TableResult saves the check result for every table. @@ -234,6 +235,7 @@ func (r *Report) CommitSummary() error { return nil } +// Print print the current report func (r *Report) Print(w io.Writer) error { var summary strings.Builder if r.Result == Pass && r.SkippedNum == 0 { @@ -272,7 +274,7 @@ func (r *Report) Print(w io.Writer) error { for schema, tableMap := range r.TableResults { for table, result := range tableMap { if result.MeetError != nil { - summary.WriteString(fmt.Sprintf("%s error occured in %s\n", result.MeetError.Error(), dbutil.TableName(schema, table))) + summary.WriteString(fmt.Sprintf("%s error occurred in %s\n", result.MeetError.Error(), dbutil.TableName(schema, table))) } } } @@ -291,6 +293,7 @@ func NewReport(task *config.TaskConfig) *Report { } } +// Init initialize the report func (r *Report) Init(tableDiffs []*common.TableDiff, sourceConfig [][]byte, targetConfig []byte) { r.StartTime = time.Now() r.SourceConfig = sourceConfig @@ -325,7 +328,7 @@ func (r *Report) SetTableStructCheckResult(schema, table string, equal bool, ski } // SetTableDataCheckResult sets the data check result for table. -func (r *Report) SetTableDataCheckResult(schema, table string, equal bool, rowsAdd, rowsDelete int, upCount, downCount int64, id *chunk.ChunkID) { +func (r *Report) SetTableDataCheckResult(schema, table string, equal bool, rowsAdd, rowsDelete int, upCount, downCount int64, id *chunk.CID) { r.Lock() defer r.Unlock() result := r.TableResults[schema][table] @@ -368,7 +371,7 @@ func (r *Report) SetTableMeetError(schema, table string, err error) { // GetSnapshot get the snapshot of the current state of the report, then we can restart the // sync-diff and get the correct report state. 
-func (r *Report) GetSnapshot(chunkID *chunk.ChunkID, schema, table string) (*Report, error) { +func (r *Report) GetSnapshot(chunkID *chunk.CID, schema, table string) (*Report, error) { r.RLock() defer r.RUnlock() targetID := utils.UniqueID(schema, table) @@ -387,7 +390,7 @@ func (r *Report) GetSnapshot(chunkID *chunk.ChunkID, schema, table string) (*Rep MeetError: result.MeetError, } for id, chunkResult := range result.ChunkMap { - sid := new(chunk.ChunkID) + sid := new(chunk.CID) err := sid.FromString(id) if err != nil { return nil, errors.Trace(err) diff --git a/sync_diff_inspector/report/report_test.go b/sync_diff_inspector/report/report_test.go index ce49d1a8016..6d36ffeb214 100644 --- a/sync_diff_inspector/report/report_test.go +++ b/sync_diff_inspector/report/report_test.go @@ -24,7 +24,7 @@ import ( "github.com/BurntSushi/toml" "github.com/DATA-DOG/go-sqlmock" "github.com/pingcap/tidb/pkg/parser" - "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/config" "github.com/pingcap/tiflow/sync_diff_inspector/source/common" @@ -45,10 +45,10 @@ func TestReport(t *testing.T) { report := NewReport(task) createTableSQL1 := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo1, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) createTableSQL2 := "create table `atest`.`atbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err := dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) tableDiffs := []*common.TableDiff{ @@ -77,7 +77,7 @@ func TestReport(t *testing.T) { Collation: "[123]", }, } - configs := []*ReportConfig{ + configs := []*Config{ { Host: "127.0.0.1", Port: 3306, @@ -111,37 +111,37 @@ func TestReport(t *testing.T) { // Test Table Report report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("test", "tbl", true, 100, 200, 222, 222, &chunk.ChunkID{1, 1, 1, 1, 2}) + report.SetTableDataCheckResult("test", "tbl", true, 100, 200, 222, 222, &chunk.CID{1, 1, 1, 1, 2}) report.SetTableMeetError("test", "tbl", errors.New("eeee")) - new_report := NewReport(task) - new_report.LoadReport(report) + newReport := NewReport(task) + newReport.LoadReport(report) - require.Equal(t, new_report.TotalSize, int64(579)) - result, ok := new_report.TableResults["test"]["tbl"] + require.Equal(t, newReport.TotalSize, int64(579)) + result, ok := newReport.TableResults["test"]["tbl"] require.True(t, ok) require.Equal(t, result.MeetError.Error(), "eeee") require.True(t, result.DataEqual) require.True(t, result.StructEqual) - require.Equal(t, new_report.getSortedTables(), [][]string{{"`atest`.`atbl`", "0", "0"}, {"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) - require.Equal(t, new_report.getDiffRows(), [][]string{}) + require.Equal(t, newReport.getSortedTables(), [][]string{{"`atest`.`atbl`", "0", "0"}, {"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) + require.Equal(t, newReport.getDiffRows(), [][]string{}) - new_report.SetTableStructCheckResult("atest", "atbl", true, false, 
common.AllTableExistFlag) - new_report.SetTableDataCheckResult("atest", "atbl", false, 111, 222, 333, 333, &chunk.ChunkID{1, 1, 1, 1, 2}) - require.Equal(t, new_report.getSortedTables(), [][]string{{"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) - require.Equal(t, new_report.getDiffRows(), [][]string{{"`atest`.`atbl`", "succeed", "true", "+111/-222", "333", "333"}}) + newReport.SetTableStructCheckResult("atest", "atbl", true, false, common.AllTableExistFlag) + newReport.SetTableDataCheckResult("atest", "atbl", false, 111, 222, 333, 333, &chunk.CID{1, 1, 1, 1, 2}) + require.Equal(t, newReport.getSortedTables(), [][]string{{"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) + require.Equal(t, newReport.getDiffRows(), [][]string{{"`atest`.`atbl`", "succeed", "true", "+111/-222", "333", "333"}}) - new_report.SetTableStructCheckResult("atest", "atbl", false, false, common.AllTableExistFlag) - require.Equal(t, new_report.getSortedTables(), [][]string{{"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) - require.Equal(t, new_report.getDiffRows(), [][]string{{"`atest`.`atbl`", "succeed", "false", "+111/-222", "333", "333"}}) + newReport.SetTableStructCheckResult("atest", "atbl", false, false, common.AllTableExistFlag) + require.Equal(t, newReport.getSortedTables(), [][]string{{"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) + require.Equal(t, newReport.getDiffRows(), [][]string{{"`atest`.`atbl`", "succeed", "false", "+111/-222", "333", "333"}}) - new_report.SetTableStructCheckResult("ctest", "atbl", false, true, common.AllTableExistFlag) + newReport.SetTableStructCheckResult("ctest", "atbl", false, true, common.AllTableExistFlag) - new_report.SetTableStructCheckResult("dtest", "atbl", false, true, common.DownstreamTableLackFlag) + newReport.SetTableStructCheckResult("dtest", "atbl", false, true, common.DownstreamTableLackFlag) buf := new(bytes.Buffer) - new_report.Print(buf) + newReport.Print(buf) info := buf.String() require.Contains(t, info, "The structure of `atest`.`atbl` is not equal\n") require.Contains(t, info, "The data of `atest`.`atbl` is not equal\n") @@ -162,7 +162,7 @@ func TestCalculateTotal(t *testing.T) { report := NewReport(task) createTableSQL := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) tableDiffs := []*common.TableDiff{ @@ -173,7 +173,7 @@ func TestCalculateTotal(t *testing.T) { Collation: "[123]", }, } - configs := []*ReportConfig{ + configs := []*Config{ { Host: "127.0.0.1", Port: 3306, @@ -209,7 +209,7 @@ func TestCalculateTotal(t *testing.T) { func TestPrint(t *testing.T) { report := NewReport(task) createTableSQL := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) tableDiffs := []*common.TableDiff{ @@ -226,7 +226,7 @@ func TestPrint(t *testing.T) { Collation: "[123]", }, } - configs := []*ReportConfig{ + configs := []*Config{ { Host: "127.0.0.1", Port: 3306, @@ -256,7 +256,7 @@ func TestPrint(t *testing.T) { var buf *bytes.Buffer // All Pass 
report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("test", "tbl", true, 0, 0, 22, 22, &chunk.ChunkID{0, 0, 0, 0, 1}) + report.SetTableDataCheckResult("test", "tbl", true, 0, 0, 22, 22, &chunk.CID{0, 0, 0, 0, 1}) buf = new(bytes.Buffer) report.Print(buf) require.Equal(t, buf.String(), "A total of 0 table have been compared and all are equal.\n"+ @@ -268,20 +268,20 @@ func TestPrint(t *testing.T) { buf = new(bytes.Buffer) report.Print(buf) require.Equal(t, buf.String(), "Error in comparison process:\n"+ - "123 error occured in `test`.`tbl1`\n"+ + "123 error occurred in `test`.`tbl1`\n"+ "You can view the comparison details through 'output_dir/sync_diff.log'\n") } func TestGetSnapshot(t *testing.T) { report := NewReport(task) createTableSQL1 := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo1, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) createTableSQL2 := "create table `atest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err := dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) createTableSQL3 := "create table `xtest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo3, err := dbutil.GetTableInfoBySQL(createTableSQL3, parser.New()) + tableInfo3, err := dbutiltest.GetTableInfoBySQL(createTableSQL3, parser.New()) require.NoError(t, err) tableDiffs := []*common.TableDiff{ @@ -302,7 +302,7 @@ func TestGetSnapshot(t *testing.T) { Collation: "[123]", }, } - configs := []*ReportConfig{ + configs := []*Config{ { Host: "127.0.0.1", Port: 3306, @@ -330,39 +330,39 @@ func TestGetSnapshot(t *testing.T) { report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("test", "tbl", false, 100, 100, 200, 300, &chunk.ChunkID{0, 0, 0, 1, 10}) - report.SetTableDataCheckResult("test", "tbl", true, 0, 0, 300, 300, &chunk.ChunkID{0, 0, 0, 3, 10}) - report.SetTableDataCheckResult("test", "tbl", false, 200, 200, 400, 500, &chunk.ChunkID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("test", "tbl", false, 100, 100, 200, 300, &chunk.CID{0, 0, 0, 1, 10}) + report.SetTableDataCheckResult("test", "tbl", true, 0, 0, 300, 300, &chunk.CID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("test", "tbl", false, 200, 200, 400, 500, &chunk.CID{0, 0, 0, 3, 10}) report.SetTableStructCheckResult("atest", "tbl", true, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("atest", "tbl", false, 100, 100, 500, 600, &chunk.ChunkID{0, 0, 0, 0, 10}) - report.SetTableDataCheckResult("atest", "tbl", true, 0, 0, 600, 600, &chunk.ChunkID{0, 0, 0, 3, 10}) - report.SetTableDataCheckResult("atest", "tbl", false, 200, 200, 700, 800, &chunk.ChunkID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("atest", "tbl", false, 100, 100, 500, 600, &chunk.CID{0, 0, 0, 0, 10}) + report.SetTableDataCheckResult("atest", "tbl", true, 0, 0, 600, 600, &chunk.CID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("atest", "tbl", false, 200, 200, 700, 800, &chunk.CID{0, 0, 0, 3, 10}) report.SetTableStructCheckResult("xtest", "tbl", true, false, common.AllTableExistFlag) - 
report.SetTableDataCheckResult("xtest", "tbl", false, 100, 100, 800, 900, &chunk.ChunkID{0, 0, 0, 0, 10}) - report.SetTableDataCheckResult("xtest", "tbl", true, 0, 0, 900, 900, &chunk.ChunkID{0, 0, 0, 1, 10}) - report.SetTableDataCheckResult("xtest", "tbl", false, 200, 200, 1000, 1100, &chunk.ChunkID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("xtest", "tbl", false, 100, 100, 800, 900, &chunk.CID{0, 0, 0, 0, 10}) + report.SetTableDataCheckResult("xtest", "tbl", true, 0, 0, 900, 900, &chunk.CID{0, 0, 0, 1, 10}) + report.SetTableDataCheckResult("xtest", "tbl", false, 200, 200, 1000, 1100, &chunk.CID{0, 0, 0, 3, 10}) - report_snap, err := report.GetSnapshot(&chunk.ChunkID{0, 0, 0, 1, 10}, "test", "tbl") + reportSnap, err := report.GetSnapshot(&chunk.CID{0, 0, 0, 1, 10}, "test", "tbl") require.NoError(t, err) - require.Equal(t, report_snap.TotalSize, report.TotalSize) - require.Equal(t, report_snap.Result, report.Result) + require.Equal(t, reportSnap.TotalSize, report.TotalSize) + require.Equal(t, reportSnap.Result, report.Result) for key, value := range report.TableResults { - if _, ok := report_snap.TableResults[key]; !ok { + if _, ok := reportSnap.TableResults[key]; !ok { v, ok := value["tbl"] require.True(t, ok) require.Equal(t, v.Schema, "atest") continue } - if _, ok := report_snap.TableResults[key]["tbl"]; !ok { + if _, ok := reportSnap.TableResults[key]["tbl"]; !ok { require.Equal(t, key, "atest") continue } v1 := value["tbl"] - v2 := report_snap.TableResults[key]["tbl"] + v2 := reportSnap.TableResults[key]["tbl"] require.Equal(t, v1.Schema, v2.Schema) require.Equal(t, v1.Table, v2.Table) require.Equal(t, v1.StructEqual, v2.StructEqual) @@ -372,14 +372,14 @@ func TestGetSnapshot(t *testing.T) { chunkMap1 := v1.ChunkMap chunkMap2 := v2.ChunkMap for id, r1 := range chunkMap1 { - sid := new(chunk.ChunkID) + sid := new(chunk.CID) if _, ok := chunkMap2[id]; !ok { require.NoError(t, sid.FromString(id)) - require.Equal(t, sid.Compare(&chunk.ChunkID{0, 0, 0, 3, 10}), 0) + require.Equal(t, sid.Compare(&chunk.CID{0, 0, 0, 3, 10}), 0) continue } require.NoError(t, sid.FromString(id)) - require.True(t, sid.Compare(&chunk.ChunkID{0, 0, 0, 1, 10}) <= 0) + require.True(t, sid.Compare(&chunk.CID{0, 0, 0, 1, 10}) <= 0) r2 := chunkMap2[id] require.Equal(t, r1.RowsAdd, r2.RowsAdd) require.Equal(t, r1.RowsDelete, r2.RowsDelete) @@ -392,16 +392,16 @@ func TestCommitSummary(t *testing.T) { outputDir := "./" report := NewReport(&config.TaskConfig{OutputDir: outputDir, FixDir: task.FixDir}) createTableSQL1 := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo1, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) createTableSQL2 := "create table `atest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err := dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) createTableSQL3 := "create table `xtest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo3, err := dbutil.GetTableInfoBySQL(createTableSQL3, parser.New()) + tableInfo3, err := dbutiltest.GetTableInfoBySQL(createTableSQL3, parser.New()) require.NoError(t, err) createTableSQL4 := "create table `xtest`.`tb1`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" 
- tableInfo4, err := dbutil.GetTableInfoBySQL(createTableSQL4, parser.New()) + tableInfo4, err := dbutiltest.GetTableInfoBySQL(createTableSQL4, parser.New()) require.NoError(t, err) tableDiffs := []*common.TableDiff{ { @@ -436,7 +436,7 @@ func TestCommitSummary(t *testing.T) { Collation: "[123]", }, } - configs := []*ReportConfig{ + configs := []*Config{ { Host: "127.0.0.1", Port: 3306, @@ -464,19 +464,19 @@ func TestCommitSummary(t *testing.T) { report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("test", "tbl", true, 100, 200, 400, 400, &chunk.ChunkID{0, 0, 0, 1, 10}) + report.SetTableDataCheckResult("test", "tbl", true, 100, 200, 400, 400, &chunk.CID{0, 0, 0, 1, 10}) report.SetTableStructCheckResult("atest", "tbl", true, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("atest", "tbl", false, 100, 200, 500, 600, &chunk.ChunkID{0, 0, 0, 2, 10}) + report.SetTableDataCheckResult("atest", "tbl", false, 100, 200, 500, 600, &chunk.CID{0, 0, 0, 2, 10}) report.SetTableStructCheckResult("xtest", "tbl", false, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("xtest", "tbl", false, 100, 200, 600, 700, &chunk.ChunkID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("xtest", "tbl", false, 100, 200, 600, 700, &chunk.CID{0, 0, 0, 3, 10}) report.SetTableStructCheckResult("xtest", "tb1", false, true, common.UpstreamTableLackFlag) - report.SetTableDataCheckResult("xtest", "tb1", false, 0, 200, 0, 200, &chunk.ChunkID{0, 0, 0, 4, 10}) + report.SetTableDataCheckResult("xtest", "tb1", false, 0, 200, 0, 200, &chunk.CID{0, 0, 0, 4, 10}) report.SetTableStructCheckResult("xtest", "tb2", false, true, common.DownstreamTableLackFlag) - report.SetTableDataCheckResult("xtest", "tb2", false, 100, 0, 100, 0, &chunk.ChunkID{0, 0, 0, 5, 10}) + report.SetTableDataCheckResult("xtest", "tb2", false, 100, 0, 100, 0, &chunk.CID{0, 0, 0, 5, 10}) err = report.CommitSummary() require.NoError(t, err) diff --git a/sync_diff_inspector/source/chunks_iter.go b/sync_diff_inspector/source/chunks_iter.go index 44b051fcb72..0439aba8ea3 100644 --- a/sync_diff_inspector/source/chunks_iter.go +++ b/sync_diff_inspector/source/chunks_iter.go @@ -28,7 +28,7 @@ import ( // ChunksIterator is used for single mysql/tidb source. 
type ChunksIterator struct { - ID *chunk.ChunkID + ID *chunk.CID tableAnalyzer TableAnalyzer TableDiffs []*common.TableDiff @@ -40,6 +40,7 @@ type ChunksIterator struct { cancel context.CancelFunc } +// NewChunksIterator returns a new iterator func NewChunksIterator(ctx context.Context, analyzer TableAnalyzer, tableDiffs []*common.TableDiff, startRange *splitter.RangeInfo, splitThreadCount int) (*ChunksIterator, error) { ctxx, cancel := context.WithCancel(ctx) iter := &ChunksIterator{ @@ -114,7 +115,7 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter return case t.chunksCh <- &splitter.RangeInfo{ ChunkRange: &chunk.Range{ - Index: &chunk.ChunkID{ + Index: &chunk.CID{ TableIndex: curTableIndex, }, Type: chunk.Empty, @@ -162,6 +163,7 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter pool.WaitFinished() } +// Next returns the next chunk func (t *ChunksIterator) Next(ctx context.Context) (*splitter.RangeInfo, error) { select { case <-ctx.Done(): @@ -176,6 +178,7 @@ func (t *ChunksIterator) Next(ctx context.Context) (*splitter.RangeInfo, error) } } +// Close closes the iterator func (t *ChunksIterator) Close() { t.cancel() } diff --git a/sync_diff_inspector/source/common/common_test.go b/sync_diff_inspector/source/common/common_test.go index 467548935bf..5649bbf347d 100644 --- a/sync_diff_inspector/source/common/common_test.go +++ b/sync_diff_inspector/source/common/common_test.go @@ -19,13 +19,14 @@ import ( "github.com/pingcap/tidb/pkg/parser" "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" "github.com/pingcap/tiflow/sync_diff_inspector/utils" "github.com/stretchr/testify/require" ) func TestRowData(t *testing.T) { createTableSQL := "create table test.test(id int(24), name varchar(24), age int(24), primary key(id, name));" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) _, orderKeyCols := dbutil.SelectUniqueOrderKey(tableInfo) diff --git a/sync_diff_inspector/source/common/rows.go b/sync_diff_inspector/source/common/rows.go index 27470c15042..acfb0928ff9 100644 --- a/sync_diff_inspector/source/common/rows.go +++ b/sync_diff_inspector/source/common/rows.go @@ -23,6 +23,7 @@ import ( "go.uber.org/zap" ) +// RowData represents a single row type RowData struct { Data map[string]*dbutil.ColumnData Source int @@ -34,7 +35,12 @@ type RowDatas struct { OrderKeyCols []*model.ColumnInfo } -func (r RowDatas) Len() int { return len(r.Rows) } +// Len returns the number of rows +func (r RowDatas) Len() int { + return len(r.Rows) +} + +// Less compares two rows func (r RowDatas) Less(i, j int) bool { for _, col := range r.OrderKeyCols { col1, ok := r.Rows[i].Data[col.Name.O] @@ -83,7 +89,11 @@ func (r RowDatas) Less(i, j int) bool { return false } -func (r RowDatas) Swap(i, j int) { r.Rows[i], r.Rows[j] = r.Rows[j], r.Rows[i] } + +// Swap swap two rows +func (r RowDatas) Swap(i, j int) { + r.Rows[i], r.Rows[j] = r.Rows[j], r.Rows[i] +} // Push implements heap.Interface's Push function func (r *RowDatas) Push(x interface{}) { diff --git a/sync_diff_inspector/source/common/table_diff.go b/sync_diff_inspector/source/common/table_diff.go index 2960f0ba7cb..74507a2ad7c 100644 --- a/sync_diff_inspector/source/common/table_diff.go +++ b/sync_diff_inspector/source/common/table_diff.go @@ -73,11 +73,15 @@ type TableDiff struct { } const ( - AllTableExistFlag = 0 + // 
AllTableExistFlag means the table exists in both upstream and downstream + AllTableExistFlag = 0 + // DownstreamTableLackFlag means the table only exists in upstream DownstreamTableLackFlag = -1 - UpstreamTableLackFlag = 1 + // UpstreamTableLackFlag means the table only exists in downstream + UpstreamTableLackFlag = 1 ) +// AllTableExist check the status func AllTableExist(tableLack int) bool { return tableLack == AllTableExistFlag } diff --git a/sync_diff_inspector/source/mysql_shard.go b/sync_diff_inspector/source/mysql_shard.go index ff0edd9e8c9..8e3461c0c18 100644 --- a/sync_diff_inspector/source/mysql_shard.go +++ b/sync_diff_inspector/source/mysql_shard.go @@ -33,10 +33,12 @@ import ( "go.uber.org/zap" ) +// MySQLTableAnalyzer is used to analyze MySQL table type MySQLTableAnalyzer struct { sourceTableMap map[string][]*common.TableShardSource } +// AnalyzeSplitter return an iterator for current table func (a *MySQLTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common.TableDiff, startRange *splitter.RangeInfo) (splitter.ChunkIterator, error) { matchedSources := getMatchedSourcesForTable(a.sourceTableMap, table) @@ -57,6 +59,7 @@ func (a *MySQLTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common. return randIter, nil } +// MySQLSources represent one table in MySQL type MySQLSources struct { tableDiffs []*common.TableDiff @@ -74,16 +77,19 @@ func getMatchedSourcesForTable(sourceTablesMap map[string][]*common.TableShardSo return matchSources } +// GetTableAnalyzer get analyzer for current table func (s *MySQLSources) GetTableAnalyzer() TableAnalyzer { return &MySQLTableAnalyzer{ s.sourceTablesMap, } } +// GetRangeIterator get range iterator func (s *MySQLSources) GetRangeIterator(ctx context.Context, r *splitter.RangeInfo, analyzer TableAnalyzer, splitThreadCount int) (RangeIterator, error) { return NewChunksIterator(ctx, analyzer, s.tableDiffs, r, splitThreadCount) } +// Close close the current table func (s *MySQLSources) Close() { for _, t := range s.sourceTablesMap { for _, db := range t { @@ -92,7 +98,8 @@ func (s *MySQLSources) Close() { } } -func (s *MySQLSources) GetCountAndMd5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { +// GetCountAndMD5 return count and checksum +func (s *MySQLSources) GetCountAndMD5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { beginTime := time.Now() table := s.tableDiffs[tableRange.GetTableIndex()] chunk := tableRange.GetChunk() @@ -102,7 +109,7 @@ func (s *MySQLSources) GetCountAndMd5(ctx context.Context, tableRange *splitter. for _, ms := range matchSources { go func(ms *common.TableShardSource) { - count, checksum, err := utils.GetCountAndMd5Checksum(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, table.Info, chunk.Where, chunk.Args) + count, checksum, err := utils.GetCountAndMD5Checksum(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, table.Info, chunk.Where, chunk.Args) infoCh <- &ChecksumInfo{ Checksum: checksum, Count: count, @@ -137,24 +144,25 @@ func (s *MySQLSources) GetCountAndMd5(ctx context.Context, tableRange *splitter. 
} } +// GetCountForLackTable return count for lack table func (s *MySQLSources) GetCountForLackTable(ctx context.Context, tableRange *splitter.RangeInfo) int64 { table := s.tableDiffs[tableRange.GetTableIndex()] var totalCount int64 matchSources := getMatchedSourcesForTable(s.sourceTablesMap, table) - if matchSources != nil { - for _, ms := range matchSources { - count, _ := dbutil.GetRowCount(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, "", nil) - totalCount += count - } + for _, ms := range matchSources { + count, _ := dbutil.GetRowCount(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, "", nil) + totalCount += count } return totalCount } +// GetTables return all tables func (s *MySQLSources) GetTables() []*common.TableDiff { return s.tableDiffs } +// GenerateFixSQL generate SQL func (s *MySQLSources) GenerateFixSQL(t DMLType, upstreamData, downstreamData map[string]*dbutil.ColumnData, tableIndex int) string { switch t { case Insert: @@ -169,6 +177,7 @@ func (s *MySQLSources) GenerateFixSQL(t DMLType, upstreamData, downstreamData ma return "" } +// GetRowsIterator get iterator for current table func (s *MySQLSources) GetRowsIterator(ctx context.Context, tableRange *splitter.RangeInfo) (RowDataIterator, error) { chunk := tableRange.GetChunk() @@ -187,6 +196,11 @@ func (s *MySQLSources) GetRowsIterator(ctx context.Context, tableRange *splitter rowsQuery, orderKeyCols = utils.GetTableRowsQueryFormat(ms.OriginSchema, ms.OriginTable, table.Info, table.Collation) query := fmt.Sprintf(rowsQuery, chunk.Where) rows, err := ms.DBConn.QueryContext(ctx, query, chunk.Args...) + defer func() { + if rows != nil { + _ = rows.Err() + } + }() if err != nil { return nil, errors.Trace(err) } @@ -222,6 +236,7 @@ func (s *MySQLSources) GetRowsIterator(ctx context.Context, tableRange *splitter }, nil } +// GetDB get the current DB func (s *MySQLSources) GetDB() *sql.DB { // return any of them is ok for _, st := range s.sourceTablesMap { @@ -233,11 +248,13 @@ func (s *MySQLSources) GetDB() *sql.DB { return nil } +// GetSnapshot get the current snapshot func (s *MySQLSources) GetSnapshot() string { log.Fatal("unreachable!, mysql doesn't have the snapshot") return "" } +// GetSourceStructInfo get the current table info func (s *MySQLSources) GetSourceStructInfo(ctx context.Context, tableIndex int) ([]*model.TableInfo, error) { tableDiff := s.GetTables()[tableIndex] // for tables that do not exist upstream or downstream @@ -258,6 +275,7 @@ func (s *MySQLSources) GetSourceStructInfo(ctx context.Context, tableIndex int) return sourceTableInfos, nil } +// MultiSourceRowsIterator is used to iterate rows from multi source type MultiSourceRowsIterator struct { sourceRows map[int]*sql.Rows sourceRowDatas *common.RowDatas @@ -271,6 +289,7 @@ func getRowData(rows *sql.Rows) (rowData map[string]*dbutil.ColumnData, err erro return } +// Next return the next row func (ms *MultiSourceRowsIterator) Next() (map[string]*dbutil.ColumnData, error) { // Before running getSourceRow, heap save one row from all the sources, // otherwise this source has read to the end. Each row should be the smallest in each source. 
@@ -297,12 +316,14 @@ func (ms *MultiSourceRowsIterator) Next() (map[string]*dbutil.ColumnData, error) return rowData.Data, nil } +// Close return all sources func (ms *MultiSourceRowsIterator) Close() { for _, s := range ms.sourceRows { s.Close() } } +// NewMySQLSources return sources for MySQL tables func NewMySQLSources(ctx context.Context, tableDiffs []*common.TableDiff, ds []*config.DataSource, threadCount int, f tableFilter.Filter, skipNonExistingTable bool) (Source, error) { sourceTablesMap := make(map[string][]*common.TableShardSource) // we should get the real table name @@ -340,20 +361,20 @@ func NewMySQLSources(ctx context.Context, tableDiffs []*common.TableDiff, ds []* return nil, errors.Errorf("get route result for %d source %s.%s failed, error %v", i, schema, table, err) } } - uniqueId := utils.UniqueID(targetSchema, targetTable) + uniqueID := utils.UniqueID(targetSchema, targetTable) isMatched := f.MatchTable(targetSchema, targetTable) if isMatched { // if match the filter, we should respect it and check target has this table later. - sourceTablesAfterRoute[uniqueId] = struct{}{} + sourceTablesAfterRoute[uniqueID] = struct{}{} } - if _, ok := targetUniqueTableMap[uniqueId]; !ok && !(isMatched && skipNonExistingTable) { + if _, ok := targetUniqueTableMap[uniqueID]; !ok && !(isMatched && skipNonExistingTable) { continue } - maxSourceRouteTableCount[uniqueId]++ - if _, ok := sourceTablesMap[uniqueId]; !ok { - sourceTablesMap[uniqueId] = make([]*common.TableShardSource, 0) + maxSourceRouteTableCount[uniqueID]++ + if _, ok := sourceTablesMap[uniqueID]; !ok { + sourceTablesMap[uniqueID] = make([]*common.TableShardSource, 0) } - sourceTablesMap[uniqueId] = append(sourceTablesMap[uniqueId], &common.TableShardSource{ + sourceTablesMap[uniqueID] = append(sourceTablesMap[uniqueID], &common.TableShardSource{ TableSource: common.TableSource{ OriginSchema: schema, OriginTable: table, diff --git a/sync_diff_inspector/source/source.go b/sync_diff_inspector/source/source.go index a3f7500a019..143187ebbfc 100644 --- a/sync_diff_inspector/source/source.go +++ b/sync_diff_inspector/source/source.go @@ -35,20 +35,25 @@ import ( "go.uber.org/zap" ) +// DMLType is the type of DML type DMLType int32 const ( + // Insert means insert Insert DMLType = iota + 1 + // Delete means delete Delete + // Replace means replace Replace ) const ( - ShieldDBName = "_no__exists__db_" - ShieldTableName = "_no__exists__table_" - GetSyncPointQuery = "SELECT primary_ts, secondary_ts FROM tidb_cdc.syncpoint_v1 ORDER BY primary_ts DESC LIMIT 1" + shieldDBName = "_no__exists__db_" + shieldTableName = "_no__exists__table_" + getSyncPointQuery = "SELECT primary_ts, secondary_ts FROM tidb_cdc.syncpoint_v1 ORDER BY primary_ts DESC LIMIT 1" ) +// ChecksumInfo stores checksum and count type ChecksumInfo struct { Checksum uint64 Count int64 @@ -71,6 +76,7 @@ type TableAnalyzer interface { AnalyzeSplitter(context.Context, *common.TableDiff, *splitter.RangeInfo) (splitter.ChunkIterator, error) } +// Source is the interface for table type Source interface { // GetTableAnalyzer pick the proper analyzer for different source. // the implement of this function is different in mysql/tidb. @@ -82,8 +88,8 @@ type Source interface { // there are many workers consume the range from the channel to compare. GetRangeIterator(context.Context, *splitter.RangeInfo, TableAnalyzer, int) (RangeIterator, error) - // GetCountAndMd5 gets the md5 result and the count from given range. 
- GetCountAndMd5(context.Context, *splitter.RangeInfo) *ChecksumInfo + // GetCountAndMD5 gets the md5 result and the count from given range. + GetCountAndMD5(context.Context, *splitter.RangeInfo) *ChecksumInfo // GetCountForLackTable gets the count for tables that don't exist upstream or downstream. GetCountForLackTable(context.Context, *splitter.RangeInfo) int64 @@ -112,6 +118,7 @@ type Source interface { Close() } +// NewSources returns a new source func NewSources(ctx context.Context, cfg *config.Config) (downstream Source, upstream Source, err error) { // init db connection for upstream / downstream. err = initDBConn(ctx, cfg) @@ -152,8 +159,8 @@ func NewSources(ctx context.Context, cfg *config.Config) (downstream Source, ups if d.Router.AddRule(&router.TableRule{ SchemaPattern: tableConfig.Schema, TablePattern: tableConfig.Table, - TargetSchema: ShieldDBName, - TargetTable: ShieldTableName, + TargetSchema: shieldDBName, + TargetTable: shieldTableName, }) != nil { return nil, nil, errors.Errorf("add shield rule failed [schema = %s] [table = %s]", tableConfig.Schema, tableConfig.Table) } @@ -168,8 +175,8 @@ func NewSources(ctx context.Context, cfg *config.Config) (downstream Source, ups if d.Router.AddRule(&router.TableRule{ SchemaPattern: tableConfig.Schema, TablePattern: tableConfig.Table, - TargetSchema: ShieldDBName, - TargetTable: ShieldTableName, + TargetSchema: shieldDBName, + TargetTable: shieldTableName, }) != nil { return nil, nil, errors.Errorf("add shield rule failed [schema = %s] [table = %s]", tableConfig.Schema, tableConfig.Table) } @@ -235,9 +242,9 @@ func buildSourceFromCfg( if ok { if len(dbs) == 1 { return NewTiDBSource(ctx, tableDiffs, dbs[0], bucketSpliterPool, f, skipNonExistingTable) - } else { - log.Fatal("Don't support check table in multiple tidb instance, please specify one tidb instance.") } + + log.Fatal("Don't support check table in multiple tidb instance, please specify one tidb instance.") } return NewMySQLSources(ctx, tableDiffs, dbs, connCount, f, skipNonExistingTable) } @@ -249,14 +256,14 @@ func getAutoSnapshotPosition(cfg *mysql.Config) (string, string, error) { } defer tmpConn.Close() var primaryTs, secondaryTs string - err = tmpConn.QueryRow(GetSyncPointQuery).Scan(&primaryTs, &secondaryTs) + err = tmpConn.QueryRow(getSyncPointQuery).Scan(&primaryTs, &secondaryTs) if err != nil { return "", "", errors.Annotatef(err, "fetching auto-position tidb_snapshot failed") } return primaryTs, secondaryTs, nil } -func initDBConn(ctx context.Context, cfg *config.Config) error { +func initDBConn(_ context.Context, cfg *config.Config) error { // Fill in tidb_snapshot if it is set to AUTO // This is only supported when set to auto on both target/source. 
if cfg.Task.TargetInstance.IsAutoSnapshot() { diff --git a/sync_diff_inspector/source/source_test.go b/sync_diff_inspector/source/source_test.go index 692452af4b7..9627c825dd4 100644 --- a/sync_diff_inspector/source/source_test.go +++ b/sync_diff_inspector/source/source_test.go @@ -56,7 +56,7 @@ type MockChunkIterator struct { ctx context.Context tableDiff *common.TableDiff rangeInfo *splitter.RangeInfo - index *chunk.ChunkID + index *chunk.CID } const ( @@ -70,7 +70,7 @@ func (m *MockChunkIterator) Next() (*chunk.Range, error) { } m.index.ChunkIndex = m.index.ChunkIndex + 1 return &chunk.Range{ - Index: &chunk.ChunkID{ + Index: &chunk.CID{ TableIndex: m.index.TableIndex, BucketIndexLeft: m.index.BucketIndexLeft, BucketIndexRight: m.index.BucketIndexRight, @@ -86,7 +86,7 @@ func (m *MockChunkIterator) Close() { type MockAnalyzer struct{} func (m *MockAnalyzer) AnalyzeSplitter(ctx context.Context, tableDiff *common.TableDiff, rangeInfo *splitter.RangeInfo) (splitter.ChunkIterator, error) { - i := &chunk.ChunkID{ + i := &chunk.CID{ TableIndex: 0, BucketIndexLeft: 0, BucketIndexRight: 0, @@ -184,7 +184,7 @@ func TestTiDBSource(t *testing.T) { require.Equal(t, n, tableCase.rangeInfo.GetTableIndex()) countRows := sqlmock.NewRows([]string{"CNT", "CHECKSUM"}).AddRow(123, 456) mock.ExpectQuery("SELECT COUNT.*").WillReturnRows(countRows) - checksum := tidb.GetCountAndMd5(ctx, tableCase.rangeInfo) + checksum := tidb.GetCountAndMD5(ctx, tableCase.rangeInfo) require.NoError(t, checksum.Err) require.Equal(t, checksum.Count, int64(123)) require.Equal(t, checksum.Checksum, uint64(456)) @@ -399,7 +399,7 @@ func TestMysqlShardSources(t *testing.T) { mock.ExpectQuery("SELECT COUNT.*").WillReturnRows(countRows) } - checksum := shard.GetCountAndMd5(ctx, tableCase.rangeInfo) + checksum := shard.GetCountAndMD5(ctx, tableCase.rangeInfo) require.NoError(t, checksum.Err) require.Equal(t, checksum.Count, int64(len(dbs))) require.Equal(t, checksum.Checksum, resChecksum) @@ -771,15 +771,17 @@ func TestRouterRules(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - r, err := router.NewTableRouter(false, []*router.TableRule{ - // make sure this rule works - { - SchemaPattern: "schema1", - TablePattern: "tbl", - TargetSchema: "schema2", - TargetTable: "tbl", - }, - }) + r, _ := router.NewTableRouter( + false, + []*router.TableRule{ + // make sure this rule works + { + SchemaPattern: "schema1", + TablePattern: "tbl", + TargetSchema: "schema2", + TargetTable: "tbl", + }, + }) cfg := &config.Config{ LogLevel: "debug", CheckThreadCount: 4, @@ -851,8 +853,8 @@ func TestRouterRules(t *testing.T) { require.Equal(t, "tbl", targetTable) targetSchema, targetTable, err = cfg.Task.SourceInstances[0].Router.Route("schema2", "tbl") require.NoError(t, err) - require.Equal(t, ShieldDBName, targetSchema) - require.Equal(t, ShieldTableName, targetTable) + require.Equal(t, shieldDBName, targetSchema) + require.Equal(t, shieldTableName, targetTable) targetSchema, targetTable, err = cfg.Task.SourceInstances[0].Router.Route("schema_test", "tbl") require.NoError(t, err) require.Equal(t, "schema_test", targetSchema) @@ -911,7 +913,7 @@ func TestInitTables(t *testing.T) { rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("", "") mock.ExpectQuery("SHOW VARIABLES LIKE*").WillReturnRows(rows) - tablesToBeCheck, err = initTables(ctx, cfg) + _, err = initTables(ctx, cfg) require.Contains(t, err.Error(), "different config matched to same target table") require.NoError(t, 
mock.ExpectationsWereMet()) } @@ -936,18 +938,18 @@ func TestCheckTableMatched(t *testing.T) { tmap["`test`.`t1`"] = struct{}{} tmap["`test`.`t2`"] = struct{}{} - tables, err := checkTableMatched(tableDiffs, tmap, smap, false) + _, err := checkTableMatched(tableDiffs, tmap, smap, false) require.NoError(t, err) smap["`test`.`t3`"] = struct{}{} - tables, err = checkTableMatched(tableDiffs, tmap, smap, false) + _, err = checkTableMatched(tableDiffs, tmap, smap, false) require.Contains(t, err.Error(), "the target has no table to be compared. source-table is ``test`.`t3``") delete(smap, "`test`.`t2`") - tables, err = checkTableMatched(tableDiffs, tmap, smap, false) + _, err = checkTableMatched(tableDiffs, tmap, smap, false) require.Contains(t, err.Error(), "the source has no table to be compared. target-table is ``test`.`t2``") - tables, err = checkTableMatched(tableDiffs, tmap, smap, true) + tables, err := checkTableMatched(tableDiffs, tmap, smap, true) require.NoError(t, err) require.Equal(t, 0, tables[0].TableLack) require.Equal(t, 1, tables[1].TableLack) diff --git a/sync_diff_inspector/source/tidb.go b/sync_diff_inspector/source/tidb.go index 92d80c5d34b..5265bf88eb5 100644 --- a/sync_diff_inspector/source/tidb.go +++ b/sync_diff_inspector/source/tidb.go @@ -33,12 +33,14 @@ import ( "go.uber.org/zap" ) +// TiDBTableAnalyzer is used to analyze table type TiDBTableAnalyzer struct { dbConn *sql.DB bucketSpliterPool *utils.WorkerPool sourceTableMap map[string]*common.TableSource } +// AnalyzeSplitter returns a new iterator for TiDB table func (a *TiDBTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common.TableDiff, startRange *splitter.RangeInfo) (splitter.ChunkIterator, error) { matchedSource := getMatchSource(a.sourceTableMap, table) // Shallow Copy @@ -65,14 +67,17 @@ func (a *TiDBTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common.T return randIter, nil } +// TiDBRowsIterator is used to iterate rows in TiDB type TiDBRowsIterator struct { rows *sql.Rows } +// Close closes the iterator func (s *TiDBRowsIterator) Close() { s.rows.Close() } +// Next gets the next row func (s *TiDBRowsIterator) Next() (map[string]*dbutil.ColumnData, error) { if s.rows.Next() { return dbutil.ScanRow(s.rows) @@ -80,6 +85,7 @@ func (s *TiDBRowsIterator) Next() (map[string]*dbutil.ColumnData, error) { return nil, nil } +// TiDBSource represents the table in TiDB type TiDBSource struct { tableDiffs []*common.TableDiff sourceTableMap map[string]*common.TableSource @@ -91,6 +97,7 @@ type TiDBSource struct { version *semver.Version } +// GetTableAnalyzer gets the analyzer for current source func (s *TiDBSource) GetTableAnalyzer() TableAnalyzer { return &TiDBTableAnalyzer{ s.dbConn, @@ -111,21 +118,24 @@ func getMatchSource(sourceTableMap map[string]*common.TableSource, table *common return sourceTableMap[uniqueID] } +// GetRangeIterator returns a new iterator for TiDB table func (s *TiDBSource) GetRangeIterator(ctx context.Context, r *splitter.RangeInfo, analyzer TableAnalyzer, splitThreadCount int) (RangeIterator, error) { return NewChunksIterator(ctx, analyzer, s.tableDiffs, r, splitThreadCount) } +// Close closes the source func (s *TiDBSource) Close() { s.dbConn.Close() } -func (s *TiDBSource) GetCountAndMd5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { +// GetCountAndMD5 returns the checksum info +func (s *TiDBSource) GetCountAndMD5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { beginTime := time.Now() table := 
s.tableDiffs[tableRange.GetTableIndex()] chunk := tableRange.GetChunk() matchSource := getMatchSource(s.sourceTableMap, table) - count, checksum, err := utils.GetCountAndMd5Checksum(ctx, s.dbConn, matchSource.OriginSchema, matchSource.OriginTable, table.Info, chunk.Where, chunk.Args) + count, checksum, err := utils.GetCountAndMD5Checksum(ctx, s.dbConn, matchSource.OriginSchema, matchSource.OriginTable, table.Info, chunk.Where, chunk.Args) cost := time.Since(beginTime) return &ChecksumInfo{ @@ -136,6 +146,7 @@ func (s *TiDBSource) GetCountAndMd5(ctx context.Context, tableRange *splitter.Ra } } +// GetCountForLackTable returns count for lack table func (s *TiDBSource) GetCountForLackTable(ctx context.Context, tableRange *splitter.RangeInfo) int64 { table := s.tableDiffs[tableRange.GetTableIndex()] matchSource := getMatchSource(s.sourceTableMap, table) @@ -146,10 +157,12 @@ func (s *TiDBSource) GetCountForLackTable(ctx context.Context, tableRange *split return 0 } +// GetTables returns all tables func (s *TiDBSource) GetTables() []*common.TableDiff { return s.tableDiffs } +// GetSourceStructInfo get the table info func (s *TiDBSource) GetSourceStructInfo(ctx context.Context, tableIndex int) ([]*model.TableInfo, error) { var err error tableInfos := make([]*model.TableInfo, 1) @@ -163,6 +176,7 @@ func (s *TiDBSource) GetSourceStructInfo(ctx context.Context, tableIndex int) ([ return tableInfos, nil } +// GenerateFixSQL generate SQL func (s *TiDBSource) GenerateFixSQL(t DMLType, upstreamData, downstreamData map[string]*dbutil.ColumnData, tableIndex int) string { if t == Insert { return utils.GenerateReplaceDML(upstreamData, s.tableDiffs[tableIndex].Info, s.tableDiffs[tableIndex].Schema) @@ -177,6 +191,7 @@ func (s *TiDBSource) GenerateFixSQL(t DMLType, upstreamData, downstreamData map[ return "" } +// GetRowsIterator returns a new iterator func (s *TiDBSource) GetRowsIterator(ctx context.Context, tableRange *splitter.RangeInfo) (RowDataIterator, error) { chunk := tableRange.GetChunk() @@ -187,6 +202,11 @@ func (s *TiDBSource) GetRowsIterator(ctx context.Context, tableRange *splitter.R log.Debug("select data", zap.String("sql", query), zap.Reflect("args", chunk.Args)) rows, err := s.dbConn.QueryContext(ctx, query, chunk.Args...) + defer func() { + if rows != nil { + _ = rows.Err() + } + }() if err != nil { return nil, errors.Trace(err) } @@ -195,14 +215,17 @@ func (s *TiDBSource) GetRowsIterator(ctx context.Context, tableRange *splitter.R }, nil } +// GetDB get the current DB func (s *TiDBSource) GetDB() *sql.DB { return s.dbConn } +// GetSnapshot get the current snapshot func (s *TiDBSource) GetSnapshot() string { return s.snapshot } +// NewTiDBSource return a new TiDB source func NewTiDBSource( ctx context.Context, tableDiffs []*common.TableDiff, ds *config.DataSource, @@ -248,18 +271,18 @@ func NewTiDBSource( } } - uniqueId := utils.UniqueID(targetSchema, targetTable) + uniqueID := utils.UniqueID(targetSchema, targetTable) isMatched := f.MatchTable(targetSchema, targetTable) if isMatched { // if match the filter, we should respect it and check target has this table later. 
- sourceTablesAfterRoute[uniqueId] = struct{}{} + sourceTablesAfterRoute[uniqueID] = struct{}{} } - if _, ok := targetUniqueTableMap[uniqueId]; ok || (isMatched && skipNonExistingTable) { - if _, ok := sourceTableMap[uniqueId]; ok { + if _, ok := targetUniqueTableMap[uniqueID]; ok || (isMatched && skipNonExistingTable) { + if _, ok := sourceTableMap[uniqueID]; ok { log.Error("TiDB source don't support compare multiple source tables with one downstream table," + " if this happening when diff on same instance is fine. otherwise we are not guarantee this diff result is right") } - sourceTableMap[uniqueId] = &common.TableSource{ + sourceTableMap[uniqueID] = &common.TableSource{ OriginSchema: schema, OriginTable: table, } diff --git a/sync_diff_inspector/splitter/bucket.go b/sync_diff_inspector/splitter/bucket.go index 46a53f99d28..cfd5c1d6664 100644 --- a/sync_diff_inspector/splitter/bucket.go +++ b/sync_diff_inspector/splitter/bucket.go @@ -30,8 +30,10 @@ import ( "go.uber.org/zap" ) +// DefaultChannelBuffer is the default size for channel buffer const DefaultChannelBuffer = 1024 +// BucketIterator is struct for bucket iterator type BucketIterator struct { buckets []dbutil.Bucket table *common.TableDiff @@ -53,10 +55,12 @@ type BucketIterator struct { dbConn *sql.DB } +// NewBucketIterator return a new iterator func NewBucketIterator(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB) (*BucketIterator, error) { return NewBucketIteratorWithCheckpoint(ctx, progressID, table, dbConn, nil, utils.NewWorkerPool(1, "bucketIter")) } +// NewBucketIteratorWithCheckpoint return a new iterator func NewBucketIteratorWithCheckpoint( ctx context.Context, progressID string, @@ -95,10 +99,12 @@ func NewBucketIteratorWithCheckpoint( return bs, nil } +// GetIndexID return the index id func (s *BucketIterator) GetIndexID() int64 { return s.indexID } +// Next return the next chunk func (s *BucketIterator) Next() (*chunk.Range, error) { var ok bool if uint(len(s.chunks)) <= s.nextChunk { @@ -219,6 +225,7 @@ NEXTINDEX: return nil } +// Close closes the iterator func (s *BucketIterator) Close() { s.cancel() } diff --git a/sync_diff_inspector/splitter/index_fields_test.go b/sync_diff_inspector/splitter/index_fields_test.go index 6b6cc5768e7..788df79a116 100644 --- a/sync_diff_inspector/splitter/index_fields_test.go +++ b/sync_diff_inspector/splitter/index_fields_test.go @@ -17,7 +17,7 @@ import ( "testing" "github.com/pingcap/tidb/pkg/parser" - "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" "github.com/stretchr/testify/require" ) @@ -30,7 +30,7 @@ func TestIndexFieldsSimple(t *testing.T) { "`c` char(120) NOT NULL DEFAULT '', " + "PRIMARY KEY (`id`), KEY `k_1` (`k`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) fields, err := indexFieldsFromConfigString("k", tableInfo) @@ -61,7 +61,7 @@ func TestIndexFieldsComposite(t *testing.T) { "KEY `k_1` (`k`)," + "UNIQUE INDEX `c_1` (`c`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) fields, err := indexFieldsFromConfigString("id, k", tableInfo) @@ -92,7 +92,7 @@ func TestIndexFieldsEmpty(t *testing.T) { "`c` char(120) NOT NULL DEFAULT '', " + "PRIMARY KEY (`id`), KEY `k_1` (`k`))" - tableInfo, err := 
dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) fields, err := indexFieldsFromConfigString("", tableInfo) diff --git a/sync_diff_inspector/splitter/limit.go b/sync_diff_inspector/splitter/limit.go index 3138d53181f..3baa585713a 100644 --- a/sync_diff_inspector/splitter/limit.go +++ b/sync_diff_inspector/splitter/limit.go @@ -30,6 +30,7 @@ import ( "go.uber.org/zap" ) +// LimitIterator is the iterator with limit type LimitIterator struct { table *common.TableDiff tagChunk *chunk.Range @@ -46,11 +47,19 @@ type LimitIterator struct { columnOffset map[string]int } +// NewLimitIterator return a new iterator func NewLimitIterator(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB) (*LimitIterator, error) { return NewLimitIteratorWithCheckpoint(ctx, progressID, table, dbConn, nil) } -func NewLimitIteratorWithCheckpoint(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB, startRange *RangeInfo) (*LimitIterator, error) { +// NewLimitIteratorWithCheckpoint return a new iterator +func NewLimitIteratorWithCheckpoint( + ctx context.Context, + progressID string, + table *common.TableDiff, + dbConn *sql.DB, + startRange *RangeInfo, +) (*LimitIterator, error) { indices, err := utils.GetBetterIndex(ctx, dbConn, table.Schema, table.Table, table.Info) if err != nil { return nil, errors.Trace(err) @@ -161,10 +170,12 @@ func NewLimitIteratorWithCheckpoint(ctx context.Context, progressID string, tabl return limitIterator, nil } +// Close close the iterator func (lmt *LimitIterator) Close() { lmt.cancel() } +// Next return the next chunk func (lmt *LimitIterator) Next() (*chunk.Range, error) { select { case err := <-lmt.errCh: @@ -177,6 +188,7 @@ func (lmt *LimitIterator) Next() (*chunk.Range, error) { } } +// GetIndexID get the current index id func (lmt *LimitIterator) GetIndexID() int64 { return lmt.indexID } @@ -199,7 +211,6 @@ func (lmt *LimitIterator) produceChunks(ctx context.Context, bucketID int) { if dataMap == nil { // there is no row in result set chunk.InitChunk(chunkRange, chunk.Limit, bucketID, bucketID, lmt.table.Collation, lmt.table.Range) - bucketID++ progress.UpdateTotal(lmt.progressID, 1, true) select { case <-ctx.Done(): diff --git a/sync_diff_inspector/splitter/random.go b/sync_diff_inspector/splitter/random.go index 98771d0eb28..241b2d12e33 100644 --- a/sync_diff_inspector/splitter/random.go +++ b/sync_diff_inspector/splitter/random.go @@ -31,6 +31,7 @@ import ( "go.uber.org/zap" ) +// RandomIterator is used to random iterate a table type RandomIterator struct { table *common.TableDiff chunkSize int64 @@ -40,11 +41,19 @@ type RandomIterator struct { dbConn *sql.DB } +// NewRandomIterator return a new iterator func NewRandomIterator(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB) (*RandomIterator, error) { return NewRandomIteratorWithCheckpoint(ctx, progressID, table, dbConn, nil) } -func NewRandomIteratorWithCheckpoint(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB, startRange *RangeInfo) (*RandomIterator, error) { +// NewRandomIteratorWithCheckpoint return a new iterator with checkpoint +func NewRandomIteratorWithCheckpoint( + ctx context.Context, + progressID string, + table *common.TableDiff, + dbConn *sql.DB, + startRange *RangeInfo, +) (*RandomIterator, error) { // get the chunk count by data count and chunk size var splitFieldArr []string if 
len(table.Fields) != 0 { @@ -141,6 +150,7 @@ func NewRandomIteratorWithCheckpoint(ctx context.Context, progressID string, tab }, nil } +// Next get the next chunk func (s *RandomIterator) Next() (*chunk.Range, error) { if uint(len(s.chunks)) <= s.nextChunk { return nil, nil @@ -159,6 +169,7 @@ func (s *RandomIterator) Next() (*chunk.Range, error) { return c, nil } +// Close close the iterator func (s *RandomIterator) Close() { } diff --git a/sync_diff_inspector/splitter/splitter.go b/sync_diff_inspector/splitter/splitter.go index d2a43ed1ce2..5fb45bc9024 100644 --- a/sync_diff_inspector/splitter/splitter.go +++ b/sync_diff_inspector/splitter/splitter.go @@ -21,6 +21,7 @@ import ( ) const ( + // SplitThreshold is the threshold for splitting SplitThreshold = 1000 ) @@ -41,22 +42,27 @@ type RangeInfo struct { ProgressID string `json:"progress-id"` } -// GetTableIndex return the index of table diffs. +// GetTableIndex returns the index of table diffs. // IMPORTANT!!! // We need to keep the tables order during checkpoint. // So we should have to save the config info to checkpoint file too func (r *RangeInfo) GetTableIndex() int { return r.ChunkRange.Index.TableIndex } +// GetBucketIndexLeft returns the BucketIndexLeft func (r *RangeInfo) GetBucketIndexLeft() int { return r.ChunkRange.Index.BucketIndexLeft } +// GetBucketIndexRight returns the BucketIndexRight func (r *RangeInfo) GetBucketIndexRight() int { return r.ChunkRange.Index.BucketIndexRight } +// GetChunkIndex returns the ChunkIndex func (r *RangeInfo) GetChunkIndex() int { return r.ChunkRange.Index.ChunkIndex } +// GetChunk returns the chunk func (r *RangeInfo) GetChunk() *chunk.Range { return r.ChunkRange } +// Copy returns a copy of RangeInfo func (r *RangeInfo) Copy() *RangeInfo { return &RangeInfo{ ChunkRange: r.ChunkRange.Clone(), @@ -65,6 +71,7 @@ func (r *RangeInfo) Copy() *RangeInfo { } } +// Update updates the current RangeInfo func (r *RangeInfo) Update(column, lower, upper string, updateLower, updateUpper bool, collation, limits string) { r.ChunkRange.Update(column, lower, upper, updateLower, updateUpper) conditions, args := r.ChunkRange.ToString(collation) @@ -72,6 +79,7 @@ func (r *RangeInfo) Update(column, lower, upper string, updateLower, updateUpper r.ChunkRange.Args = args } +// ToNode converts RangeInfo to node func (r *RangeInfo) ToNode() *checkpoints.Node { return &checkpoints.Node{ ChunkRange: r.ChunkRange, @@ -79,6 +87,7 @@ func (r *RangeInfo) ToNode() *checkpoints.Node { } } +// FromNode converts the Node into RangeInfo func FromNode(n *checkpoints.Node) *RangeInfo { return &RangeInfo{ ChunkRange: n.ChunkRange, diff --git a/sync_diff_inspector/splitter/splitter_test.go b/sync_diff_inspector/splitter/splitter_test.go index 9b16fc5c0ea..5d312bddcad 100644 --- a/sync_diff_inspector/splitter/splitter_test.go +++ b/sync_diff_inspector/splitter/splitter_test.go @@ -915,7 +915,7 @@ func TestChunkSize(t *testing.T) { tableInfo, err = dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) - tableDiff_noindex := &common.TableDiff{ + tableDiffNoIndex := &common.TableDiff{ Schema: "test", Table: "test", Info: tableInfo, @@ -923,7 +923,7 @@ func TestChunkSize(t *testing.T) { } // no index createFakeResultForRandomSplit(mock, 1000, nil) - randomIter, err = NewRandomIterator(ctx, "", tableDiff_noindex, db) + randomIter, err = NewRandomIterator(ctx, "", tableDiffNoIndex, db) require.NoError(t, err) require.Equal(t, randomIter.chunkSize, int64(1001)) diff --git a/sync_diff_inspector/utils/pd.go 
b/sync_diff_inspector/utils/pd.go index b9604f81129..7aadf2fbc59 100644 --- a/sync_diff_inspector/utils/pd.go +++ b/sync_diff_inspector/utils/pd.go @@ -177,6 +177,7 @@ func parseVersion(versionStr string) (*semver.Version, error) { return semver.NewVersion(versionStr) } +// TryToGetVersion gets the version of current db. // It's OK to failed to get db version func TryToGetVersion(ctx context.Context, db *sql.DB) *semver.Version { versionStr, err := dbutil.GetDBVersion(ctx, db) @@ -267,6 +268,7 @@ func parseSnapshotToTSO(pool *sql.DB, snapshot string) (uint64, error) { return uint64(tso.Int64*1000) << 18, nil } +// GetSnapshot gets the snapshot func GetSnapshot(ctx context.Context, db *sql.DB) ([]string, error) { query := "SHOW MASTER STATUS;" rows, err := db.QueryContext(ctx, query) diff --git a/sync_diff_inspector/utils/table.go b/sync_diff_inspector/utils/table.go index 10f73e588d3..6cd2fae078b 100644 --- a/sync_diff_inspector/utils/table.go +++ b/sync_diff_inspector/utils/table.go @@ -37,8 +37,8 @@ import ( ) const ( - AnnotationClusteredReplaceString = "${1} /*T![clustered_index] CLUSTERED */${2}\n" - AnnotationNonClusteredReplaceString = "${1} /*T![clustered_index] NONCLUSTERED */${2}\n" + annotationClusteredReplaceString = "${1} /*T![clustered_index] CLUSTERED */${2}\n" + annotationNonClusteredReplaceString = "${1} /*T![clustered_index] NONCLUSTERED */${2}\n" ) func init() { @@ -116,7 +116,7 @@ func getTableInfoBySQL(ctx *metabuild.Context, createTableSQL string, parser2 *p return table, nil } - return nil, errors.Errorf("get table info from sql %s failed!", createTableSQL) + return nil, errors.Errorf("get table info from sql %s failed", createTableSQL) } func isPKISHandle( @@ -126,15 +126,20 @@ func isPKISHandle( ) bool { query := fmt.Sprintf("SELECT _tidb_rowid FROM %s LIMIT 0;", dbutil.TableName(schemaName, tableName)) rows, err := db.QueryContext(ctx, query) + defer func() { + if rows != nil { + _ = rows.Err() + rows.Close() + } + }() + if err != nil && strings.Contains(err.Error(), "Unknown column") { return true } - if rows != nil { - rows.Close() - } return false } +// GetTableInfoWithVersion returns table info under given version. func GetTableInfoWithVersion( ctx context.Context, db dbutil.QueryExecutor, @@ -149,9 +154,9 @@ func GetTableInfoWithVersion( if version != nil && version.Major <= 4 { var replaceString string if isPKISHandle(ctx, db, schemaName, tableName) { - replaceString = AnnotationClusteredReplaceString + replaceString = annotationClusteredReplaceString } else { - replaceString = AnnotationNonClusteredReplaceString + replaceString = annotationNonClusteredReplaceString } createTableSQL, err = addClusteredAnnotationForPrimaryKey(createTableSQL, replaceString) if err != nil { diff --git a/sync_diff_inspector/utils/utils.go b/sync_diff_inspector/utils/utils.go index 5e4351c1c7e..354ae30eeb9 100644 --- a/sync_diff_inspector/utils/utils.go +++ b/sync_diff_inspector/utils/utils.go @@ -39,10 +39,12 @@ import ( // which yields redacted string when being marshaled. type SecretString string +// MarshalJSON return fixed string for SerectString func (s SecretString) MarshalJSON() ([]byte, error) { return []byte(`"******"`), nil } +// String return fixed string for SerectString func (s SecretString) String() string { return "******" } @@ -130,7 +132,7 @@ func (pool *WorkerPool) HasWorker() bool { return len(pool.workers) > 0 } -// WaitFinished waits till the pool finishs all the tasks. +// WaitFinished waits till the pool finishes all the tasks. 
func (pool *WorkerPool) WaitFinished() { pool.wg.Wait() } @@ -211,7 +213,7 @@ func GenerateReplaceDML(data map[string]*dbutil.ColumnData, table *model.TableIn return fmt.Sprintf("REPLACE INTO %s(%s) VALUES (%s);", dbutil.TableName(schema, table.Name.O), strings.Join(colNames, ","), strings.Join(values, ",")) } -// GerateReplaceDMLWithAnnotation returns the replace SQL for the specific 2 rows. +// GenerateReplaceDMLWithAnnotation returns the replace SQL for the specific 2 rows. // And add Annotations to show the different columns. func GenerateReplaceDMLWithAnnotation(source, target map[string]*dbutil.ColumnData, table *model.TableInfo, schema string) string { sqlColNames := make([]string, 0, len(table.Columns)) @@ -286,7 +288,7 @@ func GenerateReplaceDMLWithAnnotation(source, target map[string]*dbutil.ColumnDa return fmt.Sprintf("/*\n%s*/\nREPLACE INTO %s(%s) VALUES (%s);", tableString.String(), dbutil.TableName(schema, table.Name.O), strings.Join(sqlColNames, ","), strings.Join(sqlValues, ",")) } -// GerateReplaceDMLWithAnnotation returns the delete SQL for the specific row. +// GenerateDeleteDML returns the delete SQL for the specific row. func GenerateDeleteDML(data map[string]*dbutil.ColumnData, table *model.TableInfo, schema string) string { kvs := make([]string, 0, len(table.Columns)) for _, col := range table.Columns { @@ -649,25 +651,25 @@ func CompareData(map1, map2 map[string]*dbutil.ColumnData, orderKeyCols, columns cmp = 1 } break - } else { - num1, err1 := strconv.ParseFloat(string(data1.Data), 64) - num2, err2 := strconv.ParseFloat(string(data2.Data), 64) - if err1 != nil || err2 != nil { - err = errors.Errorf("convert %s, %s to float failed, err1: %v, err2: %v", string(data1.Data), string(data2.Data), err1, err2) - return - } + } - if num1 == num2 { - continue - } + num1, err1 := strconv.ParseFloat(string(data1.Data), 64) + num2, err2 := strconv.ParseFloat(string(data2.Data), 64) + if err1 != nil || err2 != nil { + err = errors.Errorf("convert %s, %s to float failed, err1: %v, err2: %v", string(data1.Data), string(data2.Data), err1, err2) + return + } - if num1 < num2 { - cmp = -1 - } else { - cmp = 1 - } - break + if num1 == num2 { + continue } + + if num1 < num2 { + cmp = -1 + } else { + cmp = 1 + } + break } return @@ -773,8 +775,8 @@ func GetTableSize(ctx context.Context, db *sql.DB, schemaName, tableName string) return dataSize.Int64, nil } -// GetCountAndMd5Checksum returns checksum code and count of some data by given condition -func GetCountAndMd5Checksum(ctx context.Context, db *sql.DB, schemaName, tableName string, tbInfo *model.TableInfo, limitRange string, args []interface{}) (int64, uint64, error) { +// GetCountAndMD5Checksum returns checksum code and count of some data by given condition +func GetCountAndMD5Checksum(ctx context.Context, db *sql.DB, schemaName, tableName string, tbInfo *model.TableInfo, limitRange string, args []interface{}) (int64, uint64, error) { /* calculate MD5 checksum and count example: mysql> SELECT COUNT(*) as CNT, BIT_XOR(CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `name`, CONCAT(ISNULL(`id`), ISNULL(`name`)))), 1, 16), 16, 10) AS UNSIGNED) ^ CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `name`, CONCAT(ISNULL(`id`), ISNULL(`name`)))), 17, 16), 16, 10) AS UNSIGNED)) as CHECKSUM FROM `a`.`t`; @@ -964,7 +966,7 @@ func GetBetterIndex(ctx context.Context, db *sql.DB, schema, table string, table return []*model.IndexInfo{index}, nil } } - sels := make([]float64, len(indices)) + sels := make([]float64, 0, len(indices)) for _, index := range 
indices { column := GetColumnsFromIndex(index, tableInfo)[0] selectivity, err := GetSelectivity(ctx, db, schema, table, column.Name.O, tableInfo) @@ -1002,7 +1004,7 @@ func GetSelectivity(ctx context.Context, db *sql.DB, schemaName, tableName, colu func CalculateChunkSize(rowCount int64) int64 { // we assume chunkSize is 50000 for any cluster. chunkSize := int64(50000) - if rowCount > int64(chunkSize)*10000 { + if rowCount > chunkSize*10000 { // we assume we only need 10k chunks for any table. chunkSize = rowCount / 10000 } @@ -1016,12 +1018,12 @@ func AnalyzeTable(ctx context.Context, db *sql.DB, tableName string) error { } // GetSQLFileName returns filename of fix-SQL identified by chunk's `Index`. -func GetSQLFileName(index *chunk.ChunkID) string { +func GetSQLFileName(index *chunk.CID) string { return fmt.Sprintf("%d:%d-%d:%d", index.TableIndex, index.BucketIndexLeft, index.BucketIndexRight, index.ChunkIndex) } -// GetChunkIDFromSQLFileName convert the filename to chunk's `Index`. -func GetChunkIDFromSQLFileName(fileIDStr string) (int, int, int, int, error) { +// GetCIDFromSQLFileName convert the filename to chunk's `Index`. +func GetCIDFromSQLFileName(fileIDStr string) (int, int, int, int, error) { ids := strings.Split(fileIDStr, ":") tableIndex, err := strconv.Atoi(ids[0]) if err != nil { @@ -1051,6 +1053,7 @@ func IsRangeTrivial(rangeCond string) bool { return strings.ToLower(rangeCond) == "true" } +// IsBinaryColumn checks if the given column is a binary column func IsBinaryColumn(col *model.ColumnInfo) bool { // varbinary or binary return (col.GetType() == mysql.TypeVarchar || col.GetType() == mysql.TypeString) && mysql.HasBinaryFlag(col.GetFlag()) diff --git a/sync_diff_inspector/utils/utils_test.go b/sync_diff_inspector/utils/utils_test.go index fde005ebb3c..a768266eca6 100644 --- a/sync_diff_inspector/utils/utils_test.go +++ b/sync_diff_inspector/utils/utils_test.go @@ -49,8 +49,8 @@ func TestWorkerPool(t *testing.T) { infoCh <- 2 }) pool.Apply(func() { - new_v := <-infoCh - v = new_v + newV := <-infoCh + v = newV doneCh <- struct{}{} }) <-doneCh @@ -257,7 +257,7 @@ func TestBasicTableUtilOperation(t *testing.T) { require.Equal(t, tableInfo.Indices[0].Columns[1].Offset, 1) } -func TestGetCountAndMd5Checksum(t *testing.T) { +func TestGetCountAndMD5Checksum(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) defer cancel() @@ -271,7 +271,7 @@ func TestGetCountAndMd5Checksum(t *testing.T) { mock.ExpectQuery("SELECT COUNT.*FROM `test_schema`\\.`test_table` WHERE \\[23 45\\].*").WithArgs("123", "234").WillReturnRows(sqlmock.NewRows([]string{"CNT", "CHECKSUM"}).AddRow(123, 456)) - count, checksum, err := GetCountAndMd5Checksum(ctx, conn, "test_schema", "test_table", tableInfo, "[23 45]", []interface{}{"123", "234"}) + count, checksum, err := GetCountAndMD5Checksum(ctx, conn, "test_schema", "test_table", tableInfo, "[23 45]", []interface{}{"123", "234"}) require.NoError(t, err) require.Equal(t, count, int64(123)) require.Equal(t, checksum, uint64(0x1c8)) @@ -501,7 +501,7 @@ func TestCalculateChunkSize(t *testing.T) { } func TestGetSQLFileName(t *testing.T) { - index := &chunk.ChunkID{ + index := &chunk.CID{ TableIndex: 1, BucketIndexLeft: 2, BucketIndexRight: 3, @@ -511,8 +511,8 @@ func TestGetSQLFileName(t *testing.T) { require.Equal(t, GetSQLFileName(index), "1:2-3:4") } -func TestGetChunkIDFromSQLFileName(t *testing.T) { - tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, err := GetChunkIDFromSQLFileName("11:12-13:14") +func 
TestGetCIDFromSQLFileName(t *testing.T) { + tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, err := GetCIDFromSQLFileName("11:12-13:14") require.NoError(t, err) require.Equal(t, tableIndex, 11) require.Equal(t, bucketIndexLeft, 12) @@ -620,16 +620,16 @@ func TestGenerateSQLBlob(t *testing.T) { } cases := []struct { - createTableSql string + createTableSQL string }{ - {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` tinyblob)"}, - {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` blob)"}, - {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` mediumblob)"}, - {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` longblob)"}, + {"CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` tinyblob)"}, + {"CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` blob)"}, + {"CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` mediumblob)"}, + {"CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` longblob)"}, } for _, c := range cases { - tableInfo, err := dbutiltest.GetTableInfoBySQL(c.createTableSql, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(c.createTableSQL, parser.New()) require.NoError(t, err) replaceSQL := GenerateReplaceDML(rowsData, tableInfo, "diff_test") From ad27384ca805edebdffbe8e9d729e5550bcf11f7 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 17:38:32 +0800 Subject: [PATCH 06/22] Update Makefile --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 82b2501c711..1b185159ef4 100644 --- a/Makefile +++ b/Makefile @@ -445,7 +445,7 @@ dm_unit_test_in_verify_ci: check_failpoint_ctl tools/bin/gotestsum tools/bin/goc tools/bin/gocov convert "$(DM_TEST_DIR)/cov.unit_test.out" | tools/bin/gocov-xml > dm-coverage.xml $(FAILPOINT_DISABLE) -dm_integration_test_build: check_failpoint_ctl +dm_integration_test_build: check_failpoint_ctl sync_diff_inspector $(FAILPOINT_ENABLE) $(GOTEST) -ldflags '$(LDFLAGS)' -c -cover -covermode=atomic \ -coverpkg=github.com/pingcap/tiflow/dm/... \ @@ -475,7 +475,7 @@ dm_integration_test_build_worker: check_failpoint_ctl $(FAILPOINT_DISABLE) ./dm/tests/prepare_tools.sh -dm_integration_test_build_master: check_failpoint_ctl +dm_integration_test_build_master: check_failpoint_ctl sync_diff_inspector $(FAILPOINT_ENABLE) $(GOTEST) -ldflags '$(LDFLAGS)' -c -cover -covermode=atomic \ -coverpkg=github.com/pingcap/tiflow/dm/... 
\ From 42c8f8cd77e9f47bd548da84bd6b5e0fb611d658 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 18:00:33 +0800 Subject: [PATCH 07/22] Update Makefile --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1b185159ef4..82e58f89e13 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ ### Makefile for tiflow -.PHONY: build test check clean fmt sync-diff-inspector cdc kafka_consumer storage_consumer coverage \ +.PHONY: build test check clean fmt sync_diff_inspector cdc kafka_consumer storage_consumer coverage \ integration_test_build integration_test integration_test_mysql integration_test_kafka bank \ kafka_docker_integration_test kafka_docker_integration_test_with_build \ clean_integration_test_containers \ @@ -499,6 +499,7 @@ install_test_python_dep: check_third_party_binary_for_dm: @which bin/tidb-server + @which bin/sync_diff_inspector @which mysql @which bin/minio From 2b933843f186f3e99845d7d9d1987344eb104f93 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 22 Oct 2024 12:04:28 +0800 Subject: [PATCH 08/22] Fix tests --- sync_diff_inspector/config/config_test.go | 2 +- sync_diff_inspector/progress/progress.go | 42 +++++++++++++++---- sync_diff_inspector/splitter/splitter_test.go | 22 +++++++--- sync_diff_inspector/utils/utils_test.go | 13 +++--- 4 files changed, 56 insertions(+), 23 deletions(-) diff --git a/sync_diff_inspector/config/config_test.go b/sync_diff_inspector/config/config_test.go index 5a36caa4504..24526446d35 100644 --- a/sync_diff_inspector/config/config_test.go +++ b/sync_diff_inspector/config/config_test.go @@ -38,7 +38,7 @@ func TestParseConfig(t *testing.T) { require.Nil(t, cfg.Parse([]string{"--config", "config_sharding.toml"})) // we change the config from config.toml to config_sharding.toml // this action will raise error. - require.Contains(t, cfg.Init().Error(), "failed to init Task: config changes breaking the checkpoint, please use another outputDir and start over again!") + require.Contains(t, cfg.Init().Error(), "failed to init Task: config changes breaking the checkpoint, please use another outputDir and start over again") require.NoError(t, os.RemoveAll(cfg.Task.OutputDir)) require.Nil(t, cfg.Parse([]string{"--config", "config_sharding.toml"})) diff --git a/sync_diff_inspector/progress/progress.go b/sync_diff_inspector/progress/progress.go index 27f9ec7538a..bbd9c87b5c9 100644 --- a/sync_diff_inspector/progress/progress.go +++ b/sync_diff_inspector/progress/progress.go @@ -19,16 +19,40 @@ import ( "io" "os" "strings" + "sync" "time" "github.com/pingcap/tiflow/sync_diff_inspector/source/common" ) +type atomicWriter struct { + mu sync.Mutex + writer io.Writer +} + +func (aw *atomicWriter) Set(writer io.Writer) { + aw.mu.Lock() + defer aw.mu.Unlock() + aw.writer = writer +} + +func (aw *atomicWriter) Write(s string, args ...any) { + aw.mu.Lock() + defer aw.mu.Unlock() + fmt.Fprintf(aw.writer, s, args...) 
+} + +func (aw *atomicWriter) Get() io.Writer { + aw.mu.Lock() + defer aw.mu.Unlock() + return aw.writer +} + type tableProgressPrinter struct { tableList *list.List tableFailList *list.List tableMap map[string]*list.Element - output io.Writer + output atomicWriter lines int progressTableNums int @@ -108,12 +132,12 @@ func newTableProgressPrinter(tableNums int, finishTableNums int) *tableProgressP } tpp.init() go tpp.serve() - fmt.Fprintf(tpp.output, "A total of %d tables need to be compared\n\n\n", tableNums) + tpp.output.Write("A total of %d tables need to be compared\n\n\n", tableNums) return tpp } func (tpp *tableProgressPrinter) SetOutput(output io.Writer) { - tpp.output = output + tpp.output.Set(output) } func (tpp *tableProgressPrinter) Inc(name string) { @@ -217,7 +241,7 @@ func (tpp *tableProgressPrinter) PrintSummary() { ) } - fmt.Fprintf(tpp.output, "%s%s\n", cleanStr, fixStr) + tpp.output.Write("%s%s\n", cleanStr, fixStr) } func (tpp *tableProgressPrinter) Error(err error) { @@ -228,7 +252,7 @@ func (tpp *tableProgressPrinter) Error(err error) { var cleanStr, fixStr string cleanStr = "\x1b[1A\x1b[J" fixStr = fmt.Sprintf("\nError in comparison process:\n%v\n\nYou can view the comparison details through './output_dir/sync_diff_inspector.log'\n", err) - fmt.Fprintf(tpp.output, "%s%s", cleanStr, fixStr) + tpp.output.Write("%s%s", cleanStr, fixStr) } func (tpp *tableProgressPrinter) init() { @@ -236,7 +260,7 @@ func (tpp *tableProgressPrinter) init() { state: tableStateHead, }) - tpp.output = os.Stdout + tpp.output.Set(os.Stdout) } func (tpp *tableProgressPrinter) serve() { @@ -407,16 +431,16 @@ func (tpp *tableProgressPrinter) flush(stateIsChanged bool) { } dynStr = fmt.Sprintf("%s_____________________________________________________________________________\n", dynStr) - fmt.Fprintf(tpp.output, "%s%s%s", cleanStr, fixStr, dynStr) + tpp.output.Write("%s%s%s", cleanStr, fixStr, dynStr) } else { - fmt.Fprint(tpp.output, "\x1b[1A\x1b[J") + tpp.output.Write("\x1b[1A\x1b[J") } // show bar // 60 '='+'-' coe := float32(tpp.progressTableNums*tpp.progress)/float32(tpp.tableNums*(tpp.total+1)) + float32(tpp.finishTableNums)/float32(tpp.tableNums) numLeft := int(60 * coe) percent := int(100 * coe) - fmt.Fprintf(tpp.output, "Progress [%s>%s] %d%% %d/%d\n", strings.Repeat("=", numLeft), strings.Repeat("-", 60-numLeft), percent, tpp.progress, tpp.total) + tpp.output.Write("Progress [%s>%s] %d%% %d/%d\n", strings.Repeat("=", numLeft), strings.Repeat("-", 60-numLeft), percent, tpp.progress, tpp.total) } var progress *tableProgressPrinter = nil diff --git a/sync_diff_inspector/splitter/splitter_test.go b/sync_diff_inspector/splitter/splitter_test.go index 5d312bddcad..760b642c01d 100644 --- a/sync_diff_inspector/splitter/splitter_test.go +++ b/sync_diff_inspector/splitter/splitter_test.go @@ -738,8 +738,6 @@ func createFakeResultForRandom(mock sqlmock.Sqlmock, aRandomValues, bRandomValue func TestLimitSpliter(t *testing.T) { ctx := context.Background() - db, mock, err := sqlmock.New() - require.NoError(t, err) createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) @@ -782,6 +780,10 @@ func TestLimitSpliter(t *testing.T) { } for _, testCase := range testCases { + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + createFakeResultForLimitSplit(mock, testCase.limitAValues, testCase.limitBValues, true) iter, err := NewLimitIterator(ctx, 
"", tableDiff, db) @@ -801,10 +803,14 @@ func TestLimitSpliter(t *testing.T) { } } + db2, mock2, err := sqlmock.New() + require.NoError(t, err) + defer db2.Close() + // Test Checkpoint stopJ := 2 - createFakeResultForLimitSplit(mock, testCases[0].limitAValues[:stopJ], testCases[0].limitBValues[:stopJ], true) - iter, err := NewLimitIterator(ctx, "", tableDiff, db) + createFakeResultForLimitSplit(mock2, testCases[0].limitAValues[:stopJ], testCases[0].limitBValues[:stopJ], true) + iter, err := NewLimitIterator(ctx, "", tableDiff, db2) require.NoError(t, err) j := 0 var chunk *chunk.Range @@ -819,8 +825,12 @@ func TestLimitSpliter(t *testing.T) { IndexID: iter.GetIndexID(), } - createFakeResultForLimitSplit(mock, testCases[0].limitAValues[stopJ:], testCases[0].limitBValues[stopJ:], true) - iter, err = NewLimitIteratorWithCheckpoint(ctx, "", tableDiff, db, rangeInfo) + db3, mock3, err := sqlmock.New() + require.NoError(t, err) + defer db3.Close() + + createFakeResultForLimitSplit(mock3, testCases[0].limitAValues[stopJ:], testCases[0].limitBValues[stopJ:], true) + iter, err = NewLimitIteratorWithCheckpoint(ctx, "", tableDiff, db3, rangeInfo) require.NoError(t, err) chunk, err = iter.Next() require.NoError(t, err) diff --git a/sync_diff_inspector/utils/utils_test.go b/sync_diff_inspector/utils/utils_test.go index a768266eca6..6454e27ef4b 100644 --- a/sync_diff_inspector/utils/utils_test.go +++ b/sync_diff_inspector/utils/utils_test.go @@ -402,8 +402,7 @@ func TestGetTableSize(t *testing.T) { } func TestGetBetterIndex(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() conn, mock, err := sqlmock.New() require.NoError(t, err) defer conn.Close() @@ -445,9 +444,9 @@ func TestGetBetterIndex(t *testing.T) { {"7", "d"}, {"8", "d"}, {"9", "e"}, - {"A", "e"}, - {"B", "f"}, - {"C", "f"}, + {"10", "e"}, + {"11", "f"}, + {"12", "f"}, }, indices: []string{"a", "b"}, sels: []float64{1.0, 0.5}, @@ -488,8 +487,8 @@ func TestGetBetterIndex(t *testing.T) { require.NoError(t, err) require.Equal(t, sel, tableCase.sels[i]) } - mock.ExpectQuery("SELECT COUNT\\(DISTINCT `a.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("2")) - mock.ExpectQuery("SELECT COUNT\\(DISTINCT `b.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("5")) + mock.ExpectQuery("SELECT COUNT\\(DISTINCT `a.*").WillReturnRows(sqlmock.NewRows([]string{"ESL"}).AddRow("5")) + mock.ExpectQuery("SELECT COUNT\\(DISTINCT `b.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("2")) indices, err = GetBetterIndex(ctx, conn, "single_index", "test1", tableInfo) require.NoError(t, err) require.Equal(t, indices[0].Name.O, tableCase.selected) From 8b501e45b319f58c79553a872e4310afdb2ea6fd Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 22 Oct 2024 13:55:45 +0800 Subject: [PATCH 09/22] Revert scripts --- Makefile | 2 +- .../download-compatibility-test-binaries.sh | 6 +++++ .../download-integration-test-binaries.sh | 5 ++++ dm/tests/mariadb_master_down_and_up/case.sh | 1 + dm/tests/mariadb_master_down_and_up/lib.sh | 6 +++++ dm/tests/tiup/lib.sh | 6 +++++ dm/tests/tiup/upgrade-from-v1.sh | 2 ++ dm/tests/tiup/upgrade-from-v2.sh | 2 ++ dm/tests/tiup/upgrade-tidb.sh | 2 ++ dm/tests/upstream_switch/case.sh | 1 + dm/tests/upstream_switch/lib.sh | 6 +++++ scripts/download-integration-test-binaries.sh | 4 +++- scripts/download-sync-diff.sh | 23 +++++++++++++++++++ 13 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 
scripts/download-sync-diff.sh diff --git a/Makefile b/Makefile index 82e58f89e13..44f4366ba69 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ .DEFAULT_GOAL := default # Adapted from https://www.thapaliya.com/en/writings/well-documented-makefiles/ -help: ## Display this help ann any documented user-facing targets. Other undocumented targets may be present in the Makefile. +help: ## Display this help and any documented user-facing targets. Other undocumented targets may be present in the Makefile. help: @awk 'BEGIN {FS = ": ##"; printf "Usage:\n make \n\nTargets:\n"} /^[a-zA-Z0-9_\.\-\/%]+: ##/ { printf " %-45s %s\n", $$1, $$2 }' $(MAKEFILE_LIST) diff --git a/dm/tests/download-compatibility-test-binaries.sh b/dm/tests/download-compatibility-test-binaries.sh index e71a740ccda..df29fa3e303 100755 --- a/dm/tests/download-compatibility-test-binaries.sh +++ b/dm/tests/download-compatibility-test-binaries.sh @@ -77,6 +77,7 @@ main() { # Define download URLs local download_urls=( "${FILE_SERVER_URL}/download/builds/pingcap/tidb/${tidb_sha1}/centos7/tidb-server.tar.gz" + "http://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz" "http://download.pingcap.org/tidb-enterprise-tools-latest-linux-amd64.tar.gz" "${GITHUB_RELEASE_URL}/gh-ost-binary-linux-20200828140552.tar.gz" "${FILE_SERVER_URL}/download/minio.tar.gz" @@ -97,6 +98,11 @@ main() { extract "$filename" "$THIRD_BIN_DIR" "bin/tidb-server" mv "${THIRD_BIN_DIR}/bin/tidb-server" "$THIRD_BIN_DIR/" ;; + tidb-enterprise-tools-nightly-linux-amd64.tar.gz) + extract "$filename" "$THIRD_BIN_DIR" "tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector" + mv "${THIRD_BIN_DIR}/tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector" "$THIRD_BIN_DIR/" + rm -rf "${THIRD_BIN_DIR}/tidb-enterprise-tools-nightly-linux-amd64" + ;; tidb-enterprise-tools-latest-linux-amd64.tar.gz) extract "$filename" "$THIRD_BIN_DIR" "tidb-enterprise-tools-latest-linux-amd64/bin/mydumper" mv "${THIRD_BIN_DIR}/tidb-enterprise-tools-latest-linux-amd64/bin/mydumper" "$THIRD_BIN_DIR/" diff --git a/dm/tests/download-integration-test-binaries.sh b/dm/tests/download-integration-test-binaries.sh index d5c96eff7de..6dd88b767a4 100755 --- a/dm/tests/download-integration-test-binaries.sh +++ b/dm/tests/download-integration-test-binaries.sh @@ -77,6 +77,7 @@ main() { "${FILE_SERVER_URL}/download/builds/pingcap/tidb/${tidb_sha1}/centos7/tidb-server.tar.gz" "${FILE_SERVER_URL}/download/builds/pingcap/tikv/${tikv_sha1}/centos7/tikv-server.tar.gz" "${FILE_SERVER_URL}/download/builds/pingcap/pd/${pd_sha1}/centos7/pd-server.tar.gz" + "${FILE_SERVER_URL}/download/builds/pingcap/tidb-tools/${tidb_tools_sha1}/centos7/tidb-tools.tar.gz" "${GITHUB_RELEASE_URL}/gh-ost-binary-linux-20200828140552.tar.gz" "${FILE_SERVER_URL}/download/minio.tar.gz" ) @@ -104,6 +105,10 @@ main() { tar -xz -C "$THIRD_BIN_DIR" bin/tikv-server -f "${TEMP_DIR}/${filename}" mv "${THIRD_BIN_DIR}/bin/tikv-server" "$THIRD_BIN_DIR/" ;; + tidb-tools.tar.gz) + tar -xz -C "$THIRD_BIN_DIR" 'bin/sync_diff_inspector' -f "${TEMP_DIR}/${filename}" + mv "${THIRD_BIN_DIR}/bin/sync_diff_inspector" "$THIRD_BIN_DIR/" + ;; minio.tar.gz | gh-ost-binary-linux-20200828140552.tar.gz) tar -xz -C "$THIRD_BIN_DIR" -f "${TEMP_DIR}/${filename}" ;; diff --git a/dm/tests/mariadb_master_down_and_up/case.sh b/dm/tests/mariadb_master_down_and_up/case.sh index dc59fe3e1b2..2941263415c 100644 --- a/dm/tests/mariadb_master_down_and_up/case.sh +++ b/dm/tests/mariadb_master_down_and_up/case.sh @@ -108,6 +108,7 @@ 
function test_master_down_and_up() { cleanup_process clean_data setup_replica + install_sync_diff gen_full_data run_dm_components_and_create_source $1 start_task diff --git a/dm/tests/mariadb_master_down_and_up/lib.sh b/dm/tests/mariadb_master_down_and_up/lib.sh index 4a548c73425..3d38de273e7 100644 --- a/dm/tests/mariadb_master_down_and_up/lib.sh +++ b/dm/tests/mariadb_master_down_and_up/lib.sh @@ -27,6 +27,12 @@ function exec_tidb() { echo $2 | mysql -uroot -h127.0.0.1 -P$1 } +function install_sync_diff() { + curl https://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz + mkdir -p bin + mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ +} + function get_master_status() { arr=$(echo "show master status;" | MYSQL_PWD=123456 mysql -uroot -h127.0.0.1 -P3306 | awk 'NR==2') echo $arr diff --git a/dm/tests/tiup/lib.sh b/dm/tests/tiup/lib.sh index 441fd2da753..8b57d9355e7 100755 --- a/dm/tests/tiup/lib.sh +++ b/dm/tests/tiup/lib.sh @@ -56,6 +56,12 @@ function run_sql_tidb_with_retry() { fi } +function install_sync_diff() { + curl https://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz + mkdir -p bin + mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ +} + function exec_full_stage() { # drop previous data exec_sql mysql1 3306 "DROP DATABASE IF EXISTS $DB1;" diff --git a/dm/tests/tiup/upgrade-from-v1.sh b/dm/tests/tiup/upgrade-from-v1.sh index 75b4244efb0..dcf95ea03fd 100755 --- a/dm/tests/tiup/upgrade-from-v1.sh +++ b/dm/tests/tiup/upgrade-from-v1.sh @@ -122,6 +122,8 @@ function destroy_v2_by_tiup() { } function test() { + install_sync_diff + deploy_v1_by_ansible migrate_in_v1 diff --git a/dm/tests/tiup/upgrade-from-v2.sh b/dm/tests/tiup/upgrade-from-v2.sh index 1a1252e94b2..f5781c3002c 100755 --- a/dm/tests/tiup/upgrade-from-v2.sh +++ b/dm/tests/tiup/upgrade-from-v2.sh @@ -170,6 +170,8 @@ function destroy_v2_by_tiup() { } function test() { + install_sync_diff + deploy_previous_v2 migrate_in_previous_v2 diff --git a/dm/tests/tiup/upgrade-tidb.sh b/dm/tests/tiup/upgrade-tidb.sh index 1207e512f27..434c74cc7a9 100755 --- a/dm/tests/tiup/upgrade-tidb.sh +++ b/dm/tests/tiup/upgrade-tidb.sh @@ -52,6 +52,8 @@ function destroy_v2_by_tiup() { # run this before upgrade TiDB. 
function before_upgrade() { + install_sync_diff + deploy_dm migrate_before_upgrade diff --git a/dm/tests/upstream_switch/case.sh b/dm/tests/upstream_switch/case.sh index 185ebdbd878..012b4df8ff3 100644 --- a/dm/tests/upstream_switch/case.sh +++ b/dm/tests/upstream_switch/case.sh @@ -208,6 +208,7 @@ function check_master() { function test_relay() { cleanup_process check_master + install_sync_diff clean_data prepare_binlogs setup_replica diff --git a/dm/tests/upstream_switch/lib.sh b/dm/tests/upstream_switch/lib.sh index b11537d988f..65064fb4cb6 100644 --- a/dm/tests/upstream_switch/lib.sh +++ b/dm/tests/upstream_switch/lib.sh @@ -30,6 +30,12 @@ function exec_tidb() { echo $2 | mysql -uroot -h$1 -P4000 } +function install_sync_diff() { + curl https://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz + mkdir -p bin + mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ +} + function prepare_more_binlogs() { exec_sql $1 "create database db1 collate latin1_bin;" exec_sql $1 "flush logs;" diff --git a/scripts/download-integration-test-binaries.sh b/scripts/download-integration-test-binaries.sh index ec0d8849438..765d848aede 100755 --- a/scripts/download-integration-test-binaries.sh +++ b/scripts/download-integration-test-binaries.sh @@ -91,7 +91,7 @@ download_community_binaries() { mv ${THIRD_BIN_DIR}/tiflash ${THIRD_BIN_DIR}/_tiflash mv ${THIRD_BIN_DIR}/_tiflash/* ${THIRD_BIN_DIR} && rm -rf ${THIRD_BIN_DIR}/_tiflash tar -xz -C ${THIRD_BIN_DIR} pd-ctl -f ${TMP_DIR}/$tidb_file_name/ctl-${dist}.tar.gz - tar -xz -C ${THIRD_BIN_DIR} $toolkit_file_name/etcdctl -f ${TMP_DIR}/$toolkit_tar_name + tar -xz -C ${THIRD_BIN_DIR} $toolkit_file_name/etcdctl $toolkit_file_name/sync_diff_inspector -f ${TMP_DIR}/$toolkit_tar_name mv ${THIRD_BIN_DIR}/$toolkit_file_name/* ${THIRD_BIN_DIR} && rm -rf ${THIRD_BIN_DIR}/$toolkit_file_name # Download additional tools @@ -147,6 +147,7 @@ download_binaries() { local minio_download_url="${FILE_SERVER_URL}/download/minio.tar.gz" local go_ycsb_download_url="${FILE_SERVER_URL}/download/builds/pingcap/go-ycsb/test-br/go-ycsb" local etcd_download_url="${FILE_SERVER_URL}/download/builds/pingcap/cdc/etcd-v3.4.7-linux-amd64.tar.gz" + local sync_diff_inspector_url="${FILE_SERVER_URL}/download/builds/pingcap/cdc/sync_diff_inspector_hash-a129f096_linux-amd64.tar.gz" local jq_download_url="${FILE_SERVER_URL}/download/builds/pingcap/test/jq-1.6/jq-linux64" local schema_registry_url="${FILE_SERVER_URL}/download/builds/pingcap/cdc/schema-registry.tar.gz" @@ -157,6 +158,7 @@ download_binaries() { download_and_extract "$tiflash_download_url" "tiflash.tar.gz" download_and_extract "$minio_download_url" "minio.tar.gz" download_and_extract "$etcd_download_url" "etcd.tar.gz" "etcd-v3.4.7-linux-amd64/etcdctl" + download_and_extract "$sync_diff_inspector_url" "sync_diff_inspector.tar.gz" download_and_extract "$schema_registry_url" "schema-registry.tar.gz" download_file "$go_ycsb_download_url" "go-ycsb" "${THIRD_BIN_DIR}/go-ycsb" diff --git a/scripts/download-sync-diff.sh b/scripts/download-sync-diff.sh new file mode 100644 index 00000000000..e154ed2900c --- /dev/null +++ b/scripts/download-sync-diff.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Copyright 2022 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eu + +echo "will download tidb-tools v6.1.0 to get sync_diff_inspector" +curl -C - --retry 3 -o /tmp/tidb-tools.tar.gz https://download.pingcap.org/tidb-community-toolkit-v6.1.0-linux-amd64.tar.gz +mkdir -p /tmp/tidb-tools +tar -zxf /tmp/tidb-tools.tar.gz -C /tmp/tidb-tools +mv /tmp/tidb-tools/tidb-community-toolkit-v6.1.0-linux-amd64/sync_diff_inspector ./bin/sync_diff_inspector +rm -r /tmp/tidb-tools +rm /tmp/tidb-tools.tar.gz \ No newline at end of file From ee0b72cd66c7f9d8ddf2be88d1b30fd609199a57 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 22 Oct 2024 13:59:51 +0800 Subject: [PATCH 10/22] Revert scripts --- dm/tests/mariadb_master_down_and_up/case.sh | 2 +- dm/tests/tiup/upgrade-from-v1.sh | 2 +- scripts/download-sync-diff.sh | 0 tests/integration_tests/README.md | 1 + 4 files changed, 3 insertions(+), 2 deletions(-) mode change 100644 => 100755 scripts/download-sync-diff.sh diff --git a/dm/tests/mariadb_master_down_and_up/case.sh b/dm/tests/mariadb_master_down_and_up/case.sh index 2941263415c..e678d3bcfd2 100644 --- a/dm/tests/mariadb_master_down_and_up/case.sh +++ b/dm/tests/mariadb_master_down_and_up/case.sh @@ -107,8 +107,8 @@ function clean_task() { function test_master_down_and_up() { cleanup_process clean_data - setup_replica install_sync_diff + setup_replica gen_full_data run_dm_components_and_create_source $1 start_task diff --git a/dm/tests/tiup/upgrade-from-v1.sh b/dm/tests/tiup/upgrade-from-v1.sh index dcf95ea03fd..3520dd0f7b9 100755 --- a/dm/tests/tiup/upgrade-from-v1.sh +++ b/dm/tests/tiup/upgrade-from-v1.sh @@ -123,7 +123,7 @@ function destroy_v2_by_tiup() { function test() { install_sync_diff - + deploy_v1_by_ansible migrate_in_v1 diff --git a/scripts/download-sync-diff.sh b/scripts/download-sync-diff.sh old mode 100644 new mode 100755 diff --git a/tests/integration_tests/README.md b/tests/integration_tests/README.md index b2c3d471f85..483f697338f 100644 --- a/tests/integration_tests/README.md +++ b/tests/integration_tests/README.md @@ -14,6 +14,7 @@ If you need to specify a version, os or arch, you can use, for example: `make pr * `pd-ctl` # version >= 6.0.0-rc.1 * `tiflash` # tiflash binary * `libc++.so, libc++abi.so, libgmssl.so, libtiflash_proxy.so` # some necessary so files related to tiflash + * `sync_diff_inspector` * [go-ycsb](https://github.com/pingcap/go-ycsb) * [etcdctl](https://github.com/etcd-io/etcd/tree/master/etcdctl) * [jq](https://stedolan.github.io/jq/) From 82397bc81d0d9ae4ee63022e8371437ff081ccc2 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 22 Oct 2024 14:00:34 +0800 Subject: [PATCH 11/22] Revert scripts --- scripts/download-sync-diff.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/download-sync-diff.sh b/scripts/download-sync-diff.sh index e154ed2900c..3ee26c6e505 100755 --- a/scripts/download-sync-diff.sh +++ b/scripts/download-sync-diff.sh @@ -20,4 +20,4 @@ mkdir -p /tmp/tidb-tools tar -zxf /tmp/tidb-tools.tar.gz -C /tmp/tidb-tools mv /tmp/tidb-tools/tidb-community-toolkit-v6.1.0-linux-amd64/sync_diff_inspector ./bin/sync_diff_inspector rm -r /tmp/tidb-tools -rm /tmp/tidb-tools.tar.gz \ No newline at end of file +rm /tmp/tidb-tools.tar.gz From 
635bd6ecdac99f5a9cba2ae30aa754febd56436a Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 22 Oct 2024 14:16:38 +0800 Subject: [PATCH 12/22] Add TODO --- sync_diff_inspector/utils/table.go | 1 + 1 file changed, 1 insertion(+) diff --git a/sync_diff_inspector/utils/table.go b/sync_diff_inspector/utils/table.go index 6cd2fae078b..4b602bb02fb 100644 --- a/sync_diff_inspector/utils/table.go +++ b/sync_diff_inspector/utils/table.go @@ -169,6 +169,7 @@ func GetTableInfoWithVersion( } sctx := mock.NewContext() // unify the timezone to UTC +0:00 + // TODO(joechenrh): the following code doesn't work on the latest version of tidb. sctx.GetSessionVars().TimeZone = time.UTC sctx.GetSessionVars().SQLMode = mysql.DelSQLMode(sctx.GetSessionVars().SQLMode, mysql.ModeStrictTransTables) sctx.GetSessionVars().SQLMode = mysql.DelSQLMode(sctx.GetSessionVars().SQLMode, mysql.ModeStrictAllTables) From ad434cb94cc1e7c1743e5da9197b656d76cf4411 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 22 Oct 2024 15:12:22 +0800 Subject: [PATCH 13/22] Fix tests --- Makefile | 9 +++++---- sync_diff_inspector/source/source_test.go | 22 +++++++--------------- sync_diff_inspector/utils/utils_test.go | 2 +- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index 44f4366ba69..38f578b3845 100644 --- a/Makefile +++ b/Makefile @@ -137,7 +137,7 @@ dev: check test test: unit_test dm_unit_test engine_unit_test -build: cdc dm engine sync_diff_inspector +build: cdc dm engine check-makefiles: ## Check the makefiles format. Please run this target after the changes are committed. check-makefiles: format-makefiles @@ -222,13 +222,14 @@ check_third_party_binary: @which bin/pd-server @which bin/tiflash @which bin/pd-ctl + @which bin/sync_diff_inspector @which bin/go-ycsb @which bin/etcdctl @which bin/jq @which bin/minio @which bin/bin/schema-registry-start -integration_test_build: check_failpoint_ctl storage_consumer kafka_consumer pulsar_consumer oauth2_server sync_diff_inspector +integration_test_build: check_failpoint_ctl storage_consumer kafka_consumer pulsar_consumer oauth2_server $(FAILPOINT_ENABLE) $(GOTEST) -ldflags '$(LDFLAGS)' -c -cover -covermode=atomic \ -coverpkg=github.com/pingcap/tiflow/... \ @@ -445,7 +446,7 @@ dm_unit_test_in_verify_ci: check_failpoint_ctl tools/bin/gotestsum tools/bin/goc tools/bin/gocov convert "$(DM_TEST_DIR)/cov.unit_test.out" | tools/bin/gocov-xml > dm-coverage.xml $(FAILPOINT_DISABLE) -dm_integration_test_build: check_failpoint_ctl sync_diff_inspector +dm_integration_test_build: check_failpoint_ctl $(FAILPOINT_ENABLE) $(GOTEST) -ldflags '$(LDFLAGS)' -c -cover -covermode=atomic \ -coverpkg=github.com/pingcap/tiflow/dm/... \ @@ -475,7 +476,7 @@ dm_integration_test_build_worker: check_failpoint_ctl $(FAILPOINT_DISABLE) ./dm/tests/prepare_tools.sh -dm_integration_test_build_master: check_failpoint_ctl sync_diff_inspector +dm_integration_test_build_master: check_failpoint_ctl $(FAILPOINT_ENABLE) $(GOTEST) -ldflags '$(LDFLAGS)' -c -cover -covermode=atomic \ -coverpkg=github.com/pingcap/tiflow/dm/... 
\ diff --git a/sync_diff_inspector/source/source_test.go b/sync_diff_inspector/source/source_test.go index 9627c825dd4..bc69832547d 100644 --- a/sync_diff_inspector/source/source_test.go +++ b/sync_diff_inspector/source/source_test.go @@ -22,7 +22,6 @@ import ( "regexp" "strconv" "testing" - "time" "github.com/DATA-DOG/go-sqlmock" _ "github.com/go-sql-driver/mysql" @@ -102,8 +101,7 @@ func (m *MockAnalyzer) AnalyzeSplitter(ctx context.Context, tableDiff *common.Ta } func TestTiDBSource(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() conn, mock, err := sqlmock.New() require.NoError(t, err) @@ -270,8 +268,7 @@ func TestTiDBSource(t *testing.T) { } func TestFallbackToRandomIfRangeIsSet(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() conn, mock, err := sqlmock.New() require.NoError(t, err) @@ -319,8 +316,7 @@ func TestFallbackToRandomIfRangeIsSet(t *testing.T) { } func TestMysqlShardSources(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() tableCases := []*tableCaseType{ { @@ -443,8 +439,7 @@ func TestMysqlShardSources(t *testing.T) { } func TestMysqlRouter(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() conn, mock, err := sqlmock.New() require.NoError(t, err) @@ -556,8 +551,7 @@ func TestMysqlRouter(t *testing.T) { } func TestTiDBRouter(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() conn, mock, err := sqlmock.New() require.NoError(t, err) @@ -668,8 +662,7 @@ func TestSource(t *testing.T) { port, err := strconv.Atoi(portstr) require.NoError(t, err) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() router, err := router.NewTableRouter(false, nil) require.NoError(t, err) @@ -768,8 +761,7 @@ func TestRouterRules(t *testing.T) { port, err := strconv.Atoi(portStr) require.NoError(t, err) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() r, _ := router.NewTableRouter( false, diff --git a/sync_diff_inspector/utils/utils_test.go b/sync_diff_inspector/utils/utils_test.go index 6454e27ef4b..d5bd5f8eadc 100644 --- a/sync_diff_inspector/utils/utils_test.go +++ b/sync_diff_inspector/utils/utils_test.go @@ -487,7 +487,7 @@ func TestGetBetterIndex(t *testing.T) { require.NoError(t, err) require.Equal(t, sel, tableCase.sels[i]) } - mock.ExpectQuery("SELECT COUNT\\(DISTINCT `a.*").WillReturnRows(sqlmock.NewRows([]string{"ESL"}).AddRow("5")) + mock.ExpectQuery("SELECT COUNT\\(DISTINCT `a.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("5")) mock.ExpectQuery("SELECT COUNT\\(DISTINCT `b.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("2")) indices, err = GetBetterIndex(ctx, conn, "single_index", "test1", tableInfo) require.NoError(t, err) From 2d45cacd975af9670e24b65c55602d49d252a6c9 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 25 Oct 2024 11:42:27 +0800 Subject: [PATCH 14/22] Revert all changes related to build --- .github/workflows/dm_binlog_999999.yaml | 6 +++--- Makefile | 9 +++++---- dm/tests/README.md | 3 ++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git 
a/.github/workflows/dm_binlog_999999.yaml b/.github/workflows/dm_binlog_999999.yaml index ee722f93f88..fa0aaf93899 100644 --- a/.github/workflows/dm_binlog_999999.yaml +++ b/.github/workflows/dm_binlog_999999.yaml @@ -44,13 +44,13 @@ jobs: key: ${{ runner.os }}-ticdc-tools-${{ hashFiles('tools/check/go.sum') }} - name: Build DM binary - run: | - make dm_integration_test_build - make sync_diff_inspector + run: make dm_integration_test_build - name: Setup CI environment run: | docker-compose -f ./dm/tests/binlog_999999/docker-compose.yml up -d + curl http://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz + mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ curl http://download.pingcap.org/tidb-nightly-linux-amd64.tar.gz | tar xz mv tidb-nightly-linux-amd64/bin/tidb-server bin/ curl -O https://dl.min.io/server/minio/release/linux-amd64/minio diff --git a/Makefile b/Makefile index 38f578b3845..63ce12fbfaf 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ ### Makefile for tiflow -.PHONY: build test check clean fmt sync_diff_inspector cdc kafka_consumer storage_consumer coverage \ +.PHONY: build test check clean fmt cdc kafka_consumer storage_consumer coverage \ integration_test_build integration_test integration_test_mysql integration_test_kafka bank \ kafka_docker_integration_test kafka_docker_integration_test_with_build \ clean_integration_test_containers \ @@ -159,9 +159,6 @@ build-cdc-with-failpoint: ## Build cdc with failpoint enabled. cdc: $(GOBUILD) -ldflags '$(LDFLAGS)' -o bin/cdc ./cmd/cdc -sync_diff_inspector: - $(GOBUILD) -ldflags '$(LDFLAGS)' -o bin/sync_diff_inspector ./sync_diff_inspector/main.go - kafka_consumer: $(CONSUMER_GOBUILD) -ldflags '$(LDFLAGS)' -o bin/cdc_kafka_consumer ./cmd/kafka-consumer @@ -569,6 +566,7 @@ check_third_party_binary_for_engine: @which mysql || (echo "mysql not found in ${PATH}"; exit 1) @which jq || (echo "jq not found in ${PATH}"; exit 1) @which mc || (echo "mc not found in ${PATH}, you can use 'make bin/mc' and move bin/mc to ${PATH}"; exit 1) + @which bin/sync_diff_inspector || (echo "run 'make bin/sync_diff_inspector' to download it if you need") check_engine_integration_test: ./engine/test/utils/check_case.sh @@ -583,6 +581,9 @@ check_cdc_integration_test: bin/mc: ./scripts/download-mc.sh +bin/sync_diff_inspector: + ./scripts/download-sync-diff.sh + define run_engine_unit_test @echo "running unit test for packages:" $(1) mkdir -p $(ENGINE_TEST_DIR) diff --git a/dm/tests/README.md b/dm/tests/README.md index 9d3e4432ad5..0fc789263d6 100644 --- a/dm/tests/README.md +++ b/dm/tests/README.md @@ -4,6 +4,7 @@ 1. The following executables must be copied or generated or linked into these locations. * `bin/tidb-server` can be downloaded from [tidb-master-linux-amd64](https://download.pingcap.org/tidb-master-linux-amd64.tar.gz) or installed by [tiup](https://github.com/pingcap/tiup), you can use the command `find ~/.tiup -name tidb-server` to locate `tidb-server` binary file and copy it + * `bin/sync_diff_inspector` # can be downloaded from [tidb-enterprise-tools-latest-linux-amd64](http://download.pingcap.org/tidb-enterprise-tools-latest-linux-amd64.tar.gz) or build from [source code](https://github.com/pingcap/tidb-tools) * `bin/minio` can be build from (https://github.com/minio/minio) * `bin/dm-master.test` # generated by `make dm_integration_test_build` * `bin/dm-worker.test` # generated by `make dm_integration_test_build` @@ -31,7 +32,7 @@ ### Integration Test -1. 
Run `make dm_integration_test_build` and `make sync_diff_inspector` to generate DM related binary for integration test. +1. Run `make dm_integration_test_build` to generate DM related binary for integration test. 2. Setup two MySQL servers (the first one: 5.6 ~ 5.7; the second one: 8.0.21, suggest you are same as [CI](https://github.com/PingCAP-QE/ci/blob/main/jenkins/pipelines/ci/dm/dm_ghpr_new_test.groovy#L164-L172)) with [binlog enabled first](https://dev.mysql.com/doc/refman/5.7/en/replication-howto-masterbaseconfig.html) and [set `GTID_MODE=ON`](https://dev.mysql.com/doc/refman/5.7/en/replication-mode-change-online-enable-gtids.html), You need set the mysql port and root password according to the following table. From 6656850b26e78d2a8d2f5f3942867d026c9c50ce Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 25 Oct 2024 11:44:37 +0800 Subject: [PATCH 15/22] Revert all changes related to build --- dm/tests/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dm/tests/README.md b/dm/tests/README.md index 0fc789263d6..f72fe191fee 100644 --- a/dm/tests/README.md +++ b/dm/tests/README.md @@ -32,7 +32,7 @@ ### Integration Test -1. Run `make dm_integration_test_build` to generate DM related binary for integration test. +1. Run `make dm_integration_test_build` to generate DM related binary for integration test 2. Setup two MySQL servers (the first one: 5.6 ~ 5.7; the second one: 8.0.21, suggest you are same as [CI](https://github.com/PingCAP-QE/ci/blob/main/jenkins/pipelines/ci/dm/dm_ghpr_new_test.groovy#L164-L172)) with [binlog enabled first](https://dev.mysql.com/doc/refman/5.7/en/replication-howto-masterbaseconfig.html) and [set `GTID_MODE=ON`](https://dev.mysql.com/doc/refman/5.7/en/replication-mode-change-online-enable-gtids.html), You need set the mysql port and root password according to the following table. 
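Note on the next patch ("remove dep"): it drops the `github.com/siddontang/go/ioutil2` import by inlining the two small helpers sync_diff_inspector used from it. The inlined checkpoint writer follows the usual write-to-a-temp-file-then-rename idiom; a minimal sketch of that idiom (illustrative package and function names, simplified from what the patch actually adds — the real helper also handles short writes and restores the file mode) looks like this:

```go
package checkpointutil

import (
	"os"
	"path/filepath"
)

// writeAtomic writes data to a temp file in the target directory and renames
// it into place, so a crash mid-write leaves either the old file or the new
// one on disk, never a truncated mix.
func writeAtomic(filename string, data []byte) error {
	tmp, err := os.CreateTemp(filepath.Dir(filename), filepath.Base(filename))
	if err != nil {
		return err
	}
	if _, err := tmp.Write(data); err != nil {
		tmp.Close()
		os.Remove(tmp.Name())
		return err
	}
	if err := tmp.Close(); err != nil {
		os.Remove(tmp.Name())
		return err
	}
	// Rename is atomic when source and target live on the same filesystem.
	return os.Rename(tmp.Name(), filename)
}
```

That all-or-nothing property is what a checkpoint loader relies on when it re-reads `sync_diff_checkpoints.pb` after an interrupted run.
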
From 8e601b3d1887ad9b03a38064b5e1669a4a1b81c5 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 25 Oct 2024 13:50:07 +0800 Subject: [PATCH 16/22] remove dep --- go.mod | 2 +- .../checkpoints/checkpoints.go | 26 +++++++++++++++++-- sync_diff_inspector/diff/diff.go | 11 +++++--- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index d0aec8d663a..8ce5bf41de4 100644 --- a/go.mod +++ b/go.mod @@ -346,7 +346,7 @@ require ( github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c // indirect github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0 // indirect github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546 // indirect - github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726 + github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726 // indirect github.com/siddontang/go-log v0.0.0-20180807004314-8d05993dda07 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect diff --git a/sync_diff_inspector/checkpoints/checkpoints.go b/sync_diff_inspector/checkpoints/checkpoints.go index 82b4def15ec..ab98187eddf 100644 --- a/sync_diff_inspector/checkpoints/checkpoints.go +++ b/sync_diff_inspector/checkpoints/checkpoints.go @@ -17,7 +17,9 @@ import ( "container/heap" "context" "encoding/json" + "io" "os" + "path" "sync" "github.com/pingcap/errors" @@ -25,10 +27,30 @@ import ( "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/config" "github.com/pingcap/tiflow/sync_diff_inspector/report" - "github.com/siddontang/go/ioutil2" "go.uber.org/zap" ) +// Write file to temp and atomically move when everything else succeeds. +func writeFileAtomic(filename string, data []byte, perm os.FileMode) error { + dir, name := path.Dir(filename), path.Base(filename) + f, err := os.CreateTemp(dir, name) + if err != nil { + return err + } + n, err := f.Write(data) + f.Close() + if err == nil && n < len(data) { + err = io.ErrShortWrite + } else { + err = os.Chmod(f.Name(), perm) + } + if err != nil { + os.Remove(f.Name()) + return err + } + return os.Rename(f.Name(), filename) +} + const ( // SuccessState means // for chunk: this chunk's data is equal @@ -227,7 +249,7 @@ func (cp *Checkpoint) SaveChunk(ctx context.Context, fileName string, cur *Node, return nil, errors.Trace(err) } - if err = ioutil2.WriteFileAtomic(fileName, checkpointData, config.LocalFilePerm); err != nil { + if err = writeFileAtomic(fileName, checkpointData, config.LocalFilePerm); err != nil { return nil, err } log.Info("save checkpoint", diff --git a/sync_diff_inspector/diff/diff.go b/sync_diff_inspector/diff/diff.go index b2eec9afd17..929be851af6 100644 --- a/sync_diff_inspector/diff/diff.go +++ b/sync_diff_inspector/diff/diff.go @@ -41,10 +41,15 @@ import ( "github.com/pingcap/tiflow/sync_diff_inspector/source/common" "github.com/pingcap/tiflow/sync_diff_inspector/splitter" "github.com/pingcap/tiflow/sync_diff_inspector/utils" - "github.com/siddontang/go/ioutil2" "go.uber.org/zap" ) +// Check file exists or not +func fileExists(name string) bool { + _, err := os.Stat(name) + return !os.IsNotExist(err) +} + const ( // checkpointFile represents the checkpoints' file name which used for save and loads chunks checkpointFile = "sync_diff_checkpoints.pb" @@ -161,7 +166,7 @@ func (df *Diff) initCheckpoint() error { finishTableNums := 0 path := filepath.Join(df.CheckpointDir, checkpointFile) - if ioutil2.FileExists(path) { + if fileExists(path) { node, reportInfo, err := df.cp.LoadChunk(path) if err != 
nil { return errors.Annotate(err, "the checkpoint load process failed") @@ -740,7 +745,7 @@ func (df *Diff) writeSQLs(ctx context.Context) { tableDiff := df.downstream.GetTables()[dml.node.GetTableIndex()] fileName := fmt.Sprintf("%s:%s:%s.sql", tableDiff.Schema, tableDiff.Table, utils.GetSQLFileName(dml.node.GetID())) fixSQLPath := filepath.Join(df.FixSQLDir, fileName) - if ok := ioutil2.FileExists(fixSQLPath); ok { + if fileExists(fixSQLPath) { // unreachable log.Fatal("write sql failed: repeat sql happen", zap.Strings("sql", dml.sqls)) } From ec82382ddfd2bb8db234c8dc1c61bea6cd3188c2 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 25 Oct 2024 15:55:13 +0800 Subject: [PATCH 17/22] Fix unstable test --- sync_diff_inspector/source/chunks_iter.go | 57 ++++++++++++------- sync_diff_inspector/splitter/bucket.go | 47 ++++++++------- sync_diff_inspector/splitter/splitter.go | 2 + sync_diff_inspector/splitter/splitter_test.go | 3 + 4 files changed, 67 insertions(+), 42 deletions(-) diff --git a/sync_diff_inspector/source/chunks_iter.go b/sync_diff_inspector/source/chunks_iter.go index 0439aba8ea3..1aad42fc90c 100644 --- a/sync_diff_inspector/source/chunks_iter.go +++ b/sync_diff_inspector/source/chunks_iter.go @@ -28,22 +28,33 @@ import ( // ChunksIterator is used for single mysql/tidb source. type ChunksIterator struct { + ctx context.Context + cancel context.CancelFunc + ID *chunk.CID tableAnalyzer TableAnalyzer TableDiffs []*common.TableDiff - nextTableIndex int chunksCh chan *splitter.RangeInfo errCh chan error splitThreadCount int - cancel context.CancelFunc + pool *utils.WorkerPool } // NewChunksIterator returns a new iterator -func NewChunksIterator(ctx context.Context, analyzer TableAnalyzer, tableDiffs []*common.TableDiff, startRange *splitter.RangeInfo, splitThreadCount int) (*ChunksIterator, error) { +func NewChunksIterator( + ctx context.Context, + analyzer TableAnalyzer, + tableDiffs []*common.TableDiff, + startRange *splitter.RangeInfo, + splitThreadCount int, +) (*ChunksIterator, error) { ctxx, cancel := context.WithCancel(ctx) iter := &ChunksIterator{ + ctx: ctxx, + cancel: cancel, + splitThreadCount: splitThreadCount, tableAnalyzer: analyzer, TableDiffs: tableDiffs, @@ -51,26 +62,30 @@ func NewChunksIterator(ctx context.Context, analyzer TableAnalyzer, tableDiffs [ // reserve 30 capacity for each goroutine on average chunksCh: make(chan *splitter.RangeInfo, 30*splitThreadCount), errCh: make(chan error, len(tableDiffs)), - cancel: cancel, + pool: utils.NewWorkerPool(uint(splitThreadCount), "chunks producer"), } - go iter.produceChunks(ctxx, startRange) + go iter.produceChunks(startRange) return iter, nil } -func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter.RangeInfo) { - defer close(t.chunksCh) - pool := utils.NewWorkerPool(uint(t.splitThreadCount), "chunks producer") - t.nextTableIndex = 0 +func (t *ChunksIterator) produceChunks(startRange *splitter.RangeInfo) { + defer func() { + t.pool.WaitFinished() + close(t.chunksCh) + }() + + nextTableIndex := 0 // If chunkRange if startRange != nil { curIndex := startRange.GetTableIndex() curTable := t.TableDiffs[curIndex] - t.nextTableIndex = curIndex + 1 + nextTableIndex = curIndex + 1 + // if this chunk is empty, data-check for this table should be skipped if startRange.ChunkRange.Type != chunk.Empty { - pool.Apply(func() { - chunkIter, err := t.tableAnalyzer.AnalyzeSplitter(ctx, curTable, startRange) + t.pool.Apply(func() { + chunkIter, err := t.tableAnalyzer.AnalyzeSplitter(t.ctx, curTable, 
startRange) if err != nil { t.errCh <- errors.Trace(err) return @@ -87,7 +102,7 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter } c.Index.TableIndex = curIndex select { - case <-ctx.Done(): + case <-t.ctx.Done(): log.Info("Stop do produce chunks by context done") return case t.chunksCh <- &splitter.RangeInfo{ @@ -101,16 +116,16 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter } } - for ; t.nextTableIndex < len(t.TableDiffs); t.nextTableIndex++ { - curTableIndex := t.nextTableIndex + for ; nextTableIndex < len(t.TableDiffs); nextTableIndex++ { + curTableIndex := nextTableIndex // skip data-check, but still need to send a empty chunk to make checkpoint continuous if t.TableDiffs[curTableIndex].IgnoreDataCheck || !common.AllTableExist(t.TableDiffs[curTableIndex].TableLack) { - pool.Apply(func() { + t.pool.Apply(func() { table := t.TableDiffs[curTableIndex] progressID := dbutil.TableName(table.Schema, table.Table) progress.StartTable(progressID, 1, true) select { - case <-ctx.Done(): + case <-t.ctx.Done(): log.Info("Stop do produce chunks by context done") return case t.chunksCh <- &splitter.RangeInfo{ @@ -129,9 +144,9 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter continue } - pool.Apply(func() { + t.pool.Apply(func() { table := t.TableDiffs[curTableIndex] - chunkIter, err := t.tableAnalyzer.AnalyzeSplitter(ctx, table, nil) + chunkIter, err := t.tableAnalyzer.AnalyzeSplitter(t.ctx, table, nil) if err != nil { t.errCh <- errors.Trace(err) return @@ -148,7 +163,7 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter } c.Index.TableIndex = curTableIndex select { - case <-ctx.Done(): + case <-t.ctx.Done(): log.Info("Stop do produce chunks by context done") return case t.chunksCh <- &splitter.RangeInfo{ @@ -160,7 +175,6 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter } }) } - pool.WaitFinished() } // Next returns the next chunk @@ -181,6 +195,7 @@ func (t *ChunksIterator) Next(ctx context.Context) (*splitter.RangeInfo, error) // Close closes the iterator func (t *ChunksIterator) Close() { t.cancel() + t.pool.WaitFinished() } // TODO: getCurTableIndexID only used for binary search, should be optimized later. 
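A note on the `chunks_iter.go` rewrite above: the iterator now keeps its own `ctx`, `cancel`, and worker pool, so `Close` cancels in-flight splitting and then waits for the pool, while the producer goroutine only closes `chunksCh` after the pool has drained — otherwise a late worker could send on a closed channel. A stripped-down sketch of that cancel-then-wait shape, using a plain `sync.WaitGroup` in place of `utils.WorkerPool` (illustrative names, not the iterator's actual API):

```go
package produce

import (
	"context"
	"sync"
)

type producer struct {
	ctx    context.Context
	cancel context.CancelFunc
	wg     sync.WaitGroup
	out    chan int
}

func newProducer(ctx context.Context, jobs int) *producer {
	ctx, cancel := context.WithCancel(ctx)
	p := &producer{ctx: ctx, cancel: cancel, out: make(chan int, jobs)}
	p.wg.Add(jobs) // register all work up front so Close can safely Wait
	go p.run(jobs)
	return p
}

func (p *producer) run(jobs int) {
	defer func() {
		p.wg.Wait()  // every worker has returned...
		close(p.out) // ...so nothing can send on the closed channel
	}()
	for i := 0; i < jobs; i++ {
		go func(v int) {
			defer p.wg.Done()
			select {
			case <-p.ctx.Done(): // exit promptly once Close cancels
			case p.out <- v:
			}
		}(i)
	}
}

// Close cancels first, then waits for the workers to finish.
func (p *producer) Close() {
	p.cancel()
	p.wg.Wait()
}
```

The patched iterator does the same with `pool.Apply` / `pool.WaitFinished`, which is why `produceChunks` only closes `chunksCh` after `WaitFinished` returns.
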
diff --git a/sync_diff_inspector/splitter/bucket.go b/sync_diff_inspector/splitter/bucket.go index cfd5c1d6664..37fd765729b 100644 --- a/sync_diff_inspector/splitter/bucket.go +++ b/sync_diff_inspector/splitter/bucket.go @@ -16,7 +16,6 @@ package splitter import ( "context" "database/sql" - "sync" "github.com/pingcap/errors" "github.com/pingcap/failpoint" @@ -35,12 +34,14 @@ const DefaultChannelBuffer = 1024 // BucketIterator is struct for bucket iterator type BucketIterator struct { + ctx context.Context + cancel context.CancelFunc + buckets []dbutil.Bucket table *common.TableDiff indexColumns []*model.ColumnInfo chunkPool *utils.WorkerPool - wg sync.WaitGroup // control for one bucket in shared chunkPool chunkSize int64 chunks []*chunk.Range @@ -48,7 +49,6 @@ type BucketIterator struct { chunksCh chan []*chunk.Range errCh chan error - cancel context.CancelFunc indexID int64 progressID string @@ -75,14 +75,16 @@ func NewBucketIteratorWithCheckpoint( table.Range) } - bctx, cancel := context.WithCancel(ctx) + ctx, cancel := context.WithCancel(ctx) bs := &BucketIterator{ + ctx: ctx, + cancel: cancel, + table: table, chunkPool: bucketSpliterPool, chunkSize: table.ChunkSize, chunksCh: make(chan []*chunk.Range, DefaultChannelBuffer), errCh: make(chan error, 1), - cancel: cancel, dbConn: dbConn, progressID: progressID, @@ -94,7 +96,7 @@ func NewBucketIteratorWithCheckpoint( // Let the progress bar begins to record the table. progress.StartTable(bs.progressID, 0, false) - go bs.produceChunks(bctx, startRange) + go bs.produceChunks(startRange) return bs, nil } @@ -228,16 +230,19 @@ NEXTINDEX: // Close closes the iterator func (s *BucketIterator) Close() { s.cancel() + s.chunkPool.WaitFinished() } -func (s *BucketIterator) splitChunkForBucket(ctx context.Context, firstBucketID, lastBucketID int, beginIndex int, bucketChunkCnt int, splitChunkCnt int, chunkRange *chunk.Range) { - s.wg.Add(1) +func (s *BucketIterator) splitChunkForBucket( + firstBucketID, lastBucketID, beginIndex int, + bucketChunkCnt, splitChunkCnt int, + chunkRange *chunk.Range, +) { s.chunkPool.Apply(func() { - defer s.wg.Done() - chunks, err := splitRangeByRandom(ctx, s.dbConn, chunkRange, splitChunkCnt, s.table.Schema, s.table.Table, s.indexColumns, s.table.Range, s.table.Collation) + chunks, err := splitRangeByRandom(s.ctx, s.dbConn, chunkRange, splitChunkCnt, s.table.Schema, s.table.Table, s.indexColumns, s.table.Range, s.table.Collation) if err != nil { select { - case <-ctx.Done(): + case <-s.ctx.Done(): case s.errCh <- errors.Trace(err): } return @@ -248,11 +253,11 @@ func (s *BucketIterator) splitChunkForBucket(ctx context.Context, firstBucketID, }) } -func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInfo) { +func (s *BucketIterator) produceChunks(startRange *RangeInfo) { defer func() { - s.wg.Wait() - progress.UpdateTotal(s.progressID, 0, true) + s.chunkPool.WaitFinished() close(s.chunksCh) + progress.UpdateTotal(s.progressID, 0, true) }() var ( lowerValues, upperValues []string @@ -272,7 +277,7 @@ func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInf // its bucketID is less than len(s.buckets) if c.Index.BucketIndexRight >= len(s.buckets) { select { - case <-ctx.Done(): + case <-s.ctx.Done(): case s.errCh <- errors.New("Wrong Bucket: Bucket index of the checkpoint node is larger than buckets' size"): } return @@ -281,7 +286,7 @@ func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInf nextUpperValues, err := 
dbutil.AnalyzeValuesFromBuckets(s.buckets[c.Index.BucketIndexRight].UpperBound, s.indexColumns) if err != nil { select { - case <-ctx.Done(): + case <-s.ctx.Done(): case s.errCh <- errors.Trace(err): } return @@ -301,7 +306,7 @@ func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInf chunkRange.Update(bound.Column, bound.Upper, "", true, false) } - s.splitChunkForBucket(ctx, c.Index.BucketIndexLeft, c.Index.BucketIndexRight, c.Index.ChunkIndex+1, c.Index.ChunkCnt, leftCnt, chunkRange) + s.splitChunkForBucket(c.Index.BucketIndexLeft, c.Index.BucketIndexRight, c.Index.ChunkIndex+1, c.Index.ChunkCnt, leftCnt, chunkRange) } } halfChunkSize := s.chunkSize >> 1 @@ -317,7 +322,7 @@ func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInf upperValues, err = dbutil.AnalyzeValuesFromBuckets(s.buckets[i].UpperBound, s.indexColumns) if err != nil { select { - case <-ctx.Done(): + case <-s.ctx.Done(): case s.errCh <- errors.Trace(err): } return @@ -343,10 +348,10 @@ func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInf if i == firstBucket { // chunkCnt := int((count + halfChunkSize) / s.chunkSize) - s.splitChunkForBucket(ctx, firstBucket, i, 0, chunkCnt, chunkCnt, chunkRange) + s.splitChunkForBucket(firstBucket, i, 0, chunkCnt, chunkCnt, chunkRange) } else { // use multi-buckets so chunkCnt = 1 - s.splitChunkForBucket(ctx, firstBucket, i, 0, 1, 1, chunkRange) + s.splitChunkForBucket(firstBucket, i, 0, 1, 1, chunkRange) } latestCount = s.buckets[i].Count @@ -368,5 +373,5 @@ func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInf } // When the table is much less than chunkSize, // it will return a chunk include the whole table. - s.splitChunkForBucket(ctx, firstBucket, len(s.buckets), 0, 1, 1, chunkRange) + s.splitChunkForBucket(firstBucket, len(s.buckets), 0, 1, 1, chunkRange) } diff --git a/sync_diff_inspector/splitter/splitter.go b/sync_diff_inspector/splitter/splitter.go index 5fb45bc9024..de4a05ad037 100644 --- a/sync_diff_inspector/splitter/splitter.go +++ b/sync_diff_inspector/splitter/splitter.go @@ -29,6 +29,8 @@ const ( type ChunkIterator interface { // Next seeks the next chunk, return nil if seeks to end. Next() (*chunk.Range, error) + + // Close close the current iterator. 
Close() } diff --git a/sync_diff_inspector/splitter/splitter_test.go b/sync_diff_inspector/splitter/splitter_test.go index 760b642c01d..1326f3c0f52 100644 --- a/sync_diff_inspector/splitter/splitter_test.go +++ b/sync_diff_inspector/splitter/splitter_test.go @@ -607,6 +607,7 @@ func TestBucketSpliter(t *testing.T) { tableDiff.ChunkSize = testCase.chunkSize iter, err := NewBucketIterator(ctx, "", tableDiff, db) require.NoError(t, err) + defer iter.Close() obtainChunks := make([]chunkResult, 0, len(testCase.expectResult)) nextBeginBucket := 0 @@ -675,6 +676,8 @@ func TestBucketSpliter(t *testing.T) { break } } + iter.Close() + bounds1 := chunk.Bounds rangeInfo := &RangeInfo{ From 17c1bf2e45dc3296e9bd61c6734aa756fad690c3 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 3 Jan 2025 15:19:26 +0800 Subject: [PATCH 18/22] Manually pick new commits --- go.mod | 6 +- sync_diff_inspector/diff/diff.go | 94 ++++++++--- sync_diff_inspector/diff/diff_test.go | 107 ++++++++++++ sync_diff_inspector/tests/README.md | 36 ++++ .../tests/_utils/check_contains | 15 ++ .../tests/_utils/check_contains_count | 18 ++ .../tests/_utils/check_contains_regex | 15 ++ .../tests/_utils/check_db_status | 21 +++ .../tests/_utils/check_not_contains | 15 ++ sync_diff_inspector/tests/conf/client.crt | 25 +++ sync_diff_inspector/tests/conf/client.key | 27 +++ .../tests/conf/generate_script.sh | 16 ++ sync_diff_inspector/tests/conf/root.crt | 31 ++++ sync_diff_inspector/tests/conf/tidb.crt | 25 +++ sync_diff_inspector/tests/conf/tidb.key | 27 +++ sync_diff_inspector/tests/importer/run.sh | 32 ++++ sync_diff_inspector/tests/run.sh | 119 ++++++++++++++ .../checkpoint/config_base.toml | 53 ++++++ .../checkpoint/config_base_continous.toml | 55 +++++++ .../checkpoint/config_base_rand.toml | 54 ++++++ .../sync_diff_inspector/checkpoint/run.sh | 155 ++++++++++++++++++ .../config_base_mysql.toml | 47 ++++++ .../sync_diff_inspector/config_base_tidb.toml | 49 ++++++ .../expression/config.toml | 54 ++++++ .../sync_diff_inspector/expression/run.sh | 23 +++ .../sync_diff_inspector/json/config_base.toml | 47 ++++++ .../tests/sync_diff_inspector/json/data.sql | 7 + .../tests/sync_diff_inspector/json/run.sh | 41 +++++ .../tests/sync_diff_inspector/run.sh | 67 ++++++++ .../shard/config_base.toml | 55 +++++++ .../shard/config_router_1.toml | 55 +++++++ .../shard/config_router_2.toml | 53 ++++++ .../shard/config_router_3.toml | 55 +++++++ .../shard/config_router_4.toml | 49 ++++++ .../shard/config_router_5.toml | 55 +++++++ .../tests/sync_diff_inspector/shard/run.sh | 134 +++++++++++++++ .../snapshot/config_base.toml | 49 ++++++ .../tests/sync_diff_inspector/snapshot/run.sh | 49 ++++++ .../table_config/config.toml | 60 +++++++ .../sync_diff_inspector/table_config/run.sh | 42 +++++ .../table_skip/config_base.toml | 49 ++++++ .../table_skip/config_router.toml | 61 +++++++ .../sync_diff_inspector/table_skip/data.sql | 5 + .../sync_diff_inspector/table_skip/run.sh | 65 ++++++++ .../sync_diff_inspector/time_zone/config.toml | 48 ++++++ .../sync_diff_inspector/time_zone/run.sh | 57 +++++++ .../tests/sync_diff_inspector/tls/config.toml | 53 ++++++ .../tests/sync_diff_inspector/tls/run.sh | 27 +++ sync_diff_inspector/utils/pd.go | 4 +- 49 files changed, 2278 insertions(+), 28 deletions(-) create mode 100644 sync_diff_inspector/diff/diff_test.go create mode 100644 sync_diff_inspector/tests/README.md create mode 100755 sync_diff_inspector/tests/_utils/check_contains create mode 100755 sync_diff_inspector/tests/_utils/check_contains_count create mode 
100755 sync_diff_inspector/tests/_utils/check_contains_regex create mode 100755 sync_diff_inspector/tests/_utils/check_db_status create mode 100755 sync_diff_inspector/tests/_utils/check_not_contains create mode 100644 sync_diff_inspector/tests/conf/client.crt create mode 100644 sync_diff_inspector/tests/conf/client.key create mode 100644 sync_diff_inspector/tests/conf/generate_script.sh create mode 100644 sync_diff_inspector/tests/conf/root.crt create mode 100644 sync_diff_inspector/tests/conf/tidb.crt create mode 100644 sync_diff_inspector/tests/conf/tidb.key create mode 100644 sync_diff_inspector/tests/importer/run.sh create mode 100755 sync_diff_inspector/tests/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_continous.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_rand.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/checkpoint/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/config_base_mysql.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/config_base_tidb.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/expression/config.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/json/config_base.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/json/data.sql create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/json/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/config_base.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_1.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_2.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_3.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_4.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_5.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/snapshot/config_base.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/table_config/config.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_base.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_router.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/table_skip/data.sql create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/time_zone/config.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/tls/config.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh diff --git a/go.mod b/go.mod index 8ce5bf41de4..0545752ad15 100644 --- a/go.mod +++ b/go.mod @@ -215,9 +215,9 @@ require ( github.com/99designs/go-keychain 
v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.1 // indirect github.com/AthenZ/athenz v1.10.39 // indirect - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.12.0 - github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.6.0 // indirect - github.com/Azure/azure-sdk-for-go/sdk/internal v1.9.0 // indirect + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 // indirect + github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect github.com/DataDog/zstd v1.5.5 // indirect github.com/KyleBanks/depth v1.2.1 // indirect github.com/Masterminds/semver v1.5.0 // indirect diff --git a/sync_diff_inspector/diff/diff.go b/sync_diff_inspector/diff/diff.go index 929be851af6..3feeb9e9de1 100644 --- a/sync_diff_inspector/diff/diff.go +++ b/sync_diff_inspector/diff/diff.go @@ -50,6 +50,29 @@ func fileExists(name string) bool { return !os.IsNotExist(err) } +// GetSnapshot gets the snapshot +func GetSnapshot(latestSnap []string, snap string, db *sql.DB) string { + if len(latestSnap) != 1 { + return snap + } + + latestSnapshotVal, err := utils.ParseSnapshotToTSO(db, latestSnap[0]) + if err != nil || latestSnapshotVal == 0 { + return snap + } + + snapshotVal, err := utils.ParseSnapshotToTSO(db, snap) + if err != nil { + return latestSnap[0] + } + + // compare the snapshots and choose the smaller one to lock + if latestSnapshotVal < snapshotVal { + return latestSnap[0] + } + return snap +} + const ( // checkpointFile represents the checkpoints' file name which used for save and loads chunks checkpointFile = "sync_diff_checkpoints.pb" @@ -340,15 +363,7 @@ func (df *Diff) startGCKeeperForTiDB(ctx context.Context, db *sql.DB, snap strin return } - if len(latestSnap) == 1 { - if len(snap) == 0 { - snap = latestSnap[0] - } - // compare the snapshot and choose the small one to lock - if strings.Compare(latestSnap[0], snap) < 0 { - snap = latestSnap[0] - } - } + snap = GetSnapshot(latestSnap, snap, db) err = utils.StartGCSavepointUpdateService(ctx, pdCli, db, snap) if err != nil { @@ -445,7 +460,7 @@ func (df *Diff) consume(ctx context.Context, rangeInfo *splitter.RangeInfo) bool // If an error occurs during the checksum phase, skip the data compare phase.
state = checkpoints.FailedState df.report.SetTableMeetError(schema, table, err) - } else if !isEqual && df.exportFixSQL { + } else if !isEqual { state = checkpoints.FailedState // if the chunk's checksum differ, try to do binary check info := rangeInfo @@ -610,7 +625,13 @@ func (df *Diff) compareChecksumAndGetCount(ctx context.Context, tableRange *spli if upstreamInfo.Count == downstreamInfo.Count && upstreamInfo.Checksum == downstreamInfo.Checksum { return true, upstreamInfo.Count, downstreamInfo.Count, nil } - log.Debug("checksum doesn't match", zap.Any("chunk id", tableRange.ChunkRange.Index), zap.String("table", df.workSource.GetTables()[tableRange.GetTableIndex()].Table), zap.Int64("upstream chunk size", upstreamInfo.Count), zap.Int64("downstream chunk size", downstreamInfo.Count), zap.Uint64("upstream checksum", upstreamInfo.Checksum), zap.Uint64("downstream checksum", downstreamInfo.Checksum)) + log.Debug("checksum doesn't match, need to compare rows", + zap.Any("chunk id", tableRange.ChunkRange.Index), + zap.String("table", df.workSource.GetTables()[tableRange.GetTableIndex()].Table), + zap.Int64("upstream chunk size", upstreamInfo.Count), + zap.Int64("downstream chunk size", downstreamInfo.Count), + zap.Uint64("upstream checksum", upstreamInfo.Checksum), + zap.Uint64("downstream checksum", downstreamInfo.Checksum)) return false, upstreamInfo.Count, downstreamInfo.Count, nil } @@ -650,11 +671,17 @@ func (df *Diff) compareRows(ctx context.Context, rangeInfo *splitter.RangeInfo, if lastUpstreamData == nil { // don't have source data, so all the targetRows's data is redundant, should be deleted for lastDownstreamData != nil { - sql := df.downstream.GenerateFixSQL(source.Delete, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) rowsDelete++ - log.Debug("[delete]", zap.String("sql", sql)) - dml.sqls = append(dml.sqls, sql) + if df.exportFixSQL { + sql := df.downstream.GenerateFixSQL( + source.Delete, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex(), + ) + log.Debug("[delete]", zap.String("sql", sql)) + + dml.sqls = append(dml.sqls, sql) + } + equal = false lastDownstreamData, err = downstreamRowsIterator.Next() if err != nil { @@ -667,11 +694,13 @@ func (df *Diff) compareRows(ctx context.Context, rangeInfo *splitter.RangeInfo, if lastDownstreamData == nil { // target lack some data, should insert the last source datas for lastUpstreamData != nil { - sql := df.downstream.GenerateFixSQL(source.Insert, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) rowsAdd++ - log.Debug("[insert]", zap.String("sql", sql)) + if df.exportFixSQL { + sql := df.downstream.GenerateFixSQL(source.Insert, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) + log.Debug("[insert]", zap.String("sql", sql)) - dml.sqls = append(dml.sqls, sql) + dml.sqls = append(dml.sqls, sql) + } equal = false lastUpstreamData, err = upstreamRowsIterator.Next() @@ -698,22 +727,34 @@ func (df *Diff) compareRows(ctx context.Context, rangeInfo *splitter.RangeInfo, switch cmp { case 1: // delete - sql = df.downstream.GenerateFixSQL(source.Delete, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) rowsDelete++ - log.Debug("[delete]", zap.String("sql", sql)) + if df.exportFixSQL { + sql = df.downstream.GenerateFixSQL( + source.Delete, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex(), + ) + log.Debug("[delete]", zap.String("sql", sql)) + } lastDownstreamData = nil case -1: // insert - sql = df.downstream.GenerateFixSQL(source.Insert, lastUpstreamData, 
lastDownstreamData, rangeInfo.GetTableIndex()) rowsAdd++ - log.Debug("[insert]", zap.String("sql", sql)) + if df.exportFixSQL { + sql = df.downstream.GenerateFixSQL( + source.Insert, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex(), + ) + log.Debug("[insert]", zap.String("sql", sql)) + } lastUpstreamData = nil case 0: // update - sql = df.downstream.GenerateFixSQL(source.Replace, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) rowsAdd++ rowsDelete++ - log.Debug("[update]", zap.String("sql", sql)) + if df.exportFixSQL { + sql = df.downstream.GenerateFixSQL( + source.Replace, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex(), + ) + log.Debug("[update]", zap.String("sql", sql)) + } lastUpstreamData = nil lastDownstreamData = nil } @@ -722,6 +763,13 @@ func (df *Diff) compareRows(ctx context.Context, rangeInfo *splitter.RangeInfo, } dml.rowAdd = rowsAdd dml.rowDelete = rowsDelete + + log.Debug("compareRows", + zap.Bool("equal", equal), + zap.Int("rowsAdd", rowsAdd), + zap.Int("rowsDelete", rowsDelete), + zap.Any("chunk id", rangeInfo.ChunkRange.Index), + zap.String("table", df.workSource.GetTables()[rangeInfo.GetTableIndex()].Table)) return equal, nil } diff --git a/sync_diff_inspector/diff/diff_test.go b/sync_diff_inspector/diff/diff_test.go new file mode 100644 index 00000000000..e8251a13343 --- /dev/null +++ b/sync_diff_inspector/diff/diff_test.go @@ -0,0 +1,107 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
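The table-driven TestGetSnapshot below exercises the exported GetSnapshot helper added to diff.go. For orientation, here is a minimal standalone sketch (not part of the patch) of how the helper behaves when both arguments are plain TSO strings; it assumes the package is importable as github.com/pingcap/tiflow/sync_diff_inspector/diff after the move into tiflow, and it uses a sqlmock handle only as a stand-in for a real TiDB connection.

package main

import (
	"fmt"

	"github.com/DATA-DOG/go-sqlmock"
	"github.com/pingcap/tiflow/sync_diff_inspector/diff"
)

func main() {
	// A mocked *sql.DB is enough here: numeric TSO strings appear to be resolved
	// without querying the server (the test cases below only mock
	// SELECT unix_timestamp(...) for the datetime snapshots).
	db, _, err := sqlmock.New()
	if err != nil {
		panic(err)
	}
	defer db.Close()

	fmt.Println(diff.GetSnapshot([]string{"5"}, "6", db)) // "5": the earlier snapshot wins, so the GC safepoint covers both reads
	fmt.Println(diff.GetSnapshot([]string{"7"}, "6", db)) // "6": the configured snapshot is already the older one
	fmt.Println(diff.GetSnapshot(nil, "6", db))           // "6": no recorded snapshot, keep the configured one
}

Pulling this decision out of startGCKeeperForTiDB into a value-based helper is what makes a table-driven test like the one below possible; the replaced inline code compared the two snapshot strings with strings.Compare, which orders numeric TSOs lexicographically rather than numerically.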
+ +package diff + +import ( + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/stretchr/testify/require" +) + +func TestGetSnapshot(t *testing.T) { + cases := []struct { + latestSnapshot []string + snapshot string + expected string + snapshotRows string + }{ + { + latestSnapshot: []string{}, + snapshot: "1", + expected: "1", + }, + { + latestSnapshot: []string{"2"}, + snapshot: "", + expected: "2", + }, + { + latestSnapshot: []string{"0"}, + snapshot: "3", + expected: "3", + }, + { + latestSnapshot: []string{"4"}, + snapshot: "0", + expected: "0", + }, + { + latestSnapshot: []string{"5"}, + snapshot: "6", + expected: "5", + }, + { + latestSnapshot: []string{"7"}, + snapshot: "6", + expected: "6", + }, + { + // 2017-10-07 16:45:26 + latestSnapshot: []string{"395146933305344000"}, + snapshot: "2017-10-08 16:45:26", + expected: "395146933305344000", + snapshotRows: "1507452326", + }, + { + // 2017-10-07 16:45:26 + latestSnapshot: []string{"395146933305344000"}, + snapshot: "2017-10-06 16:45:26", + expected: "2017-10-06 16:45:26", + snapshotRows: "1507279526", + }, + { + latestSnapshot: []string{"1"}, + snapshot: "2017-10-06 16:45:26", + expected: "1", + snapshotRows: "1507279526", + }, + { + latestSnapshot: []string{"395146933305344000"}, + snapshot: "1", + expected: "1", + }, + { + // 2090-11-19 22:07:45 + latestSnapshot: []string{"1000022649077760000"}, + snapshot: "2090-11-18 22:07:45", + expected: "2090-11-18 22:07:45", + snapshotRows: "3814697265", + }, + } + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + for i, cs := range cases { + if len(cs.snapshotRows) > 0 { + dataRows := sqlmock.NewRows([]string{""}).AddRow(cs.snapshotRows) + mock.ExpectQuery("SELECT unix_timestamp(?)").WillReturnRows(dataRows) + } + val := GetSnapshot(cs.latestSnapshot, cs.snapshot, conn) + require.Equal(t, cs.expected, val, "case %d", i) + } + +} diff --git a/sync_diff_inspector/tests/README.md b/sync_diff_inspector/tests/README.md new file mode 100644 index 00000000000..443b78dd7e8 --- /dev/null +++ b/sync_diff_inspector/tests/README.md @@ -0,0 +1,36 @@ + + +This folder contains all tests that rely on external services such as TiDB. + +## Preparations + +1. The following seven executables must be copied or linked into these locations: + + - `bin/pd-server` + - `bin/tikv-server` + - `bin/tidb-server` + - `bin/sync_diff_inspector` + - `bin/dumpling` + - `bin/loader` + - `bin/importer` + +2. The following programs must be installed: + + - `mysql` (the CLI client) + - `mysqladmin` + +3. The user executing the tests must have permission to create the folder + + `/tmp/tidb_tools_test`. All test artifacts will be written into this folder. + +## Running + +Run `make integration_test` to execute the integration tests. This command will + +1. Build binaries. +2. Check that all executables exist. +3. Execute `tests/run.sh`. + +If the first two steps have already been done, you can also run `tests/run.sh` directly. + +The script will find all `tests/*/run.sh` scripts and run them. diff --git a/sync_diff_inspector/tests/_utils/check_contains b/sync_diff_inspector/tests/_utils/check_contains new file mode 100755 index 00000000000..93e7970b76a --- /dev/null +++ b/sync_diff_inspector/tests/_utils/check_contains @@ -0,0 +1,15 @@ +#!/bin/sh + +# argument 1 is the string need grep +# argument 2 is the filename + +set -eu +OUT_DIR=/tmp/tidb_tools_test + +if !
grep -Fq "$1" "$2"; then + echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1'" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 +fi diff --git a/sync_diff_inspector/tests/_utils/check_contains_count b/sync_diff_inspector/tests/_utils/check_contains_count new file mode 100755 index 00000000000..8308512d789 --- /dev/null +++ b/sync_diff_inspector/tests/_utils/check_contains_count @@ -0,0 +1,18 @@ +#!/bin/sh + +# argument 1 is the string need grep +# argument 2 is the filename +# argument 3 is the match count + +set -eu +OUT_DIR=/tmp/tidb_tools_test + +count=$(grep -F "$1" "$2" | wc -l) + +if [ "$count" -ne "$3" ]; then + echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1' $3 times" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 +fi diff --git a/sync_diff_inspector/tests/_utils/check_contains_regex b/sync_diff_inspector/tests/_utils/check_contains_regex new file mode 100755 index 00000000000..ce498abbec2 --- /dev/null +++ b/sync_diff_inspector/tests/_utils/check_contains_regex @@ -0,0 +1,15 @@ +#!/bin/sh + +# argument 1 is the string need grep +# argument 2 is the filename + +set -eu +OUT_DIR=/tmp/tidb_tools_test + +if ! grep -q "$1" "$2"; then + echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1'" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 +fi diff --git a/sync_diff_inspector/tests/_utils/check_db_status b/sync_diff_inspector/tests/_utils/check_db_status new file mode 100755 index 00000000000..8dc75739c55 --- /dev/null +++ b/sync_diff_inspector/tests/_utils/check_db_status @@ -0,0 +1,21 @@ +#!/bin/bash + +# argument 1 is the host +# argument 2 is the port +# argument 3 is the database service's name + +for i in {1..20} +do + if mysqladmin -h "$1" -P "$2" -u root --default-character-set utf8 ping > /dev/null 2>&1 + then + echo "$3 is alive" + exit 0 + fi + + echo "$3 is not alive, will try again" + sleep 2 +done + +echo "$3 is not alive" +cat "$4" +exit 2 diff --git a/sync_diff_inspector/tests/_utils/check_not_contains b/sync_diff_inspector/tests/_utils/check_not_contains new file mode 100755 index 00000000000..43fd007ad5f --- /dev/null +++ b/sync_diff_inspector/tests/_utils/check_not_contains @@ -0,0 +1,15 @@ +#!/bin/sh + +# argument 1 is the string need grep +# argument 2 is the filename + +set -eu +OUT_DIR=/tmp/tidb_binlog_test + +if grep -Fq "$1" "$2"; then + echo "TEST FAILED: '$2' CONTAIN '$1'" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 +fi \ No newline at end of file diff --git a/sync_diff_inspector/tests/conf/client.crt b/sync_diff_inspector/tests/conf/client.crt new file mode 100644 index 00000000000..0e14073aca2 --- /dev/null +++ b/sync_diff_inspector/tests/conf/client.crt @@ -0,0 +1,25 @@ +-----BEGIN CERTIFICATE----- +MIIEKDCCAhACCQC8wBajSPPO6jANBgkqhkiG9w0BAQsFADBCMQswCQYDVQQGEwJY +WDEVMBMGA1UEBwwMRGVmYXVsdCBDaXR5MRwwGgYDVQQKDBNEZWZhdWx0IENvbXBh +bnkgTHRkMCAXDTIzMDkyMTE1NTEwNVoYDzIwNTEwMjA2MTU1MTA1WjBoMQswCQYD +VQQGEwJzZDEMMAoGA1UECAwDc2RmMQ0wCwYDVQQHDARzZGZ2MQwwCgYDVQQKDANk +dnMxDDAKBgNVBAsMA3ZkczEMMAoGA1UEAwwDdmRzMRIwEAYJKoZIhvcNAQkBFgNz +ZHYwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDsc8jf3AshHroMGGuT +D7PDIb29IL91BbIm+8LYr1/dCio0mZX7tW4CKlw+OdpJo5oLZGvuvq6BkHPrzCa5 +NvwZGGRuJUEqMEoBk/aDsOJ15JTZ8XEPPNNBNgKT0eq8V+KEBcQ4y8llBhEoFFOJ +9pYMn+RdK8n2s2qTqufXO6gNgLKHJCvu5cWsQqGnFfIr3vv6iBmaQHi6krjk75Ti 
+LgqmwF+u49Iexdd7DWBYyL+YjcVoN0Bjjsk0d4xB+VRE2JNVehWNIvK8bOMfgLy4 +pUNm6PXPNxn8CZDGhKYEZBiTfh/LkXgsc7FawPBtddWwJ17DV6sLqa3LkbomNjHt +g2gpAgMBAAEwDQYJKoZIhvcNAQELBQADggIBAKRTOQApEZbMyEcROEjIo2xf5Svb +h/eMsG+9xjh5KvxLrXNogVA/pd3n2tk1TNSgIz9jc+AhixIeFSjzi0KTrtbHoVYT +iZ0pPPBvbuBfSFeQWxGPvosfARRivSiljo4wsTuHPUNEj2EqQ5NXEzpEKXkTKaD8 +T3vLXSK9h+9GREGTAecVGbxfxfGeW94q3Z0QMA9uNu04Y2g16r/uUdQ5ih7nv1zM +84RcHTeMCWZV9i3unf4j7Sn5ShnKs7Q18iKQZPZRN4rh7HACe7JAa38Z2DzThPoA +MaROPpzZ5pUFSVdCYAh5YtwwDEmT6AKyW8LfO5bYLg+LABoGXmDCQr0Zo4zEMVmt +TCwaSNwvGA65llXXMCxivIERoXmoV+PyNbC6oGE//znC32jWNfxwc12Y7sqBZgXn +q8D08+XE3OAncCwP7zPBY5MLvYecIPos97qwJg3GECwkV82iIWmSb4xC0E4XIStA +YXDvjwlRkQ6VS/J9Igl+PdRwIxKPjDsSxnWKH6rshX6zasgJBB31txFQIaX6/DAZ +ZQpMFvjBxZ1L1q4gpdnq60l4Ok+bj3pz6vHyZ5DD1dKu5yMWfh0jqR9k6fAy5Tay +ESgqcCB3d8l4MHL8L4vqV8TkA/L5r4h5jpms3oe2+bHASdbV3uwLlqNBLGP5WvOF +GwsnD7vwZG0TRLPf +-----END CERTIFICATE----- diff --git a/sync_diff_inspector/tests/conf/client.key b/sync_diff_inspector/tests/conf/client.key new file mode 100644 index 00000000000..38b3a840be5 --- /dev/null +++ b/sync_diff_inspector/tests/conf/client.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEogIBAAKCAQEA7HPI39wLIR66DBhrkw+zwyG9vSC/dQWyJvvC2K9f3QoqNJmV ++7VuAipcPjnaSaOaC2Rr7r6ugZBz68wmuTb8GRhkbiVBKjBKAZP2g7DideSU2fFx +DzzTQTYCk9HqvFfihAXEOMvJZQYRKBRTifaWDJ/kXSvJ9rNqk6rn1zuoDYCyhyQr +7uXFrEKhpxXyK977+ogZmkB4upK45O+U4i4KpsBfruPSHsXXew1gWMi/mI3FaDdA +Y47JNHeMQflURNiTVXoVjSLyvGzjH4C8uKVDZuj1zzcZ/AmQxoSmBGQYk34fy5F4 +LHOxWsDwbXXVsCdew1erC6mty5G6JjYx7YNoKQIDAQABAoIBAF5boji7TfGsmmAD +SsKIDJt9FoOn0t93I23tIAdqjN3flZIiDDlDYKAamW73lUW3tNyA+ZVvoKHxrGDX +ukhFSfsVwdY2pbeZR58zlbdd2jFjw4bpk82Z30/xpruolH0OsUUwK+riX/8hma0d +12yB68Uj9XS86b66QHinRhedJeYkxjZG/F94zPXBPh+qqmU0ubf5VOqBFAAQJv7k +736Z5CG9Hp+mnBvWa+oDKj+fNe6kXwA4DNiZumVTqaCifGldHIwrQuP/RZnuO6WM +XlA67eiHsHQZQ7j99biAfWunEqktK/B5rxhICSe/bRwXmT852kQUFBlpdMefGtX8 +DzeFS3UCgYEA+d+7h7hFQbmCIkVUsltKmnljqiLVfc6Ee/K6giQldMsijcCRpiDu +gyLDph/Et9ncFFI1t0zwm8XQyIzVlG69Oq118dxSrRmuRpL53CnsfQ8UCIl1kCxU +NkoOWrK/UElVl0oOX/xyNVhxbaeAE2gitpiTZvBiiRc+BfqR+QIif78CgYEA8j/Q +h+F+iP5jd+d/eDe4bE3ekNjX6Xo+HQFwYNSDmcllxGpKa/IR8hzNXH/Z2xJGjLw8 +FIeZ/O3ub8wVOQKjvalg7y6u1pen4HlNT+L6sIUZEy0DaEDyC5iBFPs0uE/0bXkv +iyoIk6uKBzeUHWKVVjBItpQwwU0Hjd3zZmQxkhcCgYAMa/zsoFKBE/HONlghjbxF +tacovBaU7IFVkBmJgraB+d874Mjc02JIDqBfT9D9uszgDb4x4JdNhyX78lRjzqWF +lz33yhYqGM67H29gbI0fInLCgeLgSfPdxwyzoZM1MJAat0nDp88dq8nnw53wWQrw +vOEHCwg6/HbO4UgEcwC4DQKBgA7EWxbdZRQ+xZt6jieq5eAcQxP7U/YUkJK+Erak +Xb1TLJPzksPPxs5GeTJJTONw0sIje1oZBgcIDf/cpfKKuaaHG7SY2OmV5xLk8hSL +lpKKGoQzu0BwrRCN5Fh+E7GklhbSS1alYk52J3zXI31DFC1j1hrjH7G421wHip3U +P71TAoGAKLpj4ZA4XiyZcejX3C32IuuFP4hUTDmmI/kNuJ8MqolTGGlFdvk2RqKR +SDW2cGRiPx+h+KmTzWJawZNAuWYCX13yj5+WiIdLl11yT+sWrjQl3MWerv6Mb9Y5 +SNfwucXTV6gMwq0djVszS3kBNP1keypQosFhNZHVLVGEpK7TmwM= +-----END RSA PRIVATE KEY----- diff --git a/sync_diff_inspector/tests/conf/generate_script.sh b/sync_diff_inspector/tests/conf/generate_script.sh new file mode 100644 index 00000000000..385f393e738 --- /dev/null +++ b/sync_diff_inspector/tests/conf/generate_script.sh @@ -0,0 +1,16 @@ +# root.key +openssl genrsa -out root.key 4096 +# root.crt +openssl req -new -x509 -key root.key -out root.crt -days 10000 +# tidb.key +openssl genrsa -out tidb.key 2048 +# tidb.csr, DON'T LEAVE WITH EMPTY INFORMATION +openssl req -new -key tidb.key -out tidb.csr +# tidb.crt +openssl x509 -req -days 10000 -CA root.crt -CAkey root.key -CAcreateserial -in tidb.csr -out tidb.crt +# client.key +openssl genrsa -out client.key 2048 +# client.csr, DON'T LEAVE WITH 
EMPTY INFORMATION +openssl req -new -key client.key -out client.csr +# client.crt +openssl x509 -req -days 10000 -CA root.crt -CAkey root.key -CAcreateserial -in client.csr -out client.crt diff --git a/sync_diff_inspector/tests/conf/root.crt b/sync_diff_inspector/tests/conf/root.crt new file mode 100644 index 00000000000..29110b65e6c --- /dev/null +++ b/sync_diff_inspector/tests/conf/root.crt @@ -0,0 +1,31 @@ +-----BEGIN CERTIFICATE----- +MIIFWTCCA0GgAwIBAgIJAKPBAEg3MkuTMA0GCSqGSIb3DQEBCwUAMEIxCzAJBgNV +BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg +Q29tcGFueSBMdGQwIBcNMjMwOTIxMTQ1OTU0WhgPMjA1MTAyMDYxNDU5NTRaMEIx +CzAJBgNVBAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0Rl +ZmF1bHQgQ29tcGFueSBMdGQwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoIC +AQDwrasqh2GJgq+ggHtd78Ocjx2CkoO+5Pn9QS0fupDN3O6Jo1/2dVBpDj0VE50I +3bfJ8ubBQbevCJnq/1J5jBwoN+ZtxJ3md2Vwv4cpFHDssUcaepH5fAA4wWDUDHd3 +ShE+90M7LbTZxpSMYA8/JmNhFIDYFoeSAMl1001m/CVHIRFl16ifeKHryFafx8on +poOyu2Q6MDDCimfbK4p8iaQXeJc/T99Gv6+kbRIJ/HrBD5LU/fR0asSt+9CXm4kH +7B+GzPDEnrV22Qv2LB/P87GHdit0WyuC613qOD7yAQwS4KE6UCahjYG2gPai6gwM +tkTwIHw9jRSw8nBKxBPlbL21eZQL/q+6ekVSV+bRVsAXf9svA4HC1d+5O/iWLXqU +E4TPcCPmfld523UsyBVz62NCIluKZejjdJCZFtvwy8yeZ2IuBcQq+VWoMttAYivk +c2rq0qE/FokuJR7efy7uQMNzxQgQueMQfn6fARJJu7KF0P0lrUAaMy+iZQfc4+ca +ozjqHMgjs1yIiYGCwCvzCfWL/qD/EK4bZhhpfsNF5Nb4AcQFRy9k2gJvXuUGu31/ +r59TwVoE/ItQ5zik8OXP2WVCiGeQ+nk3uEkpclA1aFnfXfcYPXsW0fJJAPLIxU3J +cT6D3ZlDQmhJpri/vVIPCXzJx3yMqOn3hDEqR3ULG00FKQIDAQABo1AwTjAdBgNV +HQ4EFgQU42kTinG330mWu6zhUQSJ+ZdJLkUwHwYDVR0jBBgwFoAU42kTinG330mW +u6zhUQSJ+ZdJLkUwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAgEARHhc +pY8QQVyOpVwrCO/jBNCTfGb6FGjtHLpO9smN6hdtMgjxfDF1Ljuf1SaBM7Up+ina +1NG1VvFB6cOPn4Nvwv/AmznXje+R4QDD7fjuCW3VHZpzWHQENDz/4pwjaZk85DO7 +P9TuX1OMsGvNElb/576gU6eb1mpg7QX5mk+y+xW7TNFzg+x6pLKr4jxenQ2EIsYV +6TzGYN85GS21V2yLi8NlMWsd5mhHYlCl92sRip4pb0Tzzp9ErXMDCoer6hVjQGES +i8rc6r9XNUUka4bU/TyjvAa8BIC75bEvn1FspaifbxBezQVvuojUw5XdC+GJCkpL +Nmys5H9an+gq1s79VNWhXtlCfvE540WI5CgfCQJuY6I8KAw17FvjcTrGkgay8c+t +cKpEDO0ZSXNTIrNjIZJE3AhCIRxViWoGDHWWPX8eGz9OAeuswEVoZ77EIczAgZC+ +CvMw5XakoRdEuYl9Sm0x3P0BuqRGSAwmoFM4bFGTeHK39yOzL8NZbBL940Ed0eae +tM8GPGxtL0x6Cn9yf/Q2jXH/6jElS1v8LXfivAgqiC6h0MNPPZ749XXULIUkSddB +Z7O7AFG35TXnJ2z00FOBmXAv31Wd78KMiY4DKFSyEovj0HkJsDEnn1WSya+zI4Vw +FyOO1ffCdJ+jLCC/gfbqI32iJiHWDz8hG9cIApg= +-----END CERTIFICATE----- diff --git a/sync_diff_inspector/tests/conf/tidb.crt b/sync_diff_inspector/tests/conf/tidb.crt new file mode 100644 index 00000000000..49049259e93 --- /dev/null +++ b/sync_diff_inspector/tests/conf/tidb.crt @@ -0,0 +1,25 @@ +-----BEGIN CERTIFICATE----- +MIIEJzCCAg8CCQC8wBajSPPO6TANBgkqhkiG9w0BAQsFADBCMQswCQYDVQQGEwJY +WDEVMBMGA1UEBwwMRGVmYXVsdCBDaXR5MRwwGgYDVQQKDBNEZWZhdWx0IENvbXBh +bnkgTHRkMCAXDTIzMDkyMTE1NTAyNVoYDzIwNTEwMjA2MTU1MDI1WjBnMQswCQYD +VQQGEwJmZDELMAkGA1UECAwCZXcxDDAKBgNVBAcMA2ZldzENMAsGA1UECgwEZndl +ZjEMMAoGA1UECwwDd2VkMQwwCgYDVQQDDANkZXcxEjAQBgkqhkiG9w0BCQEWA2Rl +dzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAN8dJT6PA+6lxtRntEsZ +gTveN1IvJfvnm1Zdw1Ryv/fEwE3hrGj3M61g+arZq2kgz58NzD6Uiis3sva1G4DK +4eh/9vscXq6xZ+VR89RhTVD+BqJes0l0+hhd7gT2v6Cn9VhgOP740rp4ZpmlQW6w ++/CBZ0k1dC4XD/QTIpETbmPIlavVATbQNqXDiObjxqVEyOu4QoNWC6PrvjBcO65L +jWxBnoGD6EH2ow/J6YJQh+U3mNy4iDkEz/sLJPVRaabplBWGfT887iiJighTCEpN +Be8NeUIKOykHZiefmynhKBz3hv32IuFhMt8u6ZzORC8uC/lID+Mty8Pnn4C2bKQn +0UsCAwEAATANBgkqhkiG9w0BAQsFAAOCAgEAmYd6xg/15JeIbmbpOJVnQAPvfOPa +3eZJjhcP0kjGe3ZQeQaW1XSbn/vsx7+rofu8j/yutmqs6HBxFWKkrlRODIcDwECo +7HZzDUtL3eZr6sPoPadHRV5k8DT6JWsb8rYhbe2VWOh+DxN2YACgD+We4Tdh44Ce 
+LKLwqHcBaXDKeoLGhjNQDIaUF1g/D7f3ad7H9vKUd4Pc4hgmOh3zIKido8/14uil +e/x0dp9uwRdWVBL+BJDZGXGRVQAo+Cg0RRV0xmsjSQ8RJSmB+kb4aQuBHq2J1unR +mTRCZ7Y2bscdCOCvzumzaEvI+7yUnoXceymW30cKAUnpCMMIiTiDXBmMVxV7B0wq +RUCIi5uIt66pw/r/02u4aAJyO4jNAbt+Rhpg6bGa0Ng+4YWt1XHRN1jSpcIhR7Gp +almdVZhG128ZmcEIpJhnTc8bj5DwCMkR5D2gbg6jeFOHz2bFMXavTze30z36B/Uy +UFhku8ZdlciDL+7jhFTAuAgERwxnCBgO9tu4rL3KbZ3S0GY4CzgDQgiUB5e9h4iY +tdix5bPKSirirljwGxPrIxKppaxQhgU2pqJ3ZQPgLGG8NkEA0ycbQErnlb2sRtKE +PPI59DchHn9t7KcuANwg3HsCcL0Ts3/0HWA6hX9uccpNS+HnIjnhr8rdxylHezZw +33VTbDV0ahqXRiY= +-----END CERTIFICATE----- diff --git a/sync_diff_inspector/tests/conf/tidb.key b/sync_diff_inspector/tests/conf/tidb.key new file mode 100644 index 00000000000..6ad3cd2b6c0 --- /dev/null +++ b/sync_diff_inspector/tests/conf/tidb.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEA3x0lPo8D7qXG1Ge0SxmBO943Ui8l++ebVl3DVHK/98TATeGs +aPczrWD5qtmraSDPnw3MPpSKKzey9rUbgMrh6H/2+xxerrFn5VHz1GFNUP4Gol6z +SXT6GF3uBPa/oKf1WGA4/vjSunhmmaVBbrD78IFnSTV0LhcP9BMikRNuY8iVq9UB +NtA2pcOI5uPGpUTI67hCg1YLo+u+MFw7rkuNbEGegYPoQfajD8npglCH5TeY3LiI +OQTP+wsk9VFppumUFYZ9PzzuKImKCFMISk0F7w15Qgo7KQdmJ5+bKeEoHPeG/fYi +4WEy3y7pnM5ELy4L+UgP4y3Lw+efgLZspCfRSwIDAQABAoIBAQDbH7fNeg5FmuDt +CTDkNRdcogE5IP8wKSbBqvLFBLIO+FhZ5RM8P9SsKjrqlj8nz8a5koaOy7nrehe0 +/ugVFKzMeKkrtJA1CB1e9p7/bdTOf74qR9HTiiR6O/4hN+m6MlrewhoYKG5mHwu9 +SLU7rmyxp5W2K95/ybysXQKs8yWOrsMCxnq4N69QvqkThkTdBVNwbBoxIMwe66Yr +DuEtocherwTeDxRQjSzkK0oafxy0lokLrXp1wGy/goacYNlyfpQmztlkiGxRW/Dq +2816W3u2sgH86Oy9WbAgiJcjLGfvAq/f2acRBZ4RKX+KiA1KNvgpHWUw6XxBjsIF +9J+RPNNJAoGBAPD/cHJw2qOSvem/taOCY2RcRk6RPtTdJxrmBjMjlhs0+asrmK5P +/1dtfx0sZ7TPE7oo6rkD6ZeCj/QsS9dWgJSq18abjGIIkZbLIa4DWZkDrdQoG2Bd +11F01DVpgfk8F1dieSiL5p0HvEsNZugjeMRQwPem/NoPivZ9sA3aD/t9AoGBAO0A +tLwGQEEk0xQjGyfsfWjA4IYbb1elZ0AGkgKnQxgkzgwsdJ4HIqNzpT7Jpwr8Iaj1 +wv2VAP3/EpEXpcM82oQG4jX/4W7jc+DWN5mEDvvoQKWgaxuA6OOS3aF71iTs7rcv +G7Ju6kCKfAHMT1138yRAfKITZaqziNyeX1EgvcpnAoGAPNcv6yREfiEQos9MKtBD +CVYmRbVzWEfQlIDXtddZENtJ4IWsEO2PN+Ijwhiwwbu5bjjgMP8k3KQQdYMtTlq/ +MUkEGlawlRs2rgvwH78mwnNkUfgiGSz3q8/DtwxAzMv31I6+qZbQDHqkdYoXnak+ +1sjQPnVAxkhAO8Q1SvnvKP0CgYB/MIjN+0DSdRO+U9TICTeIVzJnZiPL0p1lk+Ea +AW+VbnMRv23aPRQOygpddtTppUPfK/04H5YHubLaIOm5rFfM0PDnb+oom3JdsDjo +byGneQ3wlPXGLdlOAExm1FGpQWoe7u4bRUD74BYK1P2muK/Ivb7lMCm4gV8qnuei +X0LbcwKBgQDnxsFKrxrmsSKXIdGGqj6sn8fZwS6TcpyFNkkgItLIoYez0aiSLHe0 +WDBa4VGfhraUxGs3KsRWHIgFAIaJhVqfiVnRTFE08/U2vbB3GF3SntaSZVkFUNaT +a+LJzaNzrkyikjt42tYQGmX/5W2f/597PfrGxxiWKG6UTT/kHIwDjw== +-----END RSA PRIVATE KEY----- diff --git a/sync_diff_inspector/tests/importer/run.sh b/sync_diff_inspector/tests/importer/run.sh new file mode 100644 index 00000000000..b82a2ef1575 --- /dev/null +++ b/sync_diff_inspector/tests/importer/run.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +TEST_DATABASE_NAME=checker_test +IMPORT_EXEC="../bin/importer -c 1 -h ${MYSQL_HOST} -P ${MYSQL_PORT} -D ${TEST_DATABASE_NAME}" +MYSQL_EXEC="mysql -h ${MYSQL_HOST} -P ${MYSQL_PORT} -u root" + +init(){ + check_db_status "${MYSQL_HOST}" "${MYSQL_PORT}" mysql "." 
+ ${MYSQL_EXEC} -e "drop database if exists ${TEST_DATABASE_NAME};" + ${MYSQL_EXEC} -e "create database ${TEST_DATABASE_NAME};" +} + +destroy(){ + ${MYSQL_EXEC} -e "drop database if exists ${TEST_DATABASE_NAME};" +} + +testImporter(){ + ${IMPORT_EXEC} -c 1 -n 10 -t "$1" -i "$2" + RESULT=`${MYSQL_EXEC} -e "$3" | sed -n '2p'` + if [[ "${RESULT}" != "$4" ]]; then + echo "Test importer failed: $1" + exit 1 + fi +} + +set -e +init +testImporter "create table ta(a int primary key, b double, c varchar(10), d date unique, e time unique, f timestamp unique, g date unique, h datetime unique, i year unique);" "create unique index u_b on ta(b);" "select count(*) as result from ${TEST_DATABASE_NAME}.ta" "10" +testImporter "create table tb(a int comment '[[range=1,10]]');" "" "select count(*) as result from ${TEST_DATABASE_NAME}.tb where a <= 10 and a >= 1" "10" +testImporter "create table tc(a int unique comment '[[step=2]]');" "" "select sum(a) as result from ${TEST_DATABASE_NAME}.tc" "90" +testImporter "create table td(a int comment '[[set=1,2,3]]');" "" "select count(*) as result from ${TEST_DATABASE_NAME}.td where a <= 3 and a >= 1" "10" +destroy diff --git a/sync_diff_inspector/tests/run.sh b/sync_diff_inspector/tests/run.sh new file mode 100755 index 00000000000..7d0e04a1a1f --- /dev/null +++ b/sync_diff_inspector/tests/run.sh @@ -0,0 +1,119 @@ +#!/bin/sh + +set -eu + +OUT_DIR=/tmp/tidb_tools_test + +# assign default value to mysql config +if [[ -z ${MYSQL_HOST+x} ]]; then + echo "set MYSQL_HOST as default value \"127.0.0.1\"" + export MYSQL_HOST="127.0.0.1" +fi +if [[ -z ${MYSQL_PORT+x} ]]; then + echo "set MYSQL_PORT as default value 3306" + export MYSQL_PORT=3306 +fi + +mkdir -p $OUT_DIR || true +# to the dir of this script +cd "$(dirname "$0")" + +pwd=$(pwd) + +export PATH=$PATH:$pwd/_utils +export PATH=$PATH:$(dirname $pwd)/bin + +rm -rf $OUT_DIR || true + +stop_services() { + killall -9 tikv-server || true + killall -9 pd-server || true + killall -9 tidb-server || true +} + +start_services() { + stop_services + + echo "Starting PD..." + pd-server \ + --client-urls http://127.0.0.1:2379 \ + --log-file "$OUT_DIR/pd.log" \ + --data-dir "$OUT_DIR/pd" & + # wait until PD is online... + while ! curl -o /dev/null -sf http://127.0.0.1:2379/pd/api/v1/version; do + sleep 1 + done + + # Tries to limit the max number of open files under the system limit + cat - > "$OUT_DIR/tikv-config.toml" < "$OUT_DIR/tidb-config.toml" <>>\033[0m" diff --git a/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base.toml b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base.toml new file mode 100644 index 00000000000..37629b414fa --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base.toml @@ -0,0 +1,53 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. 
+export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[table-configs] +[table-configs.config1] +target-tables = ["diff_test.test"] +chunk-size = 10 +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["diff_test.test"] + + # extra table config + target-configs= ["config1"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_continous.toml b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_continous.toml new file mode 100644 index 00000000000..b83552c0cd7 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_continous.toml @@ -0,0 +1,55 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[table-configs] +[table-configs.config1] +target-tables = ["diff_test.test"] +chunk-size = 50 +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["diff_test.test", "diff_test.ttt"] + + # extra table config + target-configs= ["config1"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_rand.toml b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_rand.toml new file mode 100644 index 00000000000..50d15de73b7 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_rand.toml @@ -0,0 +1,54 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. 
+# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.mysql] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[table-configs] +[table-configs.config1] +target-tables = ["diff_test.test"] +chunk-size = 500 +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "mysql" + + # tables need to check. + target-check-tables = ["diff_test.test"] + + # extra table config + target-configs= ["config1"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/run.sh new file mode 100644 index 00000000000..9aa782c7735 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/run.sh @@ -0,0 +1,155 @@ +#!/bin/sh + +set -ex + +cd "$(dirname "$0")" + +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +rm -rf $OUT_DIR +mkdir -p $OUT_DIR + +# create table diff_test.test(`table` int, aa int, b varchar(10), c float, d datetime, primary key(a), key(aa)); + +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml + +echo "================test bucket checkpoint=================" +echo "---------1. chunk is in the last of the bucket---------" +export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/check-one-bucket=return();\ +github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return();\ +main/wait-for-checkpoint=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +# Save the last chunk's info, +# to which we will check whether the first chunk's info is next in the next running. +last_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'upperBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'END {print}') +echo "$last_chunk_info" # e.g. 537 indexCode=0:0-0:3:4 +last_chunk_bound=$(echo $last_chunk_info | awk -F ' ' '{print $1}') +echo "$last_chunk_bound" +last_chunk_index=$(echo $last_chunk_info | awk -F '=' '{print $2}') +echo "$last_chunk_index" +OLD_IFS="$IFS" +IFS=":" +last_chunk_index_array=($last_chunk_index) +IFS="$OLD_IFS" +for s in ${last_chunk_index_array[@]} +do +echo "$s" +done +# chunkIndex should be the last Index +[[ $((${last_chunk_index_array[2]} + 1)) -eq ${last_chunk_index_array[3]} ]] || exit 1 +# Save bucketIndexRight, which should be equal to bucketIndexLeft of the chunk first created in the next running. 
+bucket_index_right=$(($(echo ${last_chunk_index_array[1]} | awk -F '-' '{print $2}') + 1)) +echo $bucket_index_right + +rm -f $OUT_DIR/sync_diff.log +export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +first_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'lowerBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'NR==1') +echo $first_chunk_info | awk -F '=' '{print $1}' > $OUT_DIR/first_chunk_bound +cat $OUT_DIR/first_chunk_bound +echo $first_chunk_info | awk -F '=' '{print $3}' > $OUT_DIR/first_chunk_index +cat $OUT_DIR/first_chunk_index +# Notice: when chunk is created paralleling, the least chunk may not appear in the first line. so we sort it as before. +check_contains "${last_chunk_bound}" $OUT_DIR/first_chunk_bound +check_contains_regex ".:${bucket_index_right}-.:0:." $OUT_DIR/first_chunk_index + +echo "--------2. chunk is in the middle of the bucket--------" +rm -rf $OUT_DIR +mkdir -p $OUT_DIR +export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/check-one-bucket=return();\ +github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/ignore-last-n-chunk-in-bucket=return(1);\ +github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return();\ +main/wait-for-checkpoint=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +# Save the last chunk's info, +# to which we will check whether the first chunk's info is next in the next running. +last_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'upperBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'END {print}') +echo "$last_chunk_info" # e.g. 537 indexCode=0:0-0:3:4 +last_chunk_bound=$(echo $last_chunk_info | awk -F ' ' '{print $1}') +echo "$last_chunk_bound" +last_chunk_index=$(echo $last_chunk_info | awk -F '=' '{print $2}') +echo "$last_chunk_index" +OLD_IFS="$IFS" +IFS=":" +last_chunk_index_array=($last_chunk_index) +IFS="$OLD_IFS" +for s in ${last_chunk_index_array[@]} +do +echo "$s" +done +# chunkIndex should be the last Index +[[ $((${last_chunk_index_array[2]} + 2)) -eq ${last_chunk_index_array[3]} ]] || exit 1 +# Save bucketIndexRight, which should be equal to bucketIndexLeft of the chunk first created in the next running. +bucket_index_left=$(echo ${last_chunk_index_array[1]} | awk -F '-' '{print $1}') +bucket_index_right=$(echo ${last_chunk_index_array[1]} | awk -F '-' '{print $2}') +echo "${bucket_index_left}-${bucket_index_right}" + +rm -f $OUT_DIR/sync_diff.log +export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +first_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'lowerBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'NR==1') +echo $first_chunk_info | awk -F '=' '{print $1}' > $OUT_DIR/first_chunk_bound +cat $OUT_DIR/first_chunk_bound +echo $first_chunk_info | awk -F '=' '{print $3}' > $OUT_DIR/first_chunk_index +cat $OUT_DIR/first_chunk_index +# Notice: when chunk is created paralleling, the least chunk may not appear in the first line. so we sort it as before. 
+check_contains "${last_chunk_bound}" $OUT_DIR/first_chunk_bound +check_contains_regex ".:${bucket_index_left}-${bucket_index_right}:$((${last_chunk_index_array[2]} + 1)):${last_chunk_index_array[3]}" $OUT_DIR/first_chunk_index + + +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_rand.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml + +echo "================test random checkpoint=================" +echo "--------------1. chunk is in the middle----------------" +rm -rf $OUT_DIR +mkdir -p $OUT_DIR +export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/ignore-last-n-chunk-in-bucket=return(1);\ +github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return();\ +main/wait-for-checkpoint=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +# Save the last chunk's info, +# to which we will check whether the first chunk's info is next in the next running. +last_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'upperBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'END {print}') +echo "$last_chunk_info" # e.g. 537 indexCode=0:0-0:3:4 +last_chunk_bound=$(echo $last_chunk_info | awk -F ' ' '{print $1}') +echo "$last_chunk_bound" +last_chunk_index=$(echo $last_chunk_info | awk -F '=' '{print $2}') +echo "$last_chunk_index" +OLD_IFS="$IFS" +IFS=":" +last_chunk_index_array=($last_chunk_index) +IFS="$OLD_IFS" +for s in ${last_chunk_index_array[@]} +do +echo "$s" +done +# chunkIndex should be the last Index +[[ $((${last_chunk_index_array[2]} + 2)) -eq ${last_chunk_index_array[3]} ]] || exit 1 + +rm -f $OUT_DIR/sync_diff.log +export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +first_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'lowerBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'NR==1') +echo $first_chunk_info | awk -F '=' '{print $1}' > $OUT_DIR/first_chunk_bound +cat $OUT_DIR/first_chunk_bound +echo $first_chunk_info | awk -F '=' '{print $3}' > $OUT_DIR/first_chunk_index +cat $OUT_DIR/first_chunk_index +# Notice: when chunk is created paralleling, the least chunk may not appear in the first line. so we sort it as before. 
+check_contains "${last_chunk_bound}" $OUT_DIR/first_chunk_bound +check_contains_regex ".:0-0:$((${last_chunk_index_array[2]} + 1)):${last_chunk_index_array[3]}" $OUT_DIR/first_chunk_index + + +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_continous.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +echo "================test checkpoint continous=================" +# add a table have different table-structs of upstream and downstream +# so data-check will be skipped +mysql -uroot -h 127.0.0.1 -P 4000 -e "create table IF NOT EXISTS diff_test.ttt(a int, aa int, primary key(a), key(aa));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table IF NOT EXISTS diff_test.ttt(a int, b int, primary key(a), key(b));" +export GO_FAILPOINTS="main/wait-for-checkpoint=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output || true +grep 'save checkpoint' $OUT_DIR/sync_diff.log | awk 'END {print}' > $OUT_DIR/checkpoint_info +check_not_contains 'has-upper\":true' $OUT_DIR/checkpoint_info + +export GO_FAILPOINTS="" \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/config_base_mysql.toml b/sync_diff_inspector/tests/sync_diff_inspector/config_base_mysql.toml new file mode 100644 index 00000000000..f56695ba72a --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/config_base_mysql.toml @@ -0,0 +1,47 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["diff_test.test"] + diff --git a/sync_diff_inspector/tests/sync_diff_inspector/config_base_tidb.toml b/sync_diff_inspector/tests/sync_diff_inspector/config_base_tidb.toml new file mode 100644 index 00000000000..726db6c1e00 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/config_base_tidb.toml @@ -0,0 +1,49 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. 
+export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.tidb1] + host = "127.0.0.1" + port = 4001 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["tidb1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["diff_test.test"] + + diff --git a/sync_diff_inspector/tests/sync_diff_inspector/expression/config.toml b/sync_diff_inspector/tests/sync_diff_inspector/expression/config.toml new file mode 100644 index 00000000000..782ffa97884 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/expression/config.toml @@ -0,0 +1,54 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.tidb1] + host = "127.0.0.1" + port = 4001 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["tidb1"] + + target-instance = "tidb" + + # tables need to check. 
+ target-check-tables = ["expression_test.diff"] + + target-configs = ["config1"] + +######################### Table config ######################### +[table-configs.config1] +target-tables = ["test1.v"] +range = "TRUE" +chunk-size = 1 diff --git a/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh new file mode 100644 index 00000000000..612fc24cbe8 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +set -ex + +cd "$(dirname "$0")" +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +FIX_DIR=/tmp/tidb_tools_test/sync_diff_inspector/fixsql +rm -rf $OUT_DIR +rm -rf $FIX_DIR +mkdir -p $OUT_DIR +mkdir -p $FIX_DIR + +for port in 4000 4001; do + mysql -uroot -h 127.0.0.1 -P $port -e "create database if not exists expression_test;" + mysql -uroot -h 127.0.0.1 -P $port -e "create table expression_test.diff(\`a\`\`;sad\` int, id int);" + mysql -uroot -h 127.0.0.1 -P $port -e "alter table expression_test.diff add index i1((\`a\`\`;sad\` + 1 + \`a\`\`;sad\`));" + mysql -uroot -h 127.0.0.1 -P $port -e "insert into expression_test.diff values (1,1),(2,2),(3,3);" +done + +echo "check result should be pass" +sync_diff_inspector --config=./config.toml > $OUT_DIR/expression_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* diff --git a/sync_diff_inspector/tests/sync_diff_inspector/json/config_base.toml b/sync_diff_inspector/tests/sync_diff_inspector/json/config_base.toml new file mode 100644 index 00000000000..fc0d8f8e9a9 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/json/config_base.toml @@ -0,0 +1,47 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. 
+ target-check-tables = ["json_test.test"] + diff --git a/sync_diff_inspector/tests/sync_diff_inspector/json/data.sql b/sync_diff_inspector/tests/sync_diff_inspector/json/data.sql new file mode 100644 index 00000000000..4a13f371b9d --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/json/data.sql @@ -0,0 +1,7 @@ +create database if not exists json_test; +create table json_test.test (a int, b json, primary key(a)); + +insert into json_test.test values (1, '{"id": 1, "name":"aaa"}'); +insert into json_test.test values (2, '{"id": 2, "name":"bbb", "sub": {"id": "2-1", "num": 3, "array": ["123", "456", "789"], "num_array": [123, 456, 789]}}'); +insert into json_test.test values (3, '{"name":"ccc", "id": 3}'); +insert into json_test.test values (4, '{"id": 4, "bool": true, "name":"aaa"}'); diff --git a/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh new file mode 100644 index 00000000000..30824a26fdf --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh @@ -0,0 +1,41 @@ +#!/bin/sh + +set -e + +cd "$(dirname "$0")" + +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +rm -rf $OUT_DIR +mkdir -p $OUT_DIR + +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} < ./data.sql + +# tidb +mysql -uroot -h 127.0.0.1 -P 4000 < ./data.sql + +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +cat config.toml | sed 's/export-fix-sql = true/export-fix-sql = false/' > config_nofix.toml +diff config.toml config_nofix.toml || true + +echo "compare json tables, check result should be pass" +sync_diff_inspector --config=./config.toml > $OUT_DIR/json_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "compare json tables without fixsql, check result should be pass" +sync_diff_inspector --config=./config_nofix.toml > $OUT_DIR/json_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "update data to make it different, and data should not be equal" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into json_test.test values (5, '{\"id\": 5, \"bool\": true, \"name\":\"aaa\"}');" +mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into json_test.test values (5, '{\"id\": 5, \"bool\": false, \"name\":\"aaa\"}');" +sync_diff_inspector --config=./config.toml > $OUT_DIR/json_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "update data to make it different, and downstream json data is NULL" +mysql -uroot -h 127.0.0.1 -P 4000 -e "replace into json_test.test values (5, NULL);" +sync_diff_inspector --config=./config.toml > $OUT_DIR/json_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* diff --git a/sync_diff_inspector/tests/sync_diff_inspector/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/run.sh new file mode 100644 index 00000000000..338f57baec0 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/run.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +set -ex + +cd "$(dirname "$0")" + +# check mysql status +check_db_status "${MYSQL_HOST}" "${MYSQL_PORT}" mysql "." 
+ +BASE_DIR=/tmp/tidb_tools_test/sync_diff_inspector +OUT_DIR=$BASE_DIR/output + + +mkdir -p $OUT_DIR || true + +echo "use importer to generate test data" +mysql -uroot -h 127.0.0.1 -P 4000 -e "create database if not exists diff_test" +# TODO: run `importer -t "create table diff_test.test(\`table\` int, b varchar(10), c float, d datetime, primary key(a));" -c 10 -n 10000 -P 4000 -h 127.0.0.1 -D diff_test -b 1000` +# will exit with parser error, need to fix it in importer later, just change column name by mysql client now +importer -t "create table diff_test.test(a int, aa int, b varchar(10), c float, d datetime, primary key(a), key(aa));" -c 10 -n 10000 -P 4000 -h 127.0.0.1 -D diff_test -b 1000 +mysql -uroot -h 127.0.0.1 -P 4000 -e "alter table diff_test.test change column a \`table\` int" + +echo "dump data and then load to tidb and mysql" +dumpling --host 127.0.0.1 --port 4000 --user root -o $BASE_DIR/dump_diff -B diff_test -T "diff_test.test" +loader -h 127.0.0.1 -P 4001 -u root -d $BASE_DIR/dump_diff +mysql -h ${MYSQL_HOST} -P ${MYSQL_PORT} -u root -e "create database if not exists tidb_loader" +loader -h ${MYSQL_HOST} -P ${MYSQL_PORT} -u root -d $BASE_DIR/dump_diff +mysql -h ${MYSQL_HOST} -P ${MYSQL_PORT} -u root -e "select * from diff_test.test limit 10;" + +echo "use sync_diff_inspector to compare data" +# sync diff tidb-tidb +sync_diff_inspector --config=./config_base_tidb.toml > $OUT_DIR/diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log + +echo "analyze table, and will use tidb's statistical information to split chunks" +check_contains "split range by random" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* +mysql -uroot -h 127.0.0.1 -P 4000 -e "analyze table diff_test.test" +# run the explain SQL to load the stats after analyze +mysql -uroot -h 127.0.0.1 -P 4000 -e "explain select * from diff_test.test where aa > 1" +mysql -uroot -h 127.0.0.1 -P 4000 -e "explain select * from diff_test.test where \`table\` > 1" +mysql -uroot -h 127.0.0.1 -P 4000 -e "show stats_buckets" +sync_diff_inspector --config=./config_base_tidb.toml > $OUT_DIR/diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +check_not_contains "split range by random" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "test 'exclude-tables' config" +mysql -uroot -h 127.0.0.1 -P 4000 -e "create table if not exists diff_test.should_not_compare (id int)" +sync_diff_inspector --config=./config_base_tidb.toml > $OUT_DIR/diff.log +# doesn't contain the table's result in check report +check_not_contains "[table=should_not_compare]" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +# sync diff tidb-mysql +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_mysql.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config_base_mysql_.toml +sync_diff_inspector --config=./config_base_mysql_.toml #> $OUT_DIR/diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +for script in ./*/run.sh; do + test_name="$(basename "$(dirname "$script")")" + echo "---------------------------------------" + echo "Running test $script..." + echo "---------------------------------------" + sh "$script" +done diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/config_base.toml b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_base.toml new file mode 100644 index 00000000000..9c34352c958 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_base.toml @@ -0,0 +1,55 @@ +# Diff Configuration. 
+ +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + route-rules = ["rule1"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[routes.rule1] +schema-pattern = "shard_test" # pattern of the source schema name, supports the wildcards "*" and "?" +table-pattern = "test*" # pattern of the source table name, supports the wildcards "*" and "?" +target-schema = "shard_test" # target schema name +target-table = "test" # target table name + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["shard_test.test"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_1.toml b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_1.toml new file mode 100644 index 00000000000..953fd67cd0a --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_1.toml @@ -0,0 +1,55 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + route-rules = ["rule1"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_HOST + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[routes.rule1] +schema-pattern = "router_test_0" +table-pattern = "tbl" +target-schema = "router_test_1" +target-table = "tbl" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check.
+ target-check-tables = ["router_test_1.tbl"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_2.toml b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_2.toml new file mode 100644 index 00000000000..a36d0ab3727 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_2.toml @@ -0,0 +1,53 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + route-rules = ["rule1"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_HOST + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[routes.rule1] +schema-pattern = "router_test_0" +target-schema = "router_test_1" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["router_test_1.tbl"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_3.toml b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_3.toml new file mode 100644 index 00000000000..055afef9997 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_3.toml @@ -0,0 +1,55 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + route-rules = ["rule1"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_HOST + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[routes.rule1] +schema-pattern = "other_schema" +table-pattern = "tbl" +target-schema = "other_schema" +target-table = "tbl" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. 
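+    # Note: rule1 above only matches other_schema.tbl, which the shard test
+    # never creates, so this config is expected to behave like the no-rule
+    # case in config_router_4 (compare test 3 and test 4 in run.sh).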
+ target-check-tables = ["router_test_1.tbl"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_4.toml b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_4.toml new file mode 100644 index 00000000000..3b75fc35312 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_4.toml @@ -0,0 +1,49 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_HOST + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["router_test_1.tbl"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_5.toml b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_5.toml new file mode 100644 index 00000000000..8ef05c96bc5 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_5.toml @@ -0,0 +1,55 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + route-rules = ["rule1"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_HOST + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[routes.rule1] +schema-pattern = "router_test_?" +table-pattern = "tbl" +target-schema = "router_test_1" +target-table = "tbl" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. 
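+    # Note: the "?" wildcard in rule1 matches a single character, so the rule
+    # covers both router_test_0 and router_test_1 (and, judging by the checks
+    # in run.sh, the capitalized Router_test_* schemas as well), all routed
+    # to router_test_1.tbl.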
+ target-check-tables = ["router_test_1.tbl"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh new file mode 100644 index 00000000000..09fdbfa041a --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh @@ -0,0 +1,134 @@ +#!/bin/sh + +set -e + +cd "$(dirname "$0")" + +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +rm -rf $OUT_DIR +mkdir -p $OUT_DIR + +echo "generate data to sharding tables" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists shard_test;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table shard_test.test1(\`table\` int, aa int, b varchar(10), c float, d datetime, primary key(\`table\`));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table shard_test.test2(\`table\` int, aa int, b varchar(10), c float, d datetime, primary key(\`table\`));" + +# each table only have part of data +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into shard_test.test1 (\`table\`, aa, b, c, d) SELECT \`table\`, aa, b, c, d FROM diff_test.test WHERE \`table\`%2=0" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into shard_test.test2 (\`table\`, aa, b, c, d) SELECT \`table\`, aa, b, c, d FROM diff_test.test WHERE \`table\`%2=1" + +# tidb +mysql -uroot -h 127.0.0.1 -P 4000 -e "create database if not exists shard_test;" +mysql -uroot -h 127.0.0.1 -P 4000 -e "create table shard_test.test(\`table\` int, aa int, b varchar(10), c float, d datetime, primary key(\`table\`));" +mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into shard_test.test (\`table\`, aa, b, c, d) SELECT \`table\`, aa, b, c, d FROM diff_test.test;" + +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml + +echo "compare sharding tables with one table in downstream, check result should be pass" +sync_diff_inspector --config=./config.toml > $OUT_DIR/shard_diff.output +check_contains "check pass!!!" 
$OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "update data in one shard table, and data should not be equal" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "update shard_test.test1 set b = 'abc' limit 1" +sync_diff_inspector --config=./config.toml > $OUT_DIR/shard_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "check the router for shard" +# router_test_0.tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists router_test_0;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table router_test_0.tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into router_test_0.tbl values (1,\"hello1\",1);" +# Router_test_0.tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists Router_test_0;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table Router_test_0.tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into Router_test_0.tbl values (1,\"hello1\",1);" +# router_test_0.Tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists router_test_0;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table router_test_0.Tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into router_test_0.Tbl values (1,\"hello1\",1);" +# router_test_1.tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists router_test_1;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table router_test_1.tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into router_test_1.tbl values (1,\"hello1\",1);" +# Router_test_1.tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists Router_test_1;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table Router_test_1.tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into Router_test_1.tbl values (1,\"hello1\",1);" +# router_test_1.Tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists router_test_1;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table router_test_1.Tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into router_test_1.Tbl values (1,\"hello1\",1);" +# Router_test_1.Tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists Router_test_1;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table Router_test_1.Tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into Router_test_1.Tbl values (1,\"hello1\",1);" + +echo "test router 1: normal rule" +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_1.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true +check_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log +#check_not_contains "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains_count "as CHECKSUM 
FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log 1 +check_not_contains "as CHECKSUM FROM \`Router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "test router 2: only schema rule" +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_2.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true +check_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log +#check_not_contains "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains_count "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log 1 +check_not_contains "as CHECKSUM FROM \`Router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log + +rm -rf $OUT_DIR/* + +echo "test router 3: other rule" +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_3.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true +check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log +#check_contains "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains_count "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log 2 +check_contains "as CHECKSUM FROM \`Router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "test router 4: no rule" +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_4.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true +check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log +#check_contains "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains_count "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log 2 +check_contains "as CHECKSUM FROM \`Router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "test router 5: regex rule" +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_5.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true +check_contains "as 
CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log +#check_contains "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains_count "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log 2 +check_contains "as CHECKSUM FROM \`Router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "shard test passed" \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/snapshot/config_base.toml b/sync_diff_inspector/tests/sync_diff_inspector/snapshot/config_base.toml new file mode 100644 index 00000000000..a8921fb6ad4 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/snapshot/config_base.toml @@ -0,0 +1,49 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.tidb1] + host = "127.0.0.1" + port = 4001 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + #snapshot# + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["tidb1"] + + target-instance = "tidb" + + # tables need to check. 
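+    # Note: run.sh rewrites the "#snapshot#" placeholder above into a concrete
+    # snapshot = "<ts>" setting (via sed) before the snapshot comparison runs;
+    # diff_test.test itself is populated by the top-level tests/run.sh.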
+ target-check-tables = ["diff_test.test"] + + diff --git a/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh new file mode 100644 index 00000000000..cbd9e765968 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh @@ -0,0 +1,49 @@ + +#!/bin/sh + +set -e + +cd "$(dirname "$0")" + +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +FIX_DIR=/tmp/tidb_tools_test/sync_diff_inspector/fixsql +rm -rf $OUT_DIR +rm -rf $FIX_DIR +mkdir -p $OUT_DIR +mkdir -p $FIX_DIR + +mysql -uroot -h 127.0.0.1 -P 4000 -e "show master status" > $OUT_DIR/ts.log +#cat $OUT_DIR/sync_diff.log +ts=`grep -oE "[0-9]+" $OUT_DIR/ts.log` +echo "get ts $ts" + +echo "delete one data, diff should not passed" +mysql -uroot -h 127.0.0.1 -P 4000 -e "delete from diff_test.test limit 1" + +sync_diff_inspector --config=./config_base.toml > $OUT_DIR/snapshot_diff.log || true +check_contains "check failed" $OUT_DIR/sync_diff.log +# move the fix sql file to $FIX_DIR +mv $OUT_DIR/fix-on-tidb/ $FIX_DIR/ +rm -rf $OUT_DIR/* + +echo "use snapshot compare data, test sql mode by the way, will auto discover ANSI_QUOTES thus pass" +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET GLOBAL sql_mode = 'ANSI_QUOTES';" +sleep 10 +mysql -uroot -h 127.0.0.1 -P 4000 -e "show variables like '%sql_mode%'" +mysql -uroot -h 127.0.0.1 -P 4000 -e "show create table diff_test.test" +sed "s/#snapshot#/snapshot = \"${ts}\"/g" config_base.toml > config.toml +echo "use snapshot compare data, data should be equal" +sync_diff_inspector --config=./config.toml #> $OUT_DIR/snapshot_diff.log +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "execute fix.sql and use base config, and then compare data, data should be equal" +cat $FIX_DIR/fix-on-tidb/*.sql | mysql -uroot -h127.0.0.1 -P 4000 +sync_diff_inspector --config=./config_base.toml > $OUT_DIR/snapshot_diff.log +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +# reset sql mode +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET GLOBAL sql_mode = 'ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION';" + +echo "snapshot test passed" \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_config/config.toml b/sync_diff_inspector/tests/sync_diff_inspector/table_config/config.toml new file mode 100644 index 00000000000..5f5bc45a182 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/table_config/config.toml @@ -0,0 +1,60 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. 
+export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.tidb1] + host = "127.0.0.1" + port = 4001 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["tidb1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["diff_test.test"] + + # extra table config + target-configs= ["config1"] + +[table-configs] +[table-configs.config1] +# tables need to use this specified config. +# if use this config. target-tables should be a subset of #target-check-tables +target-tables = ["diff_test.test"] + +range = "TRUE"#RANGE"a < 10 OR a > 200" +index-fields = [""] +ignore-columns = [""]#IGNORE \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh new file mode 100644 index 00000000000..ad541dacb6c --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh @@ -0,0 +1,42 @@ + +#!/bin/sh + +set -e + +cd "$(dirname "$0")" + +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +FIX_DIR=/tmp/tidb_tools_test/sync_diff_inspector/fixsql +rm -rf $OUT_DIR +rm -rf $FIX_DIR +mkdir -p $OUT_DIR +mkdir -p $FIX_DIR + +echo "update data in column b (WHERE \`table\` >= 10 AND \`table\` <= 200), data should not be equal" +mysql -uroot -h 127.0.0.1 -P 4000 -e "update diff_test.test set b = 'abc' where \`table\` >= 10 AND \`table\` <= 200" + +sync_diff_inspector --config=./config.toml > $OUT_DIR/ignore_column_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +# move the fix sql file to $FIX_DIR +mv $OUT_DIR/fix-on-tidb/ $FIX_DIR/ +rm -rf $OUT_DIR/* + +echo "ignore check column b, check result should be pass" +sed 's/\[""\]#IGNORE/["b"]/g' config.toml > config_.toml +sync_diff_inspector --config=./config_.toml > $OUT_DIR/ignore_column_diff.output || true +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "set range a < 10 OR a > 200, check result should be pass" +sed 's/"TRUE"#RANGE"a < 10 OR a > 200"/"`table` < 10 OR `table` > 200"/g' config.toml > config_.toml +sync_diff_inspector --config=./config_.toml > $OUT_DIR/ignore_column_diff.output || true +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "execute fix.sql and use base config, and then compare data, data should be equal" +cat $FIX_DIR/fix-on-tidb/*.sql | mysql -uroot -h127.0.0.1 -P 4000 +sync_diff_inspector --config=./config.toml > $OUT_DIR/ignore_column_diff.log || true +check_contains "check pass!!!" 
$OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "table_config test passed" diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_base.toml b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_base.toml new file mode 100644 index 00000000000..ba341007394 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_base.toml @@ -0,0 +1,49 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +# skip validation for tables that don't exist upstream or downstream +skip-non-existing-table = true + +######################### Databases config ######################### +[data-sources] +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["skip_test.t*"] \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_router.toml b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_router.toml new file mode 100644 index 00000000000..2fa4ededf3e --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_router.toml @@ -0,0 +1,61 @@ +# Diff Configuration. + +######################### Global config ######################### + +# The number of goroutines created to check data. The number of connections between upstream and downstream databases are slightly greater than this value +check-thread-count = 4 + +# If enabled, SQL statements is exported to fix inconsistent tables +export-fix-sql = true + +# Only compares the table structure instead of the data +check-struct-only = false + +# skip validation for tables that don't exist upstream or downstream +skip-non-existing-table = true +######################### Datasource config ######################### +[data-sources.mysql1] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + + route-rules = ["rule1"] + +[data-sources.mysql2] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + + route-rules = ["rule2"] + +[data-sources.tidb0] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + +########################### Routes ########################### +[routes.rule1] +schema-pattern = "skip_test" # Matches the schema name of the data source. Supports the wildcards "*" and "?" +table-pattern = "t[1-2]" # Matches the table name of the data source. Supports the wildcards "*" and "?" 
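+# Together with rule2 below, this routes skip_test.t1/t2 (from "mysql1") and
+# skip_test.t0 (from "mysql2") onto the single downstream table skip_test.t5,
+# which the test never actually creates downstream, hence the skipped result.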
+target-schema = "skip_test" # The name of the schema in the target database +target-table = "t5" # The name of the target table + +[routes.rule2] +schema-pattern = "skip_test" +table-pattern = "t0" +target-schema = "skip_test" +target-table = "t5" + +######################### Task config ######################### +[task] + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1", "mysql2"] + + target-instance = "tidb0" + + # The tables of downstream databases to be compared. Each table needs to contain the schema name and the table name, separated by '.' + target-check-tables = ["skip_test.t5"] \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_skip/data.sql b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/data.sql new file mode 100644 index 00000000000..34a2a745263 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/data.sql @@ -0,0 +1,5 @@ +create database if not exists skip_test; +create table skip_test.t0 (a int, b int, primary key(a)); +create table skip_test.t1 (a int, b int, primary key(a)); +insert into skip_test.t0 values (1,1); +insert into skip_test.t1 values (2,2); \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh new file mode 100644 index 00000000000..441f7045806 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh @@ -0,0 +1,65 @@ +#!/bin/sh + +set -ex + +cd "$(dirname "$0")" + +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +rm -rf $OUT_DIR +mkdir -p $OUT_DIR + +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} < ./data.sql + +# tidb +mysql -uroot -h 127.0.0.1 -P 4000 < ./data.sql + +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml + +echo "compare tables, check result should be pass" +sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true +check_contains "check pass!!!" 
$OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "make some tables exist only upstream or downstream" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table skip_test.t2 (a int, b int, primary key(a));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into skip_test.t2 values (3,3);" +mysql -uroot -h 127.0.0.1 -P 4000 -e "create table skip_test.t3 (a int, b int, primary key(a));" +mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into skip_test.t3 values (1,1);" +sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true +check_contains "check pass" $OUT_DIR/sync_diff.log +check_contains "Comparing the table data of \`skip_test\`.\`t2\` ...skipped" $OUT_DIR/table_skip_diff.output +check_contains "Comparing the table data of \`skip_test\`.\`t3\` ...skipped" $OUT_DIR/table_skip_diff.output +check_contains "The data of \`skip_test\`.\`t2\` does not exist in downstream database" $OUT_DIR/table_skip_diff.output +check_contains "The data of \`skip_test\`.\`t3\` does not exist in upstream database" $OUT_DIR/table_skip_diff.output +check_contains "| TABLE | RESULT | STRUCTURE EQUALITY | DATA DIFF ROWS | UPCOUNT | DOWNCOUNT |" $OUT_DIR/summary.txt +check_contains "| \`skip_test\`.\`t2\` | skipped | false | +1/-0 | 1 | 0 |" $OUT_DIR/summary.txt +check_contains "| \`skip_test\`.\`t3\` | skipped | false | +0/-1 | 0 | 1 |" $OUT_DIR/summary.txt +rm -rf $OUT_DIR/* + +echo "make some table data not equal" +mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into skip_test.t1 values (4,4);" +sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +check_contains "| \`skip_test\`.\`t1\` | succeed | true | +0/-1 | 1 | 2 |" $OUT_DIR/summary.txt +rm -rf $OUT_DIR/* + +echo "make some table structure not equal" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table skip_test.t4 (a int, b int, c int,primary key(a));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into skip_test.t4 values (1,1,1);" +mysql -uroot -h 127.0.0.1 -P 4000 -e "create table skip_test.t4 (a int, b int, primary key(a));" +sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +check_contains "| \`skip_test\`.\`t4\` | succeed | false | +0/-0 | 0 | 0 |" $OUT_DIR/summary.txt +check_contains "A total of 5 tables have been compared, 1 tables finished, 2 tables failed, 2 tables skipped" $OUT_DIR/table_skip_diff.output +cat $OUT_DIR/summary.txt +rm -rf $OUT_DIR/* + +echo "test router case" +sync_diff_inspector --config=./config_router.toml > $OUT_DIR/table_skip_diff.output || true +check_contains "check pass" $OUT_DIR/sync_diff.log +check_contains "| \`skip_test\`.\`t5\` | skipped | false | +3/-0 | 3 | 0 |" $OUT_DIR/summary.txt +check_contains "The data of \`skip_test\`.\`t5\` does not exist in downstream database" $OUT_DIR/table_skip_diff.output +check_contains "A total of 1 tables have been compared, 0 tables finished, 0 tables failed, 1 tables skipped" $OUT_DIR/table_skip_diff.output +rm -rf $OUT_DIR/* + +echo "table_skip test passed" diff --git a/sync_diff_inspector/tests/sync_diff_inspector/time_zone/config.toml b/sync_diff_inspector/tests/sync_diff_inspector/time_zone/config.toml new file mode 100644 index 00000000000..4f3f813eef1 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/time_zone/config.toml @@ -0,0 +1,48 @@ +# Diff Configuration. 
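+# This config backs the time_zone test: both data sources are local TiDB
+# instances (port 4001 as the source, port 4000 as the target) and only
+# tz_test.diff is compared; run.sh below inserts rows under different
+# session and global time_zone settings and checks that the comparison
+# still reports pass/fail correctly.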
+ +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.tidb1] + host = "127.0.0.1" + port = 4001 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["tidb1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["tz_test.diff"] + + diff --git a/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh new file mode 100644 index 00000000000..b96e0d895d8 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh @@ -0,0 +1,57 @@ +#!/bin/sh + +set -ex + +cd "$(dirname "$0")" +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +FIX_DIR=/tmp/tidb_tools_test/sync_diff_inspector/fixsql +rm -rf $OUT_DIR +rm -rf $FIX_DIR +mkdir -p $OUT_DIR +mkdir -p $FIX_DIR + +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@GLOBAL.SQL_MODE='ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION';" +sleep 3 + +for port in 4000 4001; do + mysql -uroot -h 127.0.0.1 -P $port -e "create database if not exists tz_test" + mysql -uroot -h 127.0.0.1 -P $port -e "create table tz_test.diff(id int, dt datetime, ts timestamp);" + mysql -uroot -h 127.0.0.1 -P $port -e "insert into tz_test.diff values (1, '2020-05-17 09:12:13', '2020-05-17 09:12:13');" + mysql -uroot -h 127.0.0.1 -P $port -e "set @@session.time_zone = \"-07:00\"; insert into tz_test.diff values (2, '2020-05-17 09:12:13', '2020-05-17 09:12:13');" +done + +echo "check with the same time_zone, check result should be pass" +sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +# check upstream and downstream time_zone +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@global.time_zone = '+08:00'"; +mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@global.time_zone = '+00:00'"; +sleep 5 + +echo "check with different time_zone, check result should be pass again" +sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output +check_contains "check pass!!!" 
$OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "set different rows, check result should be failed" +mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@session.time_zone = '-06:00'; insert into tz_test.diff values (4, '2020-05-17 09:12:13', '2020-05-17 09:12:13');" +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@session.time_zone = '-05:00'; insert into tz_test.diff values (3, '2020-05-17 10:12:13', '2020-05-17 10:12:13');" +sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +mv $OUT_DIR/fix-on-tidb/ $FIX_DIR/ +rm -rf $OUT_DIR/* + +echo "fix the rows, check result should be pass" +cat $FIX_DIR/fix-on-tidb/*.sql | mysql -uroot -h127.0.0.1 -P 4000 +sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@session.time_zone = '-06:00'; select ts from tz_test.diff where id = 4 or id = 3;" > $OUT_DIR/tmp_sql_timezone +check_contains "2020-05-17 09:12:13" $OUT_DIR/tmp_sql_timezone +check_not_contains "2020-05-17 10:12:13" $OUT_DIR/tmp_sql_timezone + +# reset time_zone +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@global.time_zone = 'SYSTEM'"; +mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@global.time_zone = 'SYSTEM'"; diff --git a/sync_diff_inspector/tests/sync_diff_inspector/tls/config.toml b/sync_diff_inspector/tests/sync_diff_inspector/tls/config.toml new file mode 100644 index 00000000000..323134207ef --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/tls/config.toml @@ -0,0 +1,53 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.tidb1] + host = "127.0.0.1" + port = 4001 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root_tls" + password = "" + + security.ca-path = "ca-path"#CAPATH + security.cert-path = "cert-path"#CERTPATH + security.key-path = "key-path"#KEYPATH + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["tidb1"] + + target-instance = "tidb" + + # tables need to check. 
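+    # Note: run.sh substitutes the "ca-path"/"cert-path"/"key-path" placeholders
+    # above with the real certificate paths (via sed) before running the
+    # TLS comparison as the root_tls user.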
+ target-check-tables = ["diff_test.test"] + + diff --git a/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh new file mode 100644 index 00000000000..03ed6cbf3e1 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +set -ex + +cd "$(dirname "$0")" + +CONF_PATH=`cd ../../conf && pwd` +CA_PATH="$CONF_PATH/root.crt" +CERT_PATH="$CONF_PATH/client.crt" +KEY_PATH="$CONF_PATH/client.key" +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +rm -rf $OUT_DIR +mkdir -p $OUT_DIR + +# create user for test tls +mysql -uroot -h 127.0.0.1 -P 4000 -e "create user 'root_tls'@'%' identified by '' require X509;" +mysql -uroot -h 127.0.0.1 -P 4000 -e "grant all privileges on *.* to 'root_tls'@'%';" +mysql -uroot_tls -h 127.0.0.1 -P 4000 --ssl-ca "$CA_PATH" --ssl-cert "$CERT_PATH" --ssl-key "$KEY_PATH" -e "SHOW STATUS LIKE \"%Ssl%\";" + +echo "use sync_diff_inspector to compare data" +# sync diff tidb-tidb +CA_PATH_REG=$(echo ${CA_PATH} | sed 's/\//\\\//g') +CERT_PATH_REG=$(echo ${CERT_PATH} | sed 's/\//\\\//g') +KEY_PATH_REG=$(echo ${KEY_PATH} | sed 's/\//\\\//g') +sed "s/\"ca-path\"#CAPATH/\"${CA_PATH_REG}\"/g" config.toml | sed "s/\"cert-path\"#CERTPATH/\"${CERT_PATH_REG}\"/g" | sed "s/\"key-path\"#KEYPATH/\"${KEY_PATH_REG}\"/g" > config_.toml +sync_diff_inspector --config=./config_.toml > $OUT_DIR/diff.output || (cat $OUT_DIR/diff.output && exit 1) +check_contains "check pass!!!" $OUT_DIR/sync_diff.log diff --git a/sync_diff_inspector/utils/pd.go b/sync_diff_inspector/utils/pd.go index 7aadf2fbc59..af7947f1baf 100644 --- a/sync_diff_inspector/utils/pd.go +++ b/sync_diff_inspector/utils/pd.go @@ -208,7 +208,7 @@ func StartGCSavepointUpdateService(ctx context.Context, pdCli pd.Client, db *sql return nil } // get latest snapshot - snapshotTS, err := parseSnapshotToTSO(db, snapshot) + snapshotTS, err := ParseSnapshotToTSO(db, snapshot) if tidbVersion.Compare(*autoGCSafePointVersion) > 0 { log.Info("tidb support auto gc safepoint", zap.Stringer("version", tidbVersion)) if err != nil { @@ -250,7 +250,7 @@ func updateServiceSafePoint(ctx context.Context, pdClient pd.Client, snapshotTS } } -func parseSnapshotToTSO(pool *sql.DB, snapshot string) (uint64, error) { +func ParseSnapshotToTSO(pool *sql.DB, snapshot string) (uint64, error) { snapshotTS, err := strconv.ParseUint(snapshot, 10, 64) if err == nil { return snapshotTS, nil From a72a251846dce2dc162d0deab4de32b10f0a6e86 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 7 Jan 2025 09:36:59 +0800 Subject: [PATCH 19/22] update go sum --- go.sum | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/go.sum b/go.sum index f08cf324d1b..78b4ac6db93 100644 --- a/go.sum +++ b/go.sum @@ -42,12 +42,12 @@ github.com/99designs/keyring v1.2.1 h1:tYLp1ULvO7i3fI5vE21ReQuj99QFSs7lGm0xWyJo8 github.com/99designs/keyring v1.2.1/go.mod h1:fc+wB5KTk9wQ9sDx0kFXB3A0MaeGHM9AwRStKOQ5vOA= github.com/AthenZ/athenz v1.10.39 h1:mtwHTF/v62ewY2Z5KWhuZgVXftBej1/Tn80zx4DcawY= github.com/AthenZ/athenz v1.10.39/go.mod h1:3Tg8HLsiQZp81BJY58JBeU2BR6B/H4/0MQGfCwhHNEA= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.12.0 h1:1nGuui+4POelzDwI7RG56yfQJHCnKvwfMoU7VsEp+Zg= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.12.0/go.mod h1:99EvauvlcJ1U06amZiksfYz/3aFGyIhWGHVyiZXtBAI= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.6.0 h1:U2rTu3Ef+7w9FHKIAXM6ZyqF3UOWJZ12zIm8zECAFfg= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.6.0/go.mod 
h1:9kIvujWAA58nmPmWB1m23fyWic1kYZMxD9CxaWn4Qpg= -github.com/Azure/azure-sdk-for-go/sdk/internal v1.9.0 h1:H+U3Gk9zY56G3u872L82bk4thcsy2Gghb9ExT4Zvm1o= -github.com/Azure/azure-sdk-for-go/sdk/internal v1.9.0/go.mod h1:mgrmMSgaLp9hmax62XQTd0N4aAqSE5E0DulSpVYK7vc= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 h1:JZg6HRh6W6U4OLl6lk7BZ7BLisIzM9dG1R50zUk9C/M= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0/go.mod h1:YL1xnZ6QejvQHWJrX/AvhFl4WW4rqHVoKspWNVwFk0M= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 h1:tfLQ34V6F7tVSwoTf/4lH5sE0o6eCJuNDTmH09nDpbc= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0/go.mod h1:9kIvujWAA58nmPmWB1m23fyWic1kYZMxD9CxaWn4Qpg= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 h1:ywEEhmNahHBihViHepv3xPBn1663uRv2t2q/ESv9seY= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0/go.mod h1:iZDifYGJTIgIIkYRNWPENUnqx6bJ2xnSDFI2tjwZNuY= github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.0.0 h1:u/LLAOFgsMv7HmNL4Qufg58y+qElGOt5qv0z1mURkRY= github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.0.0/go.mod h1:2e8rMJtl2+2j+HXbTBwnyGpm5Nou7KhvSfxOq8JpTag= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= From 2a793064f310a26f84bbcbba2c6d416ee58581e0 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 7 Jan 2025 09:59:56 +0800 Subject: [PATCH 20/22] fix format --- sync_diff_inspector/diff/diff_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/sync_diff_inspector/diff/diff_test.go b/sync_diff_inspector/diff/diff_test.go index e8251a13343..a85c8a817b4 100644 --- a/sync_diff_inspector/diff/diff_test.go +++ b/sync_diff_inspector/diff/diff_test.go @@ -103,5 +103,4 @@ func TestGetSnapshot(t *testing.T) { val := GetSnapshot(cs.latestSnapshot, cs.snapshot, conn) require.Equal(t, cs.expected, val, "case %d", i) } - } From 4e785cbacd6d8d9260bb2424a66772d59e75a066 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 7 Jan 2025 10:23:18 +0800 Subject: [PATCH 21/22] fix format --- .../tests/_utils/check_contains | 10 +- .../tests/_utils/check_contains_count | 10 +- .../tests/_utils/check_contains_regex | 10 +- .../tests/_utils/check_db_status | 16 +-- .../tests/_utils/check_not_contains | 12 +- sync_diff_inspector/tests/importer/run.sh | 26 ++-- sync_diff_inspector/tests/run.sh | 128 +++++++++--------- .../sync_diff_inspector/checkpoint/run.sh | 63 ++++----- .../sync_diff_inspector/expression/run.sh | 10 +- .../tests/sync_diff_inspector/json/run.sh | 16 +-- .../tests/sync_diff_inspector/run.sh | 19 ++- .../tests/sync_diff_inspector/shard/run.sh | 28 ++-- .../tests/sync_diff_inspector/snapshot/run.sh | 13 +- .../sync_diff_inspector/table_config/run.sh | 13 +- .../sync_diff_inspector/table_skip/run.sh | 16 +-- .../sync_diff_inspector/time_zone/run.sh | 26 ++-- .../tests/sync_diff_inspector/tls/run.sh | 8 +- 17 files changed, 207 insertions(+), 217 deletions(-) diff --git a/sync_diff_inspector/tests/_utils/check_contains b/sync_diff_inspector/tests/_utils/check_contains index 93e7970b76a..651ee15fb5a 100755 --- a/sync_diff_inspector/tests/_utils/check_contains +++ b/sync_diff_inspector/tests/_utils/check_contains @@ -7,9 +7,9 @@ set -eu OUT_DIR=/tmp/tidb_tools_test if ! 
grep -Fq "$1" "$2"; then - echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1'" - echo "____________________________________" - cat "$2" - echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" - exit 1 + echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1'" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 fi diff --git a/sync_diff_inspector/tests/_utils/check_contains_count b/sync_diff_inspector/tests/_utils/check_contains_count index 8308512d789..79dd3c79e3b 100755 --- a/sync_diff_inspector/tests/_utils/check_contains_count +++ b/sync_diff_inspector/tests/_utils/check_contains_count @@ -10,9 +10,9 @@ OUT_DIR=/tmp/tidb_tools_test count=$(grep -F "$1" "$2" | wc -l) if [ "$count" -ne "$3" ]; then - echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1' $3 times" - echo "____________________________________" - cat "$2" - echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" - exit 1 + echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1' $3 times" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 fi diff --git a/sync_diff_inspector/tests/_utils/check_contains_regex b/sync_diff_inspector/tests/_utils/check_contains_regex index ce498abbec2..c8d780a6cc5 100755 --- a/sync_diff_inspector/tests/_utils/check_contains_regex +++ b/sync_diff_inspector/tests/_utils/check_contains_regex @@ -7,9 +7,9 @@ set -eu OUT_DIR=/tmp/tidb_tools_test if ! grep -q "$1" "$2"; then - echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1'" - echo "____________________________________" - cat "$2" - echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" - exit 1 + echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1'" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 fi diff --git a/sync_diff_inspector/tests/_utils/check_db_status b/sync_diff_inspector/tests/_utils/check_db_status index 8dc75739c55..09945fff4e7 100755 --- a/sync_diff_inspector/tests/_utils/check_db_status +++ b/sync_diff_inspector/tests/_utils/check_db_status @@ -4,16 +4,14 @@ # argument 2 is the port # argument 3 is the database service's name -for i in {1..20} -do - if mysqladmin -h "$1" -P "$2" -u root --default-character-set utf8 ping > /dev/null 2>&1 - then - echo "$3 is alive" - exit 0 - fi +for i in {1..20}; do + if mysqladmin -h "$1" -P "$2" -u root --default-character-set utf8 ping >/dev/null 2>&1; then + echo "$3 is alive" + exit 0 + fi - echo "$3 is not alive, will try again" - sleep 2 + echo "$3 is not alive, will try again" + sleep 2 done echo "$3 is not alive" diff --git a/sync_diff_inspector/tests/_utils/check_not_contains b/sync_diff_inspector/tests/_utils/check_not_contains index 43fd007ad5f..915fcd1d603 100755 --- a/sync_diff_inspector/tests/_utils/check_not_contains +++ b/sync_diff_inspector/tests/_utils/check_not_contains @@ -7,9 +7,9 @@ set -eu OUT_DIR=/tmp/tidb_binlog_test if grep -Fq "$1" "$2"; then - echo "TEST FAILED: '$2' CONTAIN '$1'" - echo "____________________________________" - cat "$2" - echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" - exit 1 -fi \ No newline at end of file + echo "TEST FAILED: '$2' CONTAIN '$1'" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 +fi diff --git a/sync_diff_inspector/tests/importer/run.sh b/sync_diff_inspector/tests/importer/run.sh index b82a2ef1575..ef2b2527a9d 100644 --- a/sync_diff_inspector/tests/importer/run.sh +++ b/sync_diff_inspector/tests/importer/run.sh @@ -4,23 +4,23 @@ TEST_DATABASE_NAME=checker_test 
IMPORT_EXEC="../bin/importer -c 1 -h ${MYSQL_HOST} -P ${MYSQL_PORT} -D ${TEST_DATABASE_NAME}" MYSQL_EXEC="mysql -h ${MYSQL_HOST} -P ${MYSQL_PORT} -u root" -init(){ - check_db_status "${MYSQL_HOST}" "${MYSQL_PORT}" mysql "." - ${MYSQL_EXEC} -e "drop database if exists ${TEST_DATABASE_NAME};" - ${MYSQL_EXEC} -e "create database ${TEST_DATABASE_NAME};" +init() { + check_db_status "${MYSQL_HOST}" "${MYSQL_PORT}" mysql "." + ${MYSQL_EXEC} -e "drop database if exists ${TEST_DATABASE_NAME};" + ${MYSQL_EXEC} -e "create database ${TEST_DATABASE_NAME};" } -destroy(){ - ${MYSQL_EXEC} -e "drop database if exists ${TEST_DATABASE_NAME};" +destroy() { + ${MYSQL_EXEC} -e "drop database if exists ${TEST_DATABASE_NAME};" } -testImporter(){ - ${IMPORT_EXEC} -c 1 -n 10 -t "$1" -i "$2" - RESULT=`${MYSQL_EXEC} -e "$3" | sed -n '2p'` - if [[ "${RESULT}" != "$4" ]]; then - echo "Test importer failed: $1" - exit 1 - fi +testImporter() { + ${IMPORT_EXEC} -c 1 -n 10 -t "$1" -i "$2" + RESULT=$(${MYSQL_EXEC} -e "$3" | sed -n '2p') + if [[ "${RESULT}" != "$4" ]]; then + echo "Test importer failed: $1" + exit 1 + fi } set -e diff --git a/sync_diff_inspector/tests/run.sh b/sync_diff_inspector/tests/run.sh index 7d0e04a1a1f..f5f54536b88 100755 --- a/sync_diff_inspector/tests/run.sh +++ b/sync_diff_inspector/tests/run.sh @@ -6,12 +6,12 @@ OUT_DIR=/tmp/tidb_tools_test # assign default value to mysql config if [[ -z ${MYSQL_HOST+x} ]]; then - echo "set MYSQL_HOST as default value \"127.0.0.1\"" - export MYSQL_HOST="127.0.0.1" + echo "set MYSQL_HOST as default value \"127.0.0.1\"" + export MYSQL_HOST="127.0.0.1" fi if [[ -z ${MYSQL_PORT+x} ]]; then - echo "set MYSQL_PORT as default value 3306" - export MYSQL_PORT=3306 + echo "set MYSQL_PORT as default value 3306" + export MYSQL_PORT=3306 fi mkdir -p $OUT_DIR || true @@ -26,26 +26,26 @@ export PATH=$PATH:$(dirname $pwd)/bin rm -rf $OUT_DIR || true stop_services() { - killall -9 tikv-server || true - killall -9 pd-server || true - killall -9 tidb-server || true + killall -9 tikv-server || true + killall -9 pd-server || true + killall -9 tidb-server || true } start_services() { - stop_services - - echo "Starting PD..." - pd-server \ - --client-urls http://127.0.0.1:2379 \ - --log-file "$OUT_DIR/pd.log" \ - --data-dir "$OUT_DIR/pd" & - # wait until PD is online... - while ! curl -o /dev/null -sf http://127.0.0.1:2379/pd/api/v1/version; do - sleep 1 - done - - # Tries to limit the max number of open files under the system limit - cat - > "$OUT_DIR/tikv-config.toml" <"$OUT_DIR/tikv-config.toml" < "$OUT_DIR/tidb-config.toml" <"$OUT_DIR/tidb-config.toml" < ./config.toml +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml echo "================test bucket checkpoint=================" echo "---------1. chunk is in the last of the bucket---------" export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/check-one-bucket=return();\ github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return();\ main/wait-for-checkpoint=return()" -sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output check_contains "check pass!!!" $OUT_DIR/sync_diff.log -# Save the last chunk's info, +# Save the last chunk's info, # to which we will check whether the first chunk's info is next in the next running. 
last_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'upperBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'END {print}') echo "$last_chunk_info" # e.g. 537 indexCode=0:0-0:3:4 @@ -31,23 +31,22 @@ OLD_IFS="$IFS" IFS=":" last_chunk_index_array=($last_chunk_index) IFS="$OLD_IFS" -for s in ${last_chunk_index_array[@]} -do -echo "$s" +for s in ${last_chunk_index_array[@]}; do + echo "$s" done # chunkIndex should be the last Index [[ $((${last_chunk_index_array[2]} + 1)) -eq ${last_chunk_index_array[3]} ]] || exit 1 -# Save bucketIndexRight, which should be equal to bucketIndexLeft of the chunk first created in the next running. +# Save bucketIndexRight, which should be equal to bucketIndexLeft of the chunk first created in the next running. bucket_index_right=$(($(echo ${last_chunk_index_array[1]} | awk -F '-' '{print $2}') + 1)) echo $bucket_index_right rm -f $OUT_DIR/sync_diff.log export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return()" -sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output first_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'lowerBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'NR==1') -echo $first_chunk_info | awk -F '=' '{print $1}' > $OUT_DIR/first_chunk_bound +echo $first_chunk_info | awk -F '=' '{print $1}' >$OUT_DIR/first_chunk_bound cat $OUT_DIR/first_chunk_bound -echo $first_chunk_info | awk -F '=' '{print $3}' > $OUT_DIR/first_chunk_index +echo $first_chunk_info | awk -F '=' '{print $3}' >$OUT_DIR/first_chunk_index cat $OUT_DIR/first_chunk_index # Notice: when chunk is created paralleling, the least chunk may not appear in the first line. so we sort it as before. check_contains "${last_chunk_bound}" $OUT_DIR/first_chunk_bound @@ -60,9 +59,9 @@ export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/ignore-last-n-chunk-in-bucket=return(1);\ github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return();\ main/wait-for-checkpoint=return()" -sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output check_contains "check pass!!!" $OUT_DIR/sync_diff.log -# Save the last chunk's info, +# Save the last chunk's info, # to which we will check whether the first chunk's info is next in the next running. last_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'upperBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'END {print}') echo "$last_chunk_info" # e.g. 537 indexCode=0:0-0:3:4 @@ -74,31 +73,29 @@ OLD_IFS="$IFS" IFS=":" last_chunk_index_array=($last_chunk_index) IFS="$OLD_IFS" -for s in ${last_chunk_index_array[@]} -do -echo "$s" +for s in ${last_chunk_index_array[@]}; do + echo "$s" done # chunkIndex should be the last Index [[ $((${last_chunk_index_array[2]} + 2)) -eq ${last_chunk_index_array[3]} ]] || exit 1 -# Save bucketIndexRight, which should be equal to bucketIndexLeft of the chunk first created in the next running. +# Save bucketIndexRight, which should be equal to bucketIndexLeft of the chunk first created in the next running. 
bucket_index_left=$(echo ${last_chunk_index_array[1]} | awk -F '-' '{print $1}')
bucket_index_right=$(echo ${last_chunk_index_array[1]} | awk -F '-' '{print $2}')
echo "${bucket_index_left}-${bucket_index_right}"
rm -f $OUT_DIR/sync_diff.log
export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return()"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output
first_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'lowerBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'NR==1')
-echo $first_chunk_info | awk -F '=' '{print $1}' > $OUT_DIR/first_chunk_bound
+echo $first_chunk_info | awk -F '=' '{print $1}' >$OUT_DIR/first_chunk_bound
cat $OUT_DIR/first_chunk_bound
-echo $first_chunk_info | awk -F '=' '{print $3}' > $OUT_DIR/first_chunk_index
+echo $first_chunk_info | awk -F '=' '{print $3}' >$OUT_DIR/first_chunk_index
cat $OUT_DIR/first_chunk_index
# Notice: when chunk is created paralleling, the least chunk may not appear in the first line. so we sort it as before.
check_contains "${last_chunk_bound}" $OUT_DIR/first_chunk_bound
check_contains_regex ".:${bucket_index_left}-${bucket_index_right}:$((${last_chunk_index_array[2]} + 1)):${last_chunk_index_array[3]}" $OUT_DIR/first_chunk_index
-
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_rand.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_rand.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
echo "================test random checkpoint================="
echo "--------------1. chunk is in the middle----------------"
@@ -107,9 +104,9 @@ mkdir -p $OUT_DIR
export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/ignore-last-n-chunk-in-bucket=return(1);\
github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return();\
main/wait-for-checkpoint=return()"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
-# Save the last chunk's info, 
+# Save the last chunk's info,
# to which we will check whether the first chunk's info is next in the next running.
last_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'upperBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'END {print}')
echo "$last_chunk_info" # e.g. 537 indexCode=0:0-0:3:4
@@ -121,35 +118,33 @@ OLD_IFS="$IFS"
IFS=":"
last_chunk_index_array=($last_chunk_index)
IFS="$OLD_IFS"
-for s in ${last_chunk_index_array[@]}
-do
-echo "$s"
+for s in ${last_chunk_index_array[@]}; do
+ echo "$s"
done
# chunkIndex should be the last Index
[[ $((${last_chunk_index_array[2]} + 2)) -eq ${last_chunk_index_array[3]} ]] || exit 1
rm -f $OUT_DIR/sync_diff.log
export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return()"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output
first_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'lowerBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'NR==1')
-echo $first_chunk_info | awk -F '=' '{print $1}' > $OUT_DIR/first_chunk_bound
+echo $first_chunk_info | awk -F '=' '{print $1}' >$OUT_DIR/first_chunk_bound
cat $OUT_DIR/first_chunk_bound
-echo $first_chunk_info | awk -F '=' '{print $3}' > $OUT_DIR/first_chunk_index
+echo $first_chunk_info | awk -F '=' '{print $3}' >$OUT_DIR/first_chunk_index
cat $OUT_DIR/first_chunk_index
# Notice: when chunk is created paralleling, the least chunk may not appear in the first line. so we sort it as before.
check_contains "${last_chunk_bound}" $OUT_DIR/first_chunk_bound
check_contains_regex ".:0-0:$((${last_chunk_index_array[2]} + 1)):${last_chunk_index_array[3]}" $OUT_DIR/first_chunk_index
-
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_continous.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_continous.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
echo "================test checkpoint continous================="
# add a table have different table-structs of upstream and downstream
# so data-check will be skipped
mysql -uroot -h 127.0.0.1 -P 4000 -e "create table IF NOT EXISTS diff_test.ttt(a int, aa int, primary key(a), key(aa));"
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table IF NOT EXISTS diff_test.ttt(a int, b int, primary key(a), key(b));"
export GO_FAILPOINTS="main/wait-for-checkpoint=return()"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output || true
-grep 'save checkpoint' $OUT_DIR/sync_diff.log | awk 'END {print}' > $OUT_DIR/checkpoint_info
+sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output || true
+grep 'save checkpoint' $OUT_DIR/sync_diff.log | awk 'END {print}' >$OUT_DIR/checkpoint_info
check_not_contains 'has-upper\":true' $OUT_DIR/checkpoint_info
-export GO_FAILPOINTS=""
\ No newline at end of file
+export GO_FAILPOINTS=""
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh
index 612fc24cbe8..77336b7461a 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh
@@ -11,13 +11,13 @@ mkdir -p $OUT_DIR
mkdir -p $FIX_DIR
for port in 4000 4001; do
- mysql -uroot -h 127.0.0.1 -P $port -e "create database if not exists expression_test;"
- mysql -uroot -h 127.0.0.1 -P $port -e "create table expression_test.diff(\`a\`\`;sad\` int, id int);"
- mysql -uroot -h 127.0.0.1 -P $port -e "alter table expression_test.diff add index i1((\`a\`\`;sad\` + 1 + \`a\`\`;sad\`));"
- mysql -uroot -h 127.0.0.1 -P $port -e "insert into expression_test.diff values (1,1),(2,2),(3,3);"
+ mysql -uroot -h 127.0.0.1 -P $port -e "create database if not exists expression_test;"
+ mysql -uroot -h 127.0.0.1 -P $port -e "create table expression_test.diff(\`a\`\`;sad\` int, id int);"
+ mysql -uroot -h 127.0.0.1 -P $port -e "alter table expression_test.diff add index i1((\`a\`\`;sad\` + 1 + \`a\`\`;sad\`));"
+ mysql -uroot -h 127.0.0.1 -P $port -e "insert into expression_test.diff values (1,1),(2,2),(3,3);"
done
echo "check result should be pass"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/expression_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/expression_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh
index 30824a26fdf..16312c44a9a 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh
@@ -8,34 +8,34 @@ OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output
rm -rf $OUT_DIR
mkdir -p $OUT_DIR
-mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} < ./data.sql
+mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} <./data.sql
# tidb
-mysql -uroot -h 127.0.0.1 -P 4000 < ./data.sql
+mysql -uroot -h 127.0.0.1 -P 4000 <./data.sql
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
-cat config.toml | sed 's/export-fix-sql = true/export-fix-sql = false/' > config_nofix.toml
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
+cat config.toml | sed 's/export-fix-sql = true/export-fix-sql = false/' >config_nofix.toml
diff config.toml config_nofix.toml || true
echo "compare json tables, check result should be pass"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/json_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/json_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "compare json tables without fixsql, check result should be pass"
-sync_diff_inspector --config=./config_nofix.toml > $OUT_DIR/json_diff.output
+sync_diff_inspector --config=./config_nofix.toml >$OUT_DIR/json_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "update data to make it different, and data should not be equal"
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into json_test.test values (5, '{\"id\": 5, \"bool\": true, \"name\":\"aaa\"}');"
mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into json_test.test values (5, '{\"id\": 5, \"bool\": false, \"name\":\"aaa\"}');"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/json_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/json_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "update data to make it different, and downstream json data is NULL"
mysql -uroot -h 127.0.0.1 -P 4000 -e "replace into json_test.test values (5, NULL);"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/json_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/json_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/run.sh
index 338f57baec0..cc82282f31b 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/run.sh
@@ -10,7 +10,6 @@ check_db_status "${MYSQL_HOST}" "${MYSQL_PORT}" mysql "."
BASE_DIR=/tmp/tidb_tools_test/sync_diff_inspector
OUT_DIR=$BASE_DIR/output
-
mkdir -p $OUT_DIR || true
echo "use importer to generate test data"
@@ -29,7 +28,7 @@ mysql -h ${MYSQL_HOST} -P ${MYSQL_PORT} -u root -e "select * from diff_test.test
echo "use sync_diff_inspector to compare data"
# sync diff tidb-tidb
-sync_diff_inspector --config=./config_base_tidb.toml > $OUT_DIR/diff.output
+sync_diff_inspector --config=./config_base_tidb.toml >$OUT_DIR/diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
echo "analyze table, and will use tidb's statistical information to split chunks"
@@ -40,28 +39,28 @@ mysql -uroot -h 127.0.0.1 -P 4000 -e "analyze table diff_test.test"
mysql -uroot -h 127.0.0.1 -P 4000 -e "explain select * from diff_test.test where aa > 1"
mysql -uroot -h 127.0.0.1 -P 4000 -e "explain select * from diff_test.test where \`table\` > 1"
mysql -uroot -h 127.0.0.1 -P 4000 -e "show stats_buckets"
-sync_diff_inspector --config=./config_base_tidb.toml > $OUT_DIR/diff.output
+sync_diff_inspector --config=./config_base_tidb.toml >$OUT_DIR/diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
check_not_contains "split range by random" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "test 'exclude-tables' config"
mysql -uroot -h 127.0.0.1 -P 4000 -e "create table if not exists diff_test.should_not_compare (id int)"
-sync_diff_inspector --config=./config_base_tidb.toml > $OUT_DIR/diff.log
+sync_diff_inspector --config=./config_base_tidb.toml >$OUT_DIR/diff.log
# doesn't contain the table's result in check report
check_not_contains "[table=should_not_compare]" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
# sync diff tidb-mysql
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_mysql.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config_base_mysql_.toml
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_mysql.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config_base_mysql_.toml
sync_diff_inspector --config=./config_base_mysql_.toml #> $OUT_DIR/diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
for script in ./*/run.sh; do
- test_name="$(basename "$(dirname "$script")")"
- echo "---------------------------------------"
- echo "Running test $script..."
- echo "---------------------------------------"
- sh "$script"
+ test_name="$(basename "$(dirname "$script")")"
+ echo "---------------------------------------"
+ echo "Running test $script..."
+ echo "---------------------------------------"
+ sh "$script"
done
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh
index 09fdbfa041a..729059c5cc7 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh
@@ -22,16 +22,16 @@ mysql -uroot -h 127.0.0.1 -P 4000 -e "create database if not exists shard_test;"
mysql -uroot -h 127.0.0.1 -P 4000 -e "create table shard_test.test(\`table\` int, aa int, b varchar(10), c float, d datetime, primary key(\`table\`));"
mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into shard_test.test (\`table\`, aa, b, c, d) SELECT \`table\`, aa, b, c, d FROM diff_test.test;"
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
echo "compare sharding tables with one table in downstream, check result should be pass"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/shard_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/shard_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "update data in one shard table, and data should not be equal"
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "update shard_test.test1 set b = 'abc' limit 1"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/shard_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/shard_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
@@ -66,8 +66,8 @@ mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table Router_test_1.Tb
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into Router_test_1.Tbl values (1,\"hello1\",1);"
echo "test router 1: normal rule"
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_1.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
-sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_1.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
+sync_diff_inspector --config=./config.toml -L debug >$OUT_DIR/shard_diff.output || true
check_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log
@@ -79,8 +79,8 @@ check_not_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_di
rm -rf $OUT_DIR/*
echo "test router 2: only schema rule"
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_2.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
-sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_2.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
+sync_diff_inspector --config=./config.toml -L debug >$OUT_DIR/shard_diff.output || true
check_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log
@@ -93,8 +93,8 @@ check_not_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_di
rm -rf $OUT_DIR/*
echo "test router 3: other rule"
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_3.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
-sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_3.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
+sync_diff_inspector --config=./config.toml -L debug >$OUT_DIR/shard_diff.output || true
check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_not_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log
@@ -106,8 +106,8 @@ check_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.l
rm -rf $OUT_DIR/*
echo "test router 4: no rule"
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_4.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
-sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_4.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
+sync_diff_inspector --config=./config.toml -L debug >$OUT_DIR/shard_diff.output || true
check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_not_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log
@@ -119,8 +119,8 @@ check_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.l
rm -rf $OUT_DIR/*
echo "test router 5: regex rule"
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_5.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
-sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_5.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
+sync_diff_inspector --config=./config.toml -L debug >$OUT_DIR/shard_diff.output || true
check_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log
@@ -131,4 +131,4 @@ check_contains "as CHECKSUM FROM \`router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.l
check_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
-echo "shard test passed"
\ No newline at end of file
+echo "shard test passed"
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh
index cbd9e765968..c34c78b81ec 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh
@@ -1,4 +1,3 @@
-
#!/bin/sh
set -e
@@ -12,15 +11,15 @@ rm -rf $FIX_DIR
mkdir -p $OUT_DIR
mkdir -p $FIX_DIR
-mysql -uroot -h 127.0.0.1 -P 4000 -e "show master status" > $OUT_DIR/ts.log
+mysql -uroot -h 127.0.0.1 -P 4000 -e "show master status" >$OUT_DIR/ts.log
#cat $OUT_DIR/sync_diff.log
-ts=`grep -oE "[0-9]+" $OUT_DIR/ts.log`
+ts=$(grep -oE "[0-9]+" $OUT_DIR/ts.log)
echo "get ts $ts"
echo "delete one data, diff should not passed"
mysql -uroot -h 127.0.0.1 -P 4000 -e "delete from diff_test.test limit 1"
-sync_diff_inspector --config=./config_base.toml > $OUT_DIR/snapshot_diff.log || true
+sync_diff_inspector --config=./config_base.toml >$OUT_DIR/snapshot_diff.log || true
check_contains "check failed" $OUT_DIR/sync_diff.log
# move the fix sql file to $FIX_DIR
mv $OUT_DIR/fix-on-tidb/ $FIX_DIR/
@@ -31,7 +30,7 @@ mysql -uroot -h 127.0.0.1 -P 4000 -e "SET GLOBAL sql_mode = 'ANSI_QUOTES';"
sleep 10
mysql -uroot -h 127.0.0.1 -P 4000 -e "show variables like '%sql_mode%'"
mysql -uroot -h 127.0.0.1 -P 4000 -e "show create table diff_test.test"
-sed "s/#snapshot#/snapshot = \"${ts}\"/g" config_base.toml > config.toml
+sed "s/#snapshot#/snapshot = \"${ts}\"/g" config_base.toml >config.toml
echo "use snapshot compare data, data should be equal"
sync_diff_inspector --config=./config.toml #> $OUT_DIR/snapshot_diff.log
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
@@ -39,11 +38,11 @@ rm -rf $OUT_DIR/*
echo "execute fix.sql and use base config, and then compare data, data should be equal"
cat $FIX_DIR/fix-on-tidb/*.sql | mysql -uroot -h127.0.0.1 -P 4000
-sync_diff_inspector --config=./config_base.toml > $OUT_DIR/snapshot_diff.log
+sync_diff_inspector --config=./config_base.toml >$OUT_DIR/snapshot_diff.log
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
# reset sql mode
mysql -uroot -h 127.0.0.1 -P 4000 -e "SET GLOBAL sql_mode = 'ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION';"
-echo "snapshot test passed"
\ No newline at end of file
+echo "snapshot test passed"
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh
index ad541dacb6c..fa5b97a702d 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh
@@ -1,4 +1,3 @@
-
#!/bin/sh
set -e
@@ -15,27 +14,27 @@ mkdir -p $FIX_DIR
echo "update data in column b (WHERE \`table\` >= 10 AND \`table\` <= 200), data should not be equal"
mysql -uroot -h 127.0.0.1 -P 4000 -e "update diff_test.test set b = 'abc' where \`table\` >= 10 AND \`table\` <= 200"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/ignore_column_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/ignore_column_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
# move the fix sql file to $FIX_DIR
mv $OUT_DIR/fix-on-tidb/ $FIX_DIR/
rm -rf $OUT_DIR/*
echo "ignore check column b, check result should be pass"
-sed 's/\[""\]#IGNORE/["b"]/g' config.toml > config_.toml
-sync_diff_inspector --config=./config_.toml > $OUT_DIR/ignore_column_diff.output || true
+sed 's/\[""\]#IGNORE/["b"]/g' config.toml >config_.toml
+sync_diff_inspector --config=./config_.toml >$OUT_DIR/ignore_column_diff.output || true
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "set range a < 10 OR a > 200, check result should be pass"
-sed 's/"TRUE"#RANGE"a < 10 OR a > 200"/"`table` < 10 OR `table` > 200"/g' config.toml > config_.toml
-sync_diff_inspector --config=./config_.toml > $OUT_DIR/ignore_column_diff.output || true
+sed 's/"TRUE"#RANGE"a < 10 OR a > 200"/"`table` < 10 OR `table` > 200"/g' config.toml >config_.toml
+sync_diff_inspector --config=./config_.toml >$OUT_DIR/ignore_column_diff.output || true
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "execute fix.sql and use base config, and then compare data, data should be equal"
cat $FIX_DIR/fix-on-tidb/*.sql | mysql -uroot -h127.0.0.1 -P 4000
-sync_diff_inspector --config=./config.toml > $OUT_DIR/ignore_column_diff.log || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/ignore_column_diff.log || true
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh
index 441f7045806..4ee4e3b98b1 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh
@@ -8,15 +8,15 @@ OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output
rm -rf $OUT_DIR
mkdir -p $OUT_DIR
-mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} < ./data.sql
+mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} <./data.sql
# tidb
-mysql -uroot -h 127.0.0.1 -P 4000 < ./data.sql
+mysql -uroot -h 127.0.0.1 -P 4000 <./data.sql
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
echo "compare tables, check result should be pass"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/table_skip_diff.output || true
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
@@ -25,7 +25,7 @@ mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table skip_test.t2 (a
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into skip_test.t2 values (3,3);"
mysql -uroot -h 127.0.0.1 -P 4000 -e "create table skip_test.t3 (a int, b int, primary key(a));"
mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into skip_test.t3 values (1,1);"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/table_skip_diff.output || true
check_contains "check pass" $OUT_DIR/sync_diff.log
check_contains "Comparing the table data of \`skip_test\`.\`t2\` ...skipped" $OUT_DIR/table_skip_diff.output
check_contains "Comparing the table data of \`skip_test\`.\`t3\` ...skipped" $OUT_DIR/table_skip_diff.output
@@ -38,7 +38,7 @@ rm -rf $OUT_DIR/*
echo "make some table data not equal"
mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into skip_test.t1 values (4,4);"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/table_skip_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
check_contains "| \`skip_test\`.\`t1\` | succeed | true | +0/-1 | 1 | 2 |" $OUT_DIR/summary.txt
rm -rf $OUT_DIR/*
@@ -47,7 +47,7 @@ echo "make some table structure not equal"
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table skip_test.t4 (a int, b int, c int,primary key(a));"
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into skip_test.t4 values (1,1,1);"
mysql -uroot -h 127.0.0.1 -P 4000 -e "create table skip_test.t4 (a int, b int, primary key(a));"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/table_skip_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
check_contains "| \`skip_test\`.\`t4\` | succeed | false | +0/-0 | 0 | 0 |" $OUT_DIR/summary.txt
check_contains "A total of 5 tables have been compared, 1 tables finished, 2 tables failed, 2 tables skipped" $OUT_DIR/table_skip_diff.output
@@ -55,7 +55,7 @@ cat $OUT_DIR/summary.txt
rm -rf $OUT_DIR/*
echo "test router case"
-sync_diff_inspector --config=./config_router.toml > $OUT_DIR/table_skip_diff.output || true
+sync_diff_inspector --config=./config_router.toml >$OUT_DIR/table_skip_diff.output || true
check_contains "check pass" $OUT_DIR/sync_diff.log
check_contains "| \`skip_test\`.\`t5\` | skipped | false | +3/-0 | 3 | 0 |" $OUT_DIR/summary.txt
check_contains "The data of \`skip_test\`.\`t5\` does not exist in downstream database" $OUT_DIR/table_skip_diff.output
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh
index b96e0d895d8..05b015ec8f4 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh
@@ -14,44 +14,44 @@ mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@GLOBAL.SQL_MODE='ONLY_FULL_GROUP_BY,
sleep 3
for port in 4000 4001; do
- mysql -uroot -h 127.0.0.1 -P $port -e "create database if not exists tz_test"
- mysql -uroot -h 127.0.0.1 -P $port -e "create table tz_test.diff(id int, dt datetime, ts timestamp);"
- mysql -uroot -h 127.0.0.1 -P $port -e "insert into tz_test.diff values (1, '2020-05-17 09:12:13', '2020-05-17 09:12:13');"
- mysql -uroot -h 127.0.0.1 -P $port -e "set @@session.time_zone = \"-07:00\"; insert into tz_test.diff values (2, '2020-05-17 09:12:13', '2020-05-17 09:12:13');"
+ mysql -uroot -h 127.0.0.1 -P $port -e "create database if not exists tz_test"
+ mysql -uroot -h 127.0.0.1 -P $port -e "create table tz_test.diff(id int, dt datetime, ts timestamp);"
+ mysql -uroot -h 127.0.0.1 -P $port -e "insert into tz_test.diff values (1, '2020-05-17 09:12:13', '2020-05-17 09:12:13');"
+ mysql -uroot -h 127.0.0.1 -P $port -e "set @@session.time_zone = \"-07:00\"; insert into tz_test.diff values (2, '2020-05-17 09:12:13', '2020-05-17 09:12:13');"
done
echo "check with the same time_zone, check result should be pass"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/time_zone_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
# check upstream and downstream time_zone
-mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@global.time_zone = '+08:00'";
-mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@global.time_zone = '+00:00'";
+mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@global.time_zone = '+08:00'"
+mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@global.time_zone = '+00:00'"
sleep 5
echo "check with different time_zone, check result should be pass again"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/time_zone_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "set different rows, check result should be failed"
mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@session.time_zone = '-06:00'; insert into tz_test.diff values (4, '2020-05-17 09:12:13', '2020-05-17 09:12:13');"
mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@session.time_zone = '-05:00'; insert into tz_test.diff values (3, '2020-05-17 10:12:13', '2020-05-17 10:12:13');"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/time_zone_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
mv $OUT_DIR/fix-on-tidb/ $FIX_DIR/
rm -rf $OUT_DIR/*
echo "fix the rows, check result should be pass"
cat $FIX_DIR/fix-on-tidb/*.sql | mysql -uroot -h127.0.0.1 -P 4000
-sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/time_zone_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
-mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@session.time_zone = '-06:00'; select ts from tz_test.diff where id = 4 or id = 3;" > $OUT_DIR/tmp_sql_timezone
+mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@session.time_zone = '-06:00'; select ts from tz_test.diff where id = 4 or id = 3;" >$OUT_DIR/tmp_sql_timezone
check_contains "2020-05-17 09:12:13" $OUT_DIR/tmp_sql_timezone
check_not_contains "2020-05-17 10:12:13" $OUT_DIR/tmp_sql_timezone
# reset time_zone
-mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@global.time_zone = 'SYSTEM'";
-mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@global.time_zone = 'SYSTEM'";
+mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@global.time_zone = 'SYSTEM'"
+mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@global.time_zone = 'SYSTEM'"
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh
index 03ed6cbf3e1..cfec9d28126 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh
@@ -4,7 +4,7 @@ set -ex
cd "$(dirname "$0")"
-CONF_PATH=`cd ../../conf && pwd`
+CONF_PATH=$(cd ../../conf && pwd)
CA_PATH="$CONF_PATH/root.crt"
CERT_PATH="$CONF_PATH/client.crt"
KEY_PATH="$CONF_PATH/client.key"
@@ -15,13 +15,13 @@ mkdir -p $OUT_DIR
# create user for test tls
mysql -uroot -h 127.0.0.1 -P 4000 -e "create user 'root_tls'@'%' identified by '' require X509;"
mysql -uroot -h 127.0.0.1 -P 4000 -e "grant all privileges on *.* to 'root_tls'@'%';"
-mysql -uroot_tls -h 127.0.0.1 -P 4000 --ssl-ca "$CA_PATH" --ssl-cert "$CERT_PATH" --ssl-key "$KEY_PATH" -e "SHOW STATUS LIKE \"%Ssl%\";" 
+mysql -uroot_tls -h 127.0.0.1 -P 4000 --ssl-ca "$CA_PATH" --ssl-cert "$CERT_PATH" --ssl-key "$KEY_PATH" -e "SHOW STATUS LIKE \"%Ssl%\";"
echo "use sync_diff_inspector to compare data"
# sync diff tidb-tidb
CA_PATH_REG=$(echo ${CA_PATH} | sed 's/\//\\\//g')
CERT_PATH_REG=$(echo ${CERT_PATH} | sed 's/\//\\\//g')
KEY_PATH_REG=$(echo ${KEY_PATH} | sed 's/\//\\\//g')
-sed "s/\"ca-path\"#CAPATH/\"${CA_PATH_REG}\"/g" config.toml | sed "s/\"cert-path\"#CERTPATH/\"${CERT_PATH_REG}\"/g" | sed "s/\"key-path\"#KEYPATH/\"${KEY_PATH_REG}\"/g" > config_.toml
-sync_diff_inspector --config=./config_.toml > $OUT_DIR/diff.output || (cat $OUT_DIR/diff.output && exit 1)
+sed "s/\"ca-path\"#CAPATH/\"${CA_PATH_REG}\"/g" config.toml | sed "s/\"cert-path\"#CERTPATH/\"${CERT_PATH_REG}\"/g" | sed "s/\"key-path\"#KEYPATH/\"${KEY_PATH_REG}\"/g" >config_.toml
+sync_diff_inspector --config=./config_.toml >$OUT_DIR/diff.output || (cat $OUT_DIR/diff.output && exit 1)
check_contains "check pass!!!" $OUT_DIR/sync_diff.log

From ed2a5dc4c57dd58c5fe2e3c3baab84e478976dac Mon Sep 17 00:00:00 2001
From: Ruihao Chen
Date: Tue, 7 Jan 2025 10:48:00 +0800
Subject: [PATCH 22/22] fix lint

---
 sync_diff_inspector/diff/diff.go | 2 +-
 sync_diff_inspector/utils/pd.go  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/sync_diff_inspector/diff/diff.go b/sync_diff_inspector/diff/diff.go
index 3feeb9e9de1..2630897351f 100644
--- a/sync_diff_inspector/diff/diff.go
+++ b/sync_diff_inspector/diff/diff.go
@@ -50,7 +50,7 @@ func fileExists(name string) bool {
	return !os.IsNotExist(err)
}

-// GetSnapsnot get the snapshot
+// GetSnapshot get the snapshot
func GetSnapshot(latestSnap []string, snap string, db *sql.DB) string {
	if len(latestSnap) != 1 {
		return snap
diff --git a/sync_diff_inspector/utils/pd.go b/sync_diff_inspector/utils/pd.go
index af7947f1baf..c3a4f3cb845 100644
--- a/sync_diff_inspector/utils/pd.go
+++ b/sync_diff_inspector/utils/pd.go
@@ -250,6 +250,7 @@ func updateServiceSafePoint(ctx context.Context, pdClient pd.Client, snapshotTS
	}
}

+// ParseSnapshotToTSO parse snapshot string to TSO
func ParseSnapshotToTSO(pool *sql.DB, snapshot string) (uint64, error) {
	snapshotTS, err := strconv.ParseUint(snapshot, 10, 64)
	if err == nil {