From 1a013f7a0000695c8223eb87f14a2153ed62b5fd Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 11:32:09 +0800 Subject: [PATCH 01/22] Move sync_diff_inspector into tiflow repo --- .github/workflows/dm_binlog_999999.yaml | 6 +- Makefile | 15 +- dm/tests/README.md | 3 +- .../download-compatibility-test-binaries.sh | 3 - .../download-integration-test-binaries.sh | 1 - dm/tests/mariadb_master_down_and_up/case.sh | 1 - dm/tests/mariadb_master_down_and_up/lib.sh | 6 - dm/tests/tiup/lib.sh | 6 - dm/tests/tiup/upgrade-from-v1.sh | 2 - dm/tests/tiup/upgrade-from-v2.sh | 2 - dm/tests/tiup/upgrade-tidb.sh | 2 - dm/tests/upstream_switch/case.sh | 1 - dm/tests/upstream_switch/lib.sh | 6 - go.mod | 1 + go.sum | 2 + scripts/download-sync-diff.sh | 23 - sync_diff_inspector/README.md | 23 + .../checkpoints/checkpoints.go | 243 ++++ .../checkpoints/checkpoints_test.go | 120 ++ sync_diff_inspector/chunk/chunk.go | 464 ++++++++ sync_diff_inspector/chunk/chunk_test.go | 648 ++++++++++ sync_diff_inspector/config/config.go | 642 ++++++++++ sync_diff_inspector/config/config.toml | 73 ++ .../config/config_conflict.toml | 72 ++ sync_diff_inspector/config/config_dm.toml | 31 + .../config/config_sharding.toml | 99 ++ sync_diff_inspector/config/config_test.go | 112 ++ sync_diff_inspector/config/dm.go | 264 ++++ sync_diff_inspector/config/dm_test.go | 72 ++ sync_diff_inspector/config/template.go | 124 ++ sync_diff_inspector/diff/diff.go | 844 +++++++++++++ sync_diff_inspector/main.go | 150 +++ sync_diff_inspector/progress/progress.go | 480 ++++++++ sync_diff_inspector/progress/progress_test.go | 108 ++ sync_diff_inspector/report/report.go | 419 +++++++ sync_diff_inspector/report/report_test.go | 526 ++++++++ sync_diff_inspector/source/chunks_iter.go | 189 +++ .../source/common/common_test.go | 64 + sync_diff_inspector/source/common/conn.go | 74 ++ .../source/common/conn_test.go | 48 + sync_diff_inspector/source/common/rows.go | 101 ++ .../source/common/table_diff.go | 83 ++ sync_diff_inspector/source/mysql_shard.go | 390 ++++++ sync_diff_inspector/source/source.go | 429 +++++++ sync_diff_inspector/source/source_test.go | 955 +++++++++++++++ sync_diff_inspector/source/tidb.go | 285 +++++ sync_diff_inspector/splitter/bucket.go | 365 ++++++ sync_diff_inspector/splitter/index_fields.go | 111 ++ .../splitter/index_fields_test.go | 106 ++ sync_diff_inspector/splitter/limit.go | 259 ++++ sync_diff_inspector/splitter/random.go | 248 ++++ sync_diff_inspector/splitter/splitter.go | 87 ++ sync_diff_inspector/splitter/splitter_test.go | 936 +++++++++++++++ sync_diff_inspector/utils/pd.go | 288 +++++ sync_diff_inspector/utils/table.go | 187 +++ sync_diff_inspector/utils/utils.go | 1059 +++++++++++++++++ sync_diff_inspector/utils/utils_test.go | 688 +++++++++++ 57 files changed, 12479 insertions(+), 67 deletions(-) delete mode 100755 scripts/download-sync-diff.sh create mode 100644 sync_diff_inspector/README.md create mode 100644 sync_diff_inspector/checkpoints/checkpoints.go create mode 100644 sync_diff_inspector/checkpoints/checkpoints_test.go create mode 100644 sync_diff_inspector/chunk/chunk.go create mode 100644 sync_diff_inspector/chunk/chunk_test.go create mode 100644 sync_diff_inspector/config/config.go create mode 100644 sync_diff_inspector/config/config.toml create mode 100644 sync_diff_inspector/config/config_conflict.toml create mode 100644 sync_diff_inspector/config/config_dm.toml create mode 100644 sync_diff_inspector/config/config_sharding.toml create mode 100644 
sync_diff_inspector/config/config_test.go create mode 100644 sync_diff_inspector/config/dm.go create mode 100644 sync_diff_inspector/config/dm_test.go create mode 100644 sync_diff_inspector/config/template.go create mode 100644 sync_diff_inspector/diff/diff.go create mode 100644 sync_diff_inspector/main.go create mode 100644 sync_diff_inspector/progress/progress.go create mode 100644 sync_diff_inspector/progress/progress_test.go create mode 100644 sync_diff_inspector/report/report.go create mode 100644 sync_diff_inspector/report/report_test.go create mode 100644 sync_diff_inspector/source/chunks_iter.go create mode 100644 sync_diff_inspector/source/common/common_test.go create mode 100755 sync_diff_inspector/source/common/conn.go create mode 100644 sync_diff_inspector/source/common/conn_test.go create mode 100644 sync_diff_inspector/source/common/rows.go create mode 100644 sync_diff_inspector/source/common/table_diff.go create mode 100644 sync_diff_inspector/source/mysql_shard.go create mode 100644 sync_diff_inspector/source/source.go create mode 100644 sync_diff_inspector/source/source_test.go create mode 100644 sync_diff_inspector/source/tidb.go create mode 100644 sync_diff_inspector/splitter/bucket.go create mode 100644 sync_diff_inspector/splitter/index_fields.go create mode 100644 sync_diff_inspector/splitter/index_fields_test.go create mode 100644 sync_diff_inspector/splitter/limit.go create mode 100644 sync_diff_inspector/splitter/random.go create mode 100644 sync_diff_inspector/splitter/splitter.go create mode 100644 sync_diff_inspector/splitter/splitter_test.go create mode 100644 sync_diff_inspector/utils/pd.go create mode 100644 sync_diff_inspector/utils/table.go create mode 100644 sync_diff_inspector/utils/utils.go create mode 100644 sync_diff_inspector/utils/utils_test.go diff --git a/.github/workflows/dm_binlog_999999.yaml b/.github/workflows/dm_binlog_999999.yaml index 39ed09558fd..6371b773cab 100644 --- a/.github/workflows/dm_binlog_999999.yaml +++ b/.github/workflows/dm_binlog_999999.yaml @@ -44,13 +44,13 @@ jobs: key: ${{ runner.os }}-ticdc-tools-${{ hashFiles('tools/check/go.sum') }} - name: Build DM binary - run: make dm_integration_test_build + run: | + make dm_integration_test_build + make sync_diff_inspector - name: Setup CI environment run: | docker-compose -f ./dm/tests/binlog_999999/docker-compose.yml up -d - curl http://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz - mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ curl http://download.pingcap.org/tidb-nightly-linux-amd64.tar.gz | tar xz mv tidb-nightly-linux-amd64/bin/tidb-server bin/ curl -O https://dl.min.io/server/minio/release/linux-amd64/minio diff --git a/Makefile b/Makefile index cb0cc65532d..bd71b9d90f9 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ ### Makefile for tiflow -.PHONY: build test check clean fmt cdc kafka_consumer storage_consumer coverage \ +.PHONY: build test check clean fmt sync-diff-inspector cdc kafka_consumer storage_consumer coverage \ integration_test_build integration_test integration_test_mysql integration_test_kafka bank \ kafka_docker_integration_test kafka_docker_integration_test_with_build \ clean_integration_test_containers \ @@ -13,7 +13,7 @@ .DEFAULT_GOAL := default # Adapted from https://www.thapaliya.com/en/writings/well-documented-makefiles/ -help: ## Display this help and any documented user-facing targets. Other undocumented targets may be present in the Makefile. 
+help: ## Display this help and any documented user-facing targets. Other undocumented targets may be present in the Makefile.
help:
@awk 'BEGIN {FS = ": ##"; printf "Usage:\n make <target>\n\nTargets:\n"} /^[a-zA-Z0-9_\.\-\/%]+: ##/ { printf " %-45s %s\n", $$1, $$2 }' $(MAKEFILE_LIST)
@@ -136,7 +136,7 @@ dev: check test
test: unit_test dm_unit_test engine_unit_test
-build: cdc dm engine
+build: cdc dm engine sync_diff_inspector
check-makefiles: ## Check the makefiles format. Please run this target after the changes are committed.
check-makefiles: format-makefiles
@@ -158,6 +158,9 @@ build-cdc-with-failpoint: ## Build cdc with failpoint enabled.
cdc:
$(GOBUILD) -ldflags '$(LDFLAGS)' -o bin/cdc ./cmd/cdc
+sync_diff_inspector:
+ $(GOBUILD) -ldflags '$(LDFLAGS)' -o bin/sync_diff_inspector ./sync_diff_inspector/main.go
+
kafka_consumer:
$(CONSUMER_GOBUILD) -ldflags '$(LDFLAGS)' -o bin/cdc_kafka_consumer ./cmd/kafka-consumer
@@ -218,7 +221,6 @@ check_third_party_binary:
@which bin/pd-server
@which bin/tiflash
@which bin/pd-ctl
- @which bin/sync_diff_inspector
@which bin/go-ycsb
@which bin/etcdctl
@which bin/jq
@@ -496,7 +498,6 @@ install_test_python_dep:
check_third_party_binary_for_dm:
@which bin/tidb-server
- @which bin/sync_diff_inspector
@which mysql
@which bin/minio
@@ -565,7 +566,6 @@ check_third_party_binary_for_engine:
@which mysql || (echo "mysql not found in ${PATH}"; exit 1)
@which jq || (echo "jq not found in ${PATH}"; exit 1)
@which mc || (echo "mc not found in ${PATH}, you can use 'make bin/mc' and move bin/mc to ${PATH}"; exit 1)
- @which bin/sync_diff_inspector || (echo "run 'make bin/sync_diff_inspector' to download it if you need")
check_engine_integration_test:
./engine/test/utils/check_case.sh
@@ -580,9 +580,6 @@ check_cdc_integration_test:
bin/mc:
./scripts/download-mc.sh
-bin/sync_diff_inspector:
- ./scripts/download-sync-diff.sh
-
define run_engine_unit_test
@echo "running unit test for packages:" $(1)
mkdir -p $(ENGINE_TEST_DIR)
diff --git a/dm/tests/README.md b/dm/tests/README.md
index f72fe191fee..9d3e4432ad5 100644
--- a/dm/tests/README.md
+++ b/dm/tests/README.md
@@ -4,7 +4,6 @@
1. The following executables must be copied or generated or linked into these locations.
* `bin/tidb-server` can be downloaded from [tidb-master-linux-amd64](https://download.pingcap.org/tidb-master-linux-amd64.tar.gz) or installed by [tiup](https://github.com/pingcap/tiup), you can use the command `find ~/.tiup -name tidb-server` to locate `tidb-server` binary file and copy it
- * `bin/sync_diff_inspector` # can be downloaded from [tidb-enterprise-tools-latest-linux-amd64](http://download.pingcap.org/tidb-enterprise-tools-latest-linux-amd64.tar.gz) or build from [source code](https://github.com/pingcap/tidb-tools)
* `bin/minio` can be build from (https://github.com/minio/minio)
* `bin/dm-master.test` # generated by `make dm_integration_test_build`
* `bin/dm-worker.test` # generated by `make dm_integration_test_build`
@@ -32,7 +31,7 @@
### Integration Test
-1. Run `make dm_integration_test_build` to generate DM related binary for integration test
+1. Run `make dm_integration_test_build` and `make sync_diff_inspector` to generate DM related binaries for integration test.
 2.
Setup two MySQL servers (the first one: 5.6 ~ 5.7; the second one: 8.0.21, suggest you are same as [CI](https://github.com/PingCAP-QE/ci/blob/main/jenkins/pipelines/ci/dm/dm_ghpr_new_test.groovy#L164-L172)) with [binlog enabled first](https://dev.mysql.com/doc/refman/5.7/en/replication-howto-masterbaseconfig.html) and [set `GTID_MODE=ON`](https://dev.mysql.com/doc/refman/5.7/en/replication-mode-change-online-enable-gtids.html), You need set the mysql port and root password according to the following table. diff --git a/dm/tests/download-compatibility-test-binaries.sh b/dm/tests/download-compatibility-test-binaries.sh index cceb8c4432d..8cab339dd39 100755 --- a/dm/tests/download-compatibility-test-binaries.sh +++ b/dm/tests/download-compatibility-test-binaries.sh @@ -70,9 +70,6 @@ color-green "Download binaries..." download "$tidb_download_url" "tidb-server.tar.gz" "tmp/tidb-server.tar.gz" tar -xz -C third_bin bin/tidb-server -f tmp/tidb-server.tar.gz && mv third_bin/bin/tidb-server third_bin/ -download "$sync_diff_inspector_download_url" "tidb-enterprise-tools-nightly-linux-amd64.tar.gz" "tmp/tidb-enterprise-tools-nightly-linux-amd64.tar.gz" -tar -xz -C third_bin tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector -f tmp/tidb-enterprise-tools-nightly-linux-amd64.tar.gz -mv third_bin/tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector third_bin/ && rm -rf third_bin/tidb-enterprise-tools-nightly-linux-amd64 download "$mydumper_download_url" "tidb-enterprise-tools-latest-linux-amd64.tar.gz" "tmp/tidb-enterprise-tools-latest-linux-amd64.tar.gz" tar -xz -C third_bin tidb-enterprise-tools-latest-linux-amd64/bin/mydumper -f tmp/tidb-enterprise-tools-latest-linux-amd64.tar.gz mv third_bin/tidb-enterprise-tools-latest-linux-amd64/bin/mydumper third_bin/ && rm -rf third_bin/tidb-enterprise-tools-latest-linux-amd64 diff --git a/dm/tests/download-integration-test-binaries.sh b/dm/tests/download-integration-test-binaries.sh index 34dab604e85..6086b1c3367 100755 --- a/dm/tests/download-integration-test-binaries.sh +++ b/dm/tests/download-integration-test-binaries.sh @@ -91,7 +91,6 @@ tar -xz -C third_bin 'bin/*' -f tmp/pd-server.tar.gz && mv third_bin/bin/* third download "$tikv_download_url" "tikv-server.tar.gz" "tmp/tikv-server.tar.gz" tar -xz -C third_bin bin/tikv-server -f tmp/tikv-server.tar.gz && mv third_bin/bin/tikv-server third_bin/ download "$tidb_tools_download_url" "tidb-tools.tar.gz" "tmp/tidb-tools.tar.gz" -tar -xz -C third_bin 'bin/sync_diff_inspector' -f tmp/tidb-tools.tar.gz && mv third_bin/bin/sync_diff_inspector third_bin/ download "$minio_download_url" "minio.tar.gz" "tmp/minio.tar.gz" tar -xz -C third_bin -f tmp/minio.tar.gz download "$gh_os_download_url" "gh-ost-binary-linux-20200828140552.tar.gz" "tmp/gh-ost-binary-linux-20200828140552.tar.gz" diff --git a/dm/tests/mariadb_master_down_and_up/case.sh b/dm/tests/mariadb_master_down_and_up/case.sh index e678d3bcfd2..dc59fe3e1b2 100644 --- a/dm/tests/mariadb_master_down_and_up/case.sh +++ b/dm/tests/mariadb_master_down_and_up/case.sh @@ -107,7 +107,6 @@ function clean_task() { function test_master_down_and_up() { cleanup_process clean_data - install_sync_diff setup_replica gen_full_data run_dm_components_and_create_source $1 diff --git a/dm/tests/mariadb_master_down_and_up/lib.sh b/dm/tests/mariadb_master_down_and_up/lib.sh index 3d38de273e7..4a548c73425 100644 --- a/dm/tests/mariadb_master_down_and_up/lib.sh +++ b/dm/tests/mariadb_master_down_and_up/lib.sh @@ -27,12 +27,6 @@ function exec_tidb() { echo $2 
| mysql -uroot -h127.0.0.1 -P$1 } -function install_sync_diff() { - curl https://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz - mkdir -p bin - mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ -} - function get_master_status() { arr=$(echo "show master status;" | MYSQL_PWD=123456 mysql -uroot -h127.0.0.1 -P3306 | awk 'NR==2') echo $arr diff --git a/dm/tests/tiup/lib.sh b/dm/tests/tiup/lib.sh index 8b57d9355e7..441fd2da753 100755 --- a/dm/tests/tiup/lib.sh +++ b/dm/tests/tiup/lib.sh @@ -56,12 +56,6 @@ function run_sql_tidb_with_retry() { fi } -function install_sync_diff() { - curl https://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz - mkdir -p bin - mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ -} - function exec_full_stage() { # drop previous data exec_sql mysql1 3306 "DROP DATABASE IF EXISTS $DB1;" diff --git a/dm/tests/tiup/upgrade-from-v1.sh b/dm/tests/tiup/upgrade-from-v1.sh index 3520dd0f7b9..75b4244efb0 100755 --- a/dm/tests/tiup/upgrade-from-v1.sh +++ b/dm/tests/tiup/upgrade-from-v1.sh @@ -122,8 +122,6 @@ function destroy_v2_by_tiup() { } function test() { - install_sync_diff - deploy_v1_by_ansible migrate_in_v1 diff --git a/dm/tests/tiup/upgrade-from-v2.sh b/dm/tests/tiup/upgrade-from-v2.sh index f5781c3002c..1a1252e94b2 100755 --- a/dm/tests/tiup/upgrade-from-v2.sh +++ b/dm/tests/tiup/upgrade-from-v2.sh @@ -170,8 +170,6 @@ function destroy_v2_by_tiup() { } function test() { - install_sync_diff - deploy_previous_v2 migrate_in_previous_v2 diff --git a/dm/tests/tiup/upgrade-tidb.sh b/dm/tests/tiup/upgrade-tidb.sh index 434c74cc7a9..1207e512f27 100755 --- a/dm/tests/tiup/upgrade-tidb.sh +++ b/dm/tests/tiup/upgrade-tidb.sh @@ -52,8 +52,6 @@ function destroy_v2_by_tiup() { # run this before upgrade TiDB. 
function before_upgrade() { - install_sync_diff - deploy_dm migrate_before_upgrade diff --git a/dm/tests/upstream_switch/case.sh b/dm/tests/upstream_switch/case.sh index 012b4df8ff3..185ebdbd878 100644 --- a/dm/tests/upstream_switch/case.sh +++ b/dm/tests/upstream_switch/case.sh @@ -208,7 +208,6 @@ function check_master() { function test_relay() { cleanup_process check_master - install_sync_diff clean_data prepare_binlogs setup_replica diff --git a/dm/tests/upstream_switch/lib.sh b/dm/tests/upstream_switch/lib.sh index 65064fb4cb6..b11537d988f 100644 --- a/dm/tests/upstream_switch/lib.sh +++ b/dm/tests/upstream_switch/lib.sh @@ -30,12 +30,6 @@ function exec_tidb() { echo $2 | mysql -uroot -h$1 -P4000 } -function install_sync_diff() { - curl https://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz - mkdir -p bin - mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ -} - function prepare_more_binlogs() { exec_sql $1 "create database db1 collate latin1_bin;" exec_sql $1 "flush logs;" diff --git a/go.mod b/go.mod index 3757a1ef1dd..cc9ee391f34 100644 --- a/go.mod +++ b/go.mod @@ -62,6 +62,7 @@ require ( github.com/mailru/easyjson v0.7.7 github.com/mattn/go-shellwords v1.0.12 github.com/modern-go/reflect2 v1.0.2 + github.com/olekukonko/tablewriter v0.0.5 github.com/phayes/freeport v0.0.0-20180830031419-95f893ade6f2 github.com/pierrec/lz4/v4 v4.1.18 github.com/pingcap/check v0.0.0-20211026125417-57bd13f7b5f0 diff --git a/go.sum b/go.sum index dd74ef9a83f..3e54b7d9b40 100644 --- a/go.sum +++ b/go.sum @@ -826,6 +826,8 @@ github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLA github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= +github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= github.com/onsi/ginkgo v1.13.0/go.mod h1:+REjRxOmWfHCjfv9TTWB1jD1Frx4XydAD3zm1lskyM0= diff --git a/scripts/download-sync-diff.sh b/scripts/download-sync-diff.sh deleted file mode 100755 index 3ee26c6e505..00000000000 --- a/scripts/download-sync-diff.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2022 PingCAP, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-set -eu
-
-echo "will download tidb-tools v6.1.0 to get sync_diff_inspector"
-curl -C - --retry 3 -o /tmp/tidb-tools.tar.gz https://download.pingcap.org/tidb-community-toolkit-v6.1.0-linux-amd64.tar.gz
-mkdir -p /tmp/tidb-tools
-tar -zxf /tmp/tidb-tools.tar.gz -C /tmp/tidb-tools
-mv /tmp/tidb-tools/tidb-community-toolkit-v6.1.0-linux-amd64/sync_diff_inspector ./bin/sync_diff_inspector
-rm -r /tmp/tidb-tools
-rm /tmp/tidb-tools.tar.gz
diff --git a/sync_diff_inspector/README.md b/sync_diff_inspector/README.md
new file mode 100644
index 00000000000..87dc46a10c3
--- /dev/null
+++ b/sync_diff_inspector/README.md
@@ -0,0 +1,23 @@
+# sync-diff-inspector
+
+sync-diff-inspector is a tool for comparing two databases' data.
+
+## How to use
+
+```shell
+Usage of diff:
+ -V, --version print version of sync_diff_inspector
+ -L, --log-level string log level: debug, info, warn, error, fatal (default "info")
+ -C, --config string Config file
+ -T, --template string export a template config file
+ --dm-addr string the address of DM
+ --dm-task string identifier of dm task
+ --check-thread-count int how many goroutines are created to check data (default 4)
+ --export-fix-sql set true if want to compare rows or set to false will only compare checksum (default true)
+```
+
+For more details you can read the [config.toml](./config/config.toml), [config_sharding.toml](./config/config_sharding.toml) and [config_dm.toml](./config/config_dm.toml).
+
+## Documents
+- `zh`: [Overview in Chinese](https://docs.pingcap.com/zh/tidb/stable/sync-diff-inspector-overview)
+- `en`: [Overview in English](https://docs.pingcap.com/tidb/stable/sync-diff-inspector-overview)
diff --git a/sync_diff_inspector/checkpoints/checkpoints.go b/sync_diff_inspector/checkpoints/checkpoints.go
new file mode 100644
index 00000000000..010fdd2a9c2
--- /dev/null
+++ b/sync_diff_inspector/checkpoints/checkpoints.go
@@ -0,0 +1,243 @@
+// Copyright 2021 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package checkpoints
+
+import (
+ "container/heap"
+ "context"
+ "encoding/json"
+ "os"
+ "sync"
+
+ "github.com/pingcap/tiflow/sync_diff_inspector/config"
+ "github.com/pingcap/tiflow/sync_diff_inspector/report"
+
+ "github.com/pingcap/tiflow/sync_diff_inspector/chunk"
+ "github.com/siddontang/go/ioutil2"
+
+ "github.com/pingcap/errors"
+ "github.com/pingcap/log"
+ "go.uber.org/zap"
+)
+
+const (
+ // SuccessState
+ // for chunk: means this chunk's data is equal
+ // for table: means all chunks in this table are equal (except ignored chunks)
+ SuccessState = "success"
+
+ // FailedState
+ // for chunk: means this chunk's data is not equal
+ // for table: means some chunks' data is not equal or some chunk check failed in this table
+ FailedState = "failed"
+
+ // IgnoreState
+ // for chunk: this chunk is ignored (e.g. an Empty chunk is skipped)
+ // for table: tables do not have this state
+ IgnoreState = "ignore"
+)
+
+type Node struct {
+ State string `json:"state"` // indicates the state ("success" or "failed") of the chunk
+
+ ChunkRange *chunk.Range `json:"chunk-range"`
+ IndexID int64 `json:"index-id"`
+}
+
+func (n *Node) GetID() *chunk.ChunkID { return n.ChunkRange.Index }
+
+func (n *Node) GetState() string { return n.State }
+
+func (n *Node) GetTableIndex() int { return n.ChunkRange.Index.TableIndex }
+
+func (n *Node) GetBucketIndexLeft() int { return n.ChunkRange.Index.BucketIndexLeft }
+
+func (n *Node) GetBucketIndexRight() int { return n.ChunkRange.Index.BucketIndexRight }
+
+func (n *Node) GetChunkIndex() int { return n.ChunkRange.Index.ChunkIndex }
+
+// IsAdjacent reports whether the next node is the adjacent node.
+// It is the key logic for checkpoint updates:
+// we need to save nodes to the checkpoint in global order.
+func (n *Node) IsAdjacent(next *Node) bool {
+ if n.GetTableIndex() == next.GetTableIndex()-1 {
+ if n.ChunkRange.IsLastChunkForTable() && next.ChunkRange.IsFirstChunkForTable() {
+ return true
+ }
+ return false
+ }
+ if n.GetTableIndex() == next.GetTableIndex() {
+ // same table
+ if n.GetBucketIndexRight() == next.GetBucketIndexLeft()-1 {
+ if n.ChunkRange.IsLastChunkForBucket() && next.ChunkRange.IsFirstChunkForBucket() {
+ return true
+ }
+ return false
+ }
+ if n.GetBucketIndexLeft() == next.GetBucketIndexLeft() {
+ return n.GetChunkIndex() == next.GetChunkIndex()-1
+ }
+ return false
+ }
+ return false
+}
+
+// IsLess reports whether the current node is less than the next node.
+func (n *Node) IsLess(next *Node) bool {
+ if n.GetTableIndex() < next.GetTableIndex() {
+ return true
+ }
+ if n.GetTableIndex() == next.GetTableIndex() {
+ if n.GetBucketIndexLeft() <= next.GetBucketIndexLeft()-1 {
+ return true
+ }
+ if n.GetBucketIndexLeft() == next.GetBucketIndexLeft() {
+ return n.GetChunkIndex() < next.GetChunkIndex()
+ }
+ return false
+ }
+ return false
+}
+
+// nodeHeap maintains a min heap, which can be accessed by multiple goroutines and is protected by a mutex.
+type nodeHeap struct {
+ Nodes []*Node
+ CurrentSavedNode *Node // CurrentSavedNode saves the minimum checked chunk, updated by the `GetChunkSnapshot` method
+ mu *sync.Mutex // protects the critical section
+}
+
+// Checkpoint provides the ability to restart the sync-diff process from the
+// latest previous exit point (due to error or intention).
+type Checkpoint struct {
+ hp *nodeHeap
+}
+
+// SavedState contains the information of the latest checked chunk and the state of `report`.
+// When sync-diff starts from a checkpoint, it loads this information and continues running.
+type SavedState struct {
+ Chunk *Node `json:"chunk-info"`
+ Report *report.Report `json:"report-info"`
+}
+
+// InitCurrentSavedID is only used during initialization and does not take the lock, so be cautious.
+func (cp *Checkpoint) InitCurrentSavedID(n *Node) {
+ cp.hp.CurrentSavedNode = n
+}
+
+func (cp *Checkpoint) GetCurrentSavedID() *Node {
+ cp.hp.mu.Lock()
+ defer cp.hp.mu.Unlock()
+ return cp.hp.CurrentSavedNode
+}
+
+func (cp *Checkpoint) Insert(node *Node) {
+ cp.hp.mu.Lock()
+ heap.Push(cp.hp, node)
+ cp.hp.mu.Unlock()
+}
+
+// Len - gets the length of the heap
+func (hp *nodeHeap) Len() int { return len(hp.Nodes) }
+
+// Less - determines which node has higher priority than the other
+func (hp *nodeHeap) Less(i, j int) bool {
+ return hp.Nodes[i].IsLess(hp.Nodes[j])
+}
+
+// Swap - implementation of swap for the heap interface
+func (hp *nodeHeap) Swap(i, j int) {
+ hp.Nodes[i], hp.Nodes[j] = hp.Nodes[j], hp.Nodes[i]
+}
+
+// Push - implementation of push for the heap interface
+func (hp *nodeHeap) Push(x interface{}) {
+ hp.Nodes = append(hp.Nodes, x.(*Node))
+}
+
+// Pop - implementation of pop for the heap interface
+func (hp *nodeHeap) Pop() (item interface{}) {
+ if len(hp.Nodes) == 0 {
+ return
+ }
+
+ hp.Nodes, item = hp.Nodes[:len(hp.Nodes)-1], hp.Nodes[len(hp.Nodes)-1]
+ return
+}
+
+func (cp *Checkpoint) Init() {
+ hp := &nodeHeap{
+ mu: &sync.Mutex{},
+ Nodes: make([]*Node, 0),
+ CurrentSavedNode: &Node{
+ ChunkRange: &chunk.Range{
+ Index: chunk.GetInitChunkID(),
+ IsFirst: true,
+ IsLast: true,
+ },
+ },
+ }
+ heap.Init(hp)
+ cp.hp = hp
+}
+
+// GetChunkSnapshot gets the snapshot of the minimum continuous checked chunk
+func (cp *Checkpoint) GetChunkSnapshot() (cur *Node) {
+ cp.hp.mu.Lock()
+ defer cp.hp.mu.Unlock()
+ for cp.hp.Len() != 0 && cp.hp.CurrentSavedNode.IsAdjacent(cp.hp.Nodes[0]) {
+ cp.hp.CurrentSavedNode = heap.Pop(cp.hp).(*Node)
+ cur = cp.hp.CurrentSavedNode
+ }
+ // wait for next 10s to check
+ return cur
+}
+
+// SaveChunk saves the chunk to file.
+func (cp *Checkpoint) SaveChunk(ctx context.Context, fileName string, cur *Node, reportInfo *report.Report) (*chunk.ChunkID, error) { + if cur == nil { + return nil, nil + } + + savedState := &SavedState{ + Chunk: cur, + Report: reportInfo, + } + checkpointData, err := json.Marshal(savedState) + if err != nil { + log.Warn("fail to save the chunk to the file", zap.Any("chunk index", cur.GetID()), zap.Error(err)) + return nil, errors.Trace(err) + } + + if err = ioutil2.WriteFileAtomic(fileName, checkpointData, config.LocalFilePerm); err != nil { + return nil, err + } + log.Info("save checkpoint", + zap.Any("chunk", cur), + zap.String("state", cur.GetState())) + return cur.GetID(), nil +} + +// LoadChunk loads chunk info from file `chunk` +func (cp *Checkpoint) LoadChunk(fileName string) (*Node, *report.Report, error) { + bytes, err := os.ReadFile(fileName) + if err != nil { + return nil, nil, errors.Trace(err) + } + n := &SavedState{} + err = json.Unmarshal(bytes, n) + if err != nil { + return nil, nil, errors.Trace(err) + } + return n.Chunk, n.Report, nil +} diff --git a/sync_diff_inspector/checkpoints/checkpoints_test.go b/sync_diff_inspector/checkpoints/checkpoints_test.go new file mode 100644 index 00000000000..29b1a76a586 --- /dev/null +++ b/sync_diff_inspector/checkpoints/checkpoints_test.go @@ -0,0 +1,120 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package checkpoints + +import ( + "context" + "math/rand" + "os" + "strconv" + "sync" + "testing" + "time" + + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/stretchr/testify/require" +) + +func TestSaveChunk(t *testing.T) { + checker := new(Checkpoint) + checker.Init() + ctx := context.Background() + cur := checker.GetChunkSnapshot() + id, err := checker.SaveChunk(ctx, "TestSaveChunk", cur, nil) + require.NoError(t, err) + require.Nil(t, id) + wg := &sync.WaitGroup{} + rounds := 100 + for i := 0; i < rounds; i++ { + wg.Add(1) + go func(i int) { + node := &Node{ + ChunkRange: &chunk.Range{ + Index: &chunk.ChunkID{ + TableIndex: 0, + BucketIndexLeft: i / 10, + BucketIndexRight: i / 10, + ChunkIndex: i % 10, + ChunkCnt: 10, + }, + Bounds: []*chunk.Bound{ + { + HasLower: i != 0, + Lower: strconv.Itoa(i + 1000), + Upper: strconv.Itoa(i + 1000 + 1), + HasUpper: i != rounds, + }, + }, + }, + + State: SuccessState, + } + if rand.Intn(4) == 0 { + time.Sleep(time.Duration(rand.Intn(3)) * time.Second) + } + checker.Insert(node) + wg.Done() + }(i) + } + wg.Wait() + defer os.Remove("TestSaveChunk") + + cur = checker.GetChunkSnapshot() + require.NotNil(t, cur) + id, err = checker.SaveChunk(ctx, "TestSaveChunk", cur, nil) + require.NoError(t, err) + require.Equal(t, id.Compare(&chunk.ChunkID{TableIndex: 0, BucketIndexLeft: 9, BucketIndexRight: 9, ChunkIndex: 9}), 0) +} + +func TestLoadChunk(t *testing.T) { + checker := new(Checkpoint) + checker.Init() + ctx := context.Background() + rounds := 100 + wg := &sync.WaitGroup{} + for i := 0; i < rounds; i++ { + wg.Add(1) + go func(i int) { + node := &Node{ + ChunkRange: &chunk.Range{ + Bounds: []*chunk.Bound{ + { + HasLower: i != 0, + Lower: strconv.Itoa(i + 1000), + Upper: strconv.Itoa(i + 1000 + 1), + HasUpper: i != rounds, + }, + }, + Index: &chunk.ChunkID{ + TableIndex: 0, + BucketIndexLeft: i / 10, + BucketIndexRight: i / 10, + ChunkIndex: i % 10, + ChunkCnt: 10, + }, + }, + } + checker.Insert(node) + wg.Done() + }(i) + } + wg.Wait() + defer os.Remove("TestLoadChunk") + cur := checker.GetChunkSnapshot() + id, err := checker.SaveChunk(ctx, "TestLoadChunk", cur, nil) + require.NoError(t, err) + node, _, err := checker.LoadChunk("TestLoadChunk") + require.NoError(t, err) + require.Equal(t, node.GetID().Compare(id), 0) +} diff --git a/sync_diff_inspector/chunk/chunk.go b/sync_diff_inspector/chunk/chunk.go new file mode 100644 index 00000000000..6943f413d96 --- /dev/null +++ b/sync_diff_inspector/chunk/chunk.go @@ -0,0 +1,464 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package chunk + +import ( + "encoding/json" + "fmt" + "strconv" + "strings" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/util/dbutil" + "go.uber.org/zap" +) + +const ( + lt = "<" + lte = "<=" + gt = ">" +) + +type ChunkType int + +const ( + Bucket ChunkType = iota + 1 + Random + Limit + Others + Empty +) + +// Bound represents a bound for a column +type Bound struct { + Column string `json:"column"` + Lower string `json:"lower"` + Upper string `json:"upper"` + + HasLower bool `json:"has-lower"` + HasUpper bool `json:"has-upper"` +} + +// ChunkID is to identify the sequence of chunks +type ChunkID struct { + TableIndex int `json:"table-index"` + // we especially treat random split has only one bucket + // which is the whole table + // range is [left, right] + BucketIndexLeft int `json:"bucket-index-left"` + BucketIndexRight int `json:"bucket-index-right"` + ChunkIndex int `json:"chunk-index"` + // `ChunkCnt` is the number of chunks in this bucket + // We can compare `ChunkIndex` and `ChunkCnt` to know + // whether this chunk is the last one + ChunkCnt int `json:"chunk-count"` +} + +func GetInitChunkID() *ChunkID { + return &ChunkID{ + TableIndex: -1, + BucketIndexLeft: -1, + BucketIndexRight: -1, + ChunkIndex: -1, + ChunkCnt: 0, + } +} + +func (c *ChunkID) Compare(o *ChunkID) int { + if c.TableIndex < o.TableIndex { + return -1 + } + if c.TableIndex > o.TableIndex { + return 1 + } + + // c.TableIndex == o.TableIndex + if c.BucketIndexLeft < o.BucketIndexLeft { + return -1 + } + if c.BucketIndexLeft > o.BucketIndexLeft { + return 1 + } + // c.BucketIndexLeft == o.BucketIndexLeft + if c.ChunkIndex < o.ChunkIndex { + return -1 + } + if c.ChunkIndex == o.ChunkIndex { + return 0 + } + return 1 +} + +func (c *ChunkID) Copy() *ChunkID { + cp := *c + return &cp +} + +func (c *ChunkID) ToString() string { + return fmt.Sprintf("%d:%d-%d:%d:%d", c.TableIndex, c.BucketIndexLeft, c.BucketIndexRight, c.ChunkIndex, c.ChunkCnt) +} + +func (c *ChunkID) FromString(s string) error { + ids := strings.Split(s, ":") + tableIndex, err := strconv.Atoi(ids[0]) + if err != nil { + return errors.Trace(err) + } + + bucketIndex := strings.Split(ids[1], "-") + bucketIndexLeft, err := strconv.Atoi(bucketIndex[0]) + if err != nil { + return errors.Trace(err) + } + bucketIndexRight, err := strconv.Atoi(bucketIndex[1]) + if err != nil { + return errors.Trace(err) + } + + chunkIndex, err := strconv.Atoi(ids[2]) + if err != nil { + return errors.Trace(err) + } + chunkCnt, err := strconv.Atoi(ids[3]) + if err != nil { + return errors.Trace(err) + } + c.TableIndex, c.BucketIndexLeft, c.BucketIndexRight, c.ChunkIndex, c.ChunkCnt = tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, chunkCnt + return nil +} + +// Range represents chunk range +type Range struct { + Index *ChunkID `json:"index"` + Type ChunkType `json:"type"` + Bounds []*Bound `json:"bounds"` + IsFirst bool `json:"is-first"` + IsLast bool `json:"is-last"` + + Where string `json:"where"` + Args []interface{} `json:"args"` + + columnOffset map[string]int +} + +func (r *Range) IsFirstChunkForBucket() bool { + return r.Index.ChunkIndex == 0 +} + +func (r *Range) IsLastChunkForBucket() bool { + return r.Index.ChunkIndex == r.Index.ChunkCnt-1 +} + +// NewChunkRange return a Range. 
+func NewChunkRange() *Range { + return &Range{ + Bounds: make([]*Bound, 0, 2), + columnOffset: make(map[string]int), + Index: &ChunkID{}, + } +} + +// NewChunkRangeOffset return a Range in sequence +func NewChunkRangeOffset(columnOffset map[string]int) *Range { + bounds := make([]*Bound, len(columnOffset)) + for column, offset := range columnOffset { + bounds[offset] = &Bound{ + Column: column, + HasLower: false, + HasUpper: false, + } + } + return &Range{ + Bounds: bounds, + columnOffset: columnOffset, + } +} + +func (c *Range) IsLastChunkForTable() bool { + if c.IsLast { + return true + } + // calculate from bounds + for _, b := range c.Bounds { + if b.HasUpper { + return false + } + } + return true +} + +func (c *Range) IsFirstChunkForTable() bool { + if c.IsFirst { + return true + } + // calculate from bounds + for _, b := range c.Bounds { + if b.HasLower { + return false + } + } + return true +} + +// String returns the string of Range, used for log. +func (c *Range) String() string { + chunkBytes, err := json.Marshal(c) + if err != nil { + log.Warn("fail to encode chunk into string", zap.Error(err)) + return "" + } + + return string(chunkBytes) +} + +func (c *Range) ToString(collation string) (string, []interface{}) { + if collation != "" { + collation = fmt.Sprintf(" COLLATE '%s'", collation) + } + + /* for example: + there is a bucket in TiDB, and the lowerbound and upperbound are (A, B1, C1), (A, B2, C2), and the columns are `a`, `b` and `c`, + this bucket's data range is (a = A) AND (b > B1 or (b == B1 and c > C1)) AND (b < B2 or (b == B2 and c <= C2)) + */ + + sameCondition := make([]string, 0, 1) + lowerCondition := make([]string, 0, 1) + upperCondition := make([]string, 0, 1) + sameArgs := make([]interface{}, 0, 1) + lowerArgs := make([]interface{}, 0, 1) + upperArgs := make([]interface{}, 0, 1) + + preConditionForLower := make([]string, 0, 1) + preConditionForUpper := make([]string, 0, 1) + preConditionArgsForLower := make([]interface{}, 0, 1) + preConditionArgsForUpper := make([]interface{}, 0, 1) + + i := 0 + for ; i < len(c.Bounds); i++ { + bound := c.Bounds[i] + if !(bound.HasLower && bound.HasUpper) { + break + } + + if bound.Lower != bound.Upper { + break + } + + sameCondition = append(sameCondition, fmt.Sprintf("%s%s = ?", dbutil.ColumnName(bound.Column), collation)) + sameArgs = append(sameArgs, bound.Lower) + } + + if i == len(c.Bounds) && i > 0 { + // All the columns are equal in bounds, should return FALSE! 
+ return "FALSE", nil + } + + for ; i < len(c.Bounds); i++ { + bound := c.Bounds[i] + lowerSymbol := gt + upperSymbol := lt + if i == len(c.Bounds)-1 { + upperSymbol = lte + } + + if bound.HasLower { + if len(preConditionForLower) > 0 { + lowerCondition = append(lowerCondition, fmt.Sprintf("(%s AND %s%s %s ?)", strings.Join(preConditionForLower, " AND "), dbutil.ColumnName(bound.Column), collation, lowerSymbol)) + lowerArgs = append(append(lowerArgs, preConditionArgsForLower...), bound.Lower) + } else { + lowerCondition = append(lowerCondition, fmt.Sprintf("(%s%s %s ?)", dbutil.ColumnName(bound.Column), collation, lowerSymbol)) + lowerArgs = append(lowerArgs, bound.Lower) + } + preConditionForLower = append(preConditionForLower, fmt.Sprintf("%s%s = ?", dbutil.ColumnName(bound.Column), collation)) + preConditionArgsForLower = append(preConditionArgsForLower, bound.Lower) + } + + if bound.HasUpper { + if len(preConditionForUpper) > 0 { + upperCondition = append(upperCondition, fmt.Sprintf("(%s AND %s%s %s ?)", strings.Join(preConditionForUpper, " AND "), dbutil.ColumnName(bound.Column), collation, upperSymbol)) + upperArgs = append(append(upperArgs, preConditionArgsForUpper...), bound.Upper) + } else { + upperCondition = append(upperCondition, fmt.Sprintf("(%s%s %s ?)", dbutil.ColumnName(bound.Column), collation, upperSymbol)) + upperArgs = append(upperArgs, bound.Upper) + } + preConditionForUpper = append(preConditionForUpper, fmt.Sprintf("%s%s = ?", dbutil.ColumnName(bound.Column), collation)) + preConditionArgsForUpper = append(preConditionArgsForUpper, bound.Upper) + } + } + + if len(sameCondition) == 0 { + if len(upperCondition) == 0 && len(lowerCondition) == 0 { + return "TRUE", nil + } + + if len(upperCondition) == 0 { + return strings.Join(lowerCondition, " OR "), lowerArgs + } + + if len(lowerCondition) == 0 { + return strings.Join(upperCondition, " OR "), upperArgs + } + + return fmt.Sprintf("(%s) AND (%s)", strings.Join(lowerCondition, " OR "), strings.Join(upperCondition, " OR ")), append(lowerArgs, upperArgs...) + } else { + if len(upperCondition) == 0 && len(lowerCondition) == 0 { + return strings.Join(sameCondition, " AND "), sameArgs + } + + if len(upperCondition) == 0 { + return fmt.Sprintf("(%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(lowerCondition, " OR ")), append(sameArgs, lowerArgs...) + } + + if len(lowerCondition) == 0 { + return fmt.Sprintf("(%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(upperCondition, " OR ")), append(sameArgs, upperArgs...) + } + + return fmt.Sprintf("(%s) AND (%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(lowerCondition, " OR "), strings.Join(upperCondition, " OR ")), append(append(sameArgs, lowerArgs...), upperArgs...) 
+ } +} + +func (c *Range) ToMeta() string { + lowerCondition := make([]string, 0, 1) + upperCondition := make([]string, 0, 1) + columnName := make([]string, 0, 1) + for _, bound := range c.Bounds { + columnName = append(columnName, bound.Column) + if bound.HasLower { + lowerCondition = append(lowerCondition, bound.Lower) + } + if bound.HasUpper { + upperCondition = append(upperCondition, bound.Upper) + } + } + if len(upperCondition) == 0 && len(lowerCondition) == 0 { + return "range in sequence: Full" + } + if len(upperCondition) == 0 { + return fmt.Sprintf("range in sequence: (%s) < (%s)", strings.Join(lowerCondition, ","), strings.Join(columnName, ",")) + } + if len(lowerCondition) == 0 { + return fmt.Sprintf("range in sequence: (%s) <= (%s)", strings.Join(columnName, ","), strings.Join(upperCondition, ",")) + } + return fmt.Sprintf("range in sequence: (%s) < (%s) <= (%s)", strings.Join(lowerCondition, ","), strings.Join(columnName, ","), strings.Join(upperCondition, ",")) +} + +func (c *Range) addBound(bound *Bound) { + c.Bounds = append(c.Bounds, bound) + c.columnOffset[bound.Column] = len(c.Bounds) - 1 +} + +func (c *Range) Update(column, lower, upper string, updateLower, updateUpper bool) { + if offset, ok := c.columnOffset[column]; ok { + // update the bound + if updateLower { + c.Bounds[offset].Lower = lower + c.Bounds[offset].HasLower = true + } + if updateUpper { + c.Bounds[offset].Upper = upper + c.Bounds[offset].HasUpper = true + } + + return + } + + // add a new bound + c.addBound(&Bound{ + Column: column, + Lower: lower, + Upper: upper, + HasLower: updateLower, + HasUpper: updateUpper, + }) +} + +func (c *Range) Copy() *Range { + newChunk := NewChunkRange() + for _, bound := range c.Bounds { + newChunk.addBound(&Bound{ + Column: bound.Column, + Lower: bound.Lower, + Upper: bound.Upper, + HasLower: bound.HasLower, + HasUpper: bound.HasUpper, + }) + } + + return newChunk +} + +func (c *Range) Clone() *Range { + newChunk := NewChunkRange() + for _, bound := range c.Bounds { + newChunk.addBound(&Bound{ + Column: bound.Column, + Lower: bound.Lower, + Upper: bound.Upper, + HasLower: bound.HasLower, + HasUpper: bound.HasUpper, + }) + } + newChunk.Type = c.Type + newChunk.Where = c.Where + newChunk.Args = c.Args + for i, v := range c.columnOffset { + newChunk.columnOffset[i] = v + } + newChunk.Index = c.Index.Copy() + newChunk.IsFirst = c.IsFirst + newChunk.IsLast = c.IsLast + return newChunk +} + +func (c *Range) CopyAndUpdate(column, lower, upper string, updateLower, updateUpper bool) *Range { + newChunk := c.Copy() + newChunk.Update(column, lower, upper, updateLower, updateUpper) + return newChunk +} + +// Notice: chunk may contain not only one bucket, which can be expressed as a range [3, 5], +// +// And `lastBucketID` means the `5` and `firstBucketID` means the `3`. 
+func InitChunks(chunks []*Range, t ChunkType, firstBucketID, lastBucketID int, index int, collation, limits string, chunkCnt int) { + if chunks == nil { + return + } + for _, chunk := range chunks { + conditions, args := chunk.ToString(collation) + chunk.Where = fmt.Sprintf("((%s) AND (%s))", conditions, limits) + chunk.Args = args + chunk.Index = &ChunkID{ + BucketIndexLeft: firstBucketID, + BucketIndexRight: lastBucketID, + ChunkIndex: index, + ChunkCnt: chunkCnt, + } + chunk.Type = t + index++ + } +} + +func InitChunk(chunk *Range, t ChunkType, firstBucketID, lastBucketID int, collation, limits string) { + conditions, args := chunk.ToString(collation) + chunk.Where = fmt.Sprintf("((%s) AND (%s))", conditions, limits) + chunk.Args = args + chunk.Index = &ChunkID{ + BucketIndexLeft: firstBucketID, + BucketIndexRight: lastBucketID, + ChunkIndex: 0, + ChunkCnt: 1, + } + chunk.Type = t +} diff --git a/sync_diff_inspector/chunk/chunk_test.go b/sync_diff_inspector/chunk/chunk_test.go new file mode 100644 index 00000000000..b5d62dd9449 --- /dev/null +++ b/sync_diff_inspector/chunk/chunk_test.go @@ -0,0 +1,648 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package chunk + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestChunkUpdate(t *testing.T) { + chunk := &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "2", + HasLower: true, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: true, + HasUpper: true, + }, + }, + } + + testCases := []struct { + boundArgs []string + expectStr string + expectArgs []interface{} + }{ + { + []string{"a", "5", "6"}, + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"5", "5", "3", "6", "6", "4"}, + }, { + []string{"b", "5", "6"}, + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"1", "1", "5", "2", "2", "6"}, + }, { + []string{"c", "7", "8"}, + "((`a` > ?) OR (`a` = ? AND `b` > ?) OR (`a` = ? AND `b` = ? AND `c` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` < ?) OR (`a` = ? AND `b` = ? AND `c` <= ?))", + []interface{}{"1", "1", "3", "1", "3", "7", "2", "2", "4", "2", "4", "8"}, + }, + } + + for _, cs := range testCases { + newChunk := chunk.CopyAndUpdate(cs.boundArgs[0], cs.boundArgs[1], cs.boundArgs[2], true, true) + conditions, args := newChunk.ToString("") + require.Equal(t, conditions, cs.expectStr) + require.Equal(t, args, cs.expectArgs) + } + + // the origin chunk is not changed + conditions, args := chunk.ToString("") + require.Equal(t, conditions, "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? 
AND `b` <= ?))") + expectArgs := []interface{}{"1", "1", "3", "2", "2", "4"} + require.Equal(t, args, expectArgs) + + // test chunk update build by offset + columnOffset := map[string]int{ + "a": 1, + "b": 0, + } + chunkRange := NewChunkRangeOffset(columnOffset) + chunkRange.Update("a", "1", "2", true, true) + chunkRange.Update("b", "3", "4", true, true) + require.Equal(t, chunkRange.ToMeta(), "range in sequence: (3,1) < (b,a) <= (4,2)") +} + +func TestChunkToString(t *testing.T) { + // lower & upper + chunk := &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "2", + HasLower: true, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: true, + HasUpper: true, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: true, + HasUpper: true, + }, + }, + } + + conditions, args := chunk.ToString("") + require.Equal(t, conditions, "((`a` > ?) OR (`a` = ? AND `b` > ?) OR (`a` = ? AND `b` = ? AND `c` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` < ?) OR (`a` = ? AND `b` = ? AND `c` <= ?))") + expectArgs := []string{"1", "1", "3", "1", "3", "5", "2", "2", "4", "2", "4", "6"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + conditions, args = chunk.ToString("latin1") + require.Equal(t, conditions, "((`a` COLLATE 'latin1' > ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' > ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' = ? AND `c` COLLATE 'latin1' > ?)) AND ((`a` COLLATE 'latin1' < ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' < ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' = ? AND `c` COLLATE 'latin1' <= ?))") + expectArgs = []string{"1", "1", "3", "1", "3", "5", "2", "2", "4", "2", "4", "6"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"2","has-lower":true,"has-upper":true},{"column":"b","lower":"3","upper":"4","has-lower":true,"has-upper":true},{"column":"c","lower":"5","upper":"6","has-lower":true,"has-upper":true}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (1,3,5) < (a,b,c) <= (2,4,6)") + + // upper + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "2", + HasLower: false, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: false, + HasUpper: true, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: false, + HasUpper: true, + }, + }, + } + + conditions, args = chunk.ToString("latin1") + require.Equal(t, conditions, "(`a` COLLATE 'latin1' < ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' < ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' = ? 
AND `c` COLLATE 'latin1' <= ?)") + expectArgs = []string{"2", "2", "4", "2", "4", "6"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"2","has-lower":false,"has-upper":true},{"column":"b","lower":"3","upper":"4","has-lower":false,"has-upper":true},{"column":"c","lower":"5","upper":"6","has-lower":false,"has-upper":true}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (a,b,c) <= (2,4,6)") + + // lower + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "2", + HasLower: true, + HasUpper: false, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: true, + HasUpper: false, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: true, + HasUpper: false, + }, + }, + } + + conditions, args = chunk.ToString("") + require.Equal(t, conditions, "(`a` > ?) OR (`a` = ? AND `b` > ?) OR (`a` = ? AND `b` = ? AND `c` > ?)") + expectArgs = []string{"1", "1", "3", "1", "3", "5"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + conditions, args = chunk.ToString("latin1") + require.Equal(t, conditions, "(`a` COLLATE 'latin1' > ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' > ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' = ? AND `c` COLLATE 'latin1' > ?)") + expectArgs = []string{"1", "1", "3", "1", "3", "5"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"2","has-lower":true,"has-upper":false},{"column":"b","lower":"3","upper":"4","has-lower":true,"has-upper":false},{"column":"c","lower":"5","upper":"6","has-lower":true,"has-upper":false}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (1,3,5) < (a,b,c)") + + // none + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "2", + HasLower: false, + HasUpper: false, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: false, + HasUpper: false, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: false, + HasUpper: false, + }, + }, + } + conditions, args = chunk.ToString("") + require.Equal(t, conditions, "TRUE") + expectArgs = []string{} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"2","has-lower":false,"has-upper":false},{"column":"b","lower":"3","upper":"4","has-lower":false,"has-upper":false},{"column":"c","lower":"5","upper":"6","has-lower":false,"has-upper":false}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: Full") + + // same & lower & upper + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "1", + HasLower: true, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: true, + HasUpper: true, + }, { + Column: "c", + Lower: "5", + Upper: "5", + HasLower: true, + HasUpper: true, + }, + }, + } + + conditions, args = chunk.ToString("") + require.Equal(t, conditions, "(`a` = ?) AND ((`b` > ?) OR (`b` = ? AND `c` > ?)) AND ((`b` < ?) OR (`b` = ? 
AND `c` <= ?))") + expectArgs = []string{"1", "3", "3", "5", "4", "4", "5"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + conditions, args = chunk.ToString("latin1") + require.Equal(t, conditions, "(`a` COLLATE 'latin1' = ?) AND ((`b` COLLATE 'latin1' > ?) OR (`b` COLLATE 'latin1' = ? AND `c` COLLATE 'latin1' > ?)) AND ((`b` COLLATE 'latin1' < ?) OR (`b` COLLATE 'latin1' = ? AND `c` COLLATE 'latin1' <= ?))") + expectArgs = []string{"1", "3", "3", "5", "4", "4", "5"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"1","has-lower":true,"has-upper":true},{"column":"b","lower":"3","upper":"4","has-lower":true,"has-upper":true},{"column":"c","lower":"5","upper":"5","has-lower":true,"has-upper":true}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (1,3,5) < (a,b,c) <= (1,4,5)") + + // same & upper + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "2", + Upper: "2", + HasLower: false, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: false, + HasUpper: true, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: false, + HasUpper: true, + }, + }, + } + + conditions, args = chunk.ToString("latin1") + require.Equal(t, conditions, "(`a` COLLATE 'latin1' < ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' < ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' = ? AND `c` COLLATE 'latin1' <= ?)") + expectArgs = []string{"2", "2", "4", "2", "4", "6"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"2","upper":"2","has-lower":false,"has-upper":true},{"column":"b","lower":"3","upper":"4","has-lower":false,"has-upper":true},{"column":"c","lower":"5","upper":"6","has-lower":false,"has-upper":true}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (a,b,c) <= (2,4,6)") + + // same & lower + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "1", + HasLower: true, + HasUpper: false, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: true, + HasUpper: false, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: true, + HasUpper: false, + }, + }, + } + + conditions, args = chunk.ToString("") + require.Equal(t, conditions, "(`a` > ?) OR (`a` = ? AND `b` > ?) OR (`a` = ? AND `b` = ? AND `c` > ?)") + expectArgs = []string{"1", "1", "3", "1", "3", "5"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + conditions, args = chunk.ToString("latin1") + require.Equal(t, conditions, "(`a` COLLATE 'latin1' > ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' > ?) OR (`a` COLLATE 'latin1' = ? AND `b` COLLATE 'latin1' = ? 
AND `c` COLLATE 'latin1' > ?)") + expectArgs = []string{"1", "1", "3", "1", "3", "5"} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"1","has-lower":true,"has-upper":false},{"column":"b","lower":"3","upper":"4","has-lower":true,"has-upper":false},{"column":"c","lower":"5","upper":"6","has-lower":true,"has-upper":false}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (1,3,5) < (a,b,c)") + + // same & none + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "1", + HasLower: false, + HasUpper: false, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: false, + HasUpper: false, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: false, + HasUpper: false, + }, + }, + } + conditions, args = chunk.ToString("") + require.Equal(t, conditions, "TRUE") + expectArgs = []string{} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"1","has-lower":false,"has-upper":false},{"column":"b","lower":"3","upper":"4","has-lower":false,"has-upper":false},{"column":"c","lower":"5","upper":"6","has-lower":false,"has-upper":false}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: Full") + + // all equal + chunk = &Range{ + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "1", + HasLower: true, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "3", + HasLower: true, + HasUpper: true, + }, { + Column: "c", + Lower: "6", + Upper: "6", + HasLower: true, + HasUpper: true, + }, + }, + } + conditions, args = chunk.ToString("") + require.Equal(t, conditions, "FALSE") + expectArgs = []string{} + for i, arg := range args { + require.Equal(t, arg, expectArgs[i]) + } + require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"1","has-lower":true,"has-upper":true},{"column":"b","lower":"3","upper":"3","has-lower":true,"has-upper":true},{"column":"c","lower":"6","upper":"6","has-lower":true,"has-upper":true}],"is-first":false,"is-last":false,"where":"","args":null}`) + require.Equal(t, chunk.ToMeta(), "range in sequence: (1,3,6) < (a,b,c) <= (1,3,6)") + +} + +func TestChunkInit(t *testing.T) { + chunks := []*Range{ + { + Bounds: []*Bound{ + { + Column: "a", + Lower: "1", + Upper: "2", + HasLower: true, + HasUpper: true, + }, { + Column: "b", + Lower: "3", + Upper: "4", + HasLower: true, + HasUpper: true, + }, { + Column: "c", + Lower: "5", + Upper: "6", + HasLower: true, + HasUpper: true, + }, + }, + }, { + Bounds: []*Bound{ + { + Column: "a", + Lower: "2", + Upper: "3", + HasLower: true, + HasUpper: true, + }, { + Column: "b", + Lower: "4", + Upper: "5", + HasLower: true, + HasUpper: true, + }, { + Column: "c", + Lower: "6", + Upper: "7", + HasLower: true, + HasUpper: true, + }, + }, + }, + } + + InitChunks(chunks, Others, 1, 1, 0, "[123]", "[sdfds fsd fd gd]", 1) + require.Equal(t, chunks[0].Where, "((((`a` COLLATE '[123]' > ?) OR (`a` COLLATE '[123]' = ? AND `b` COLLATE '[123]' > ?) OR (`a` COLLATE '[123]' = ? AND `b` COLLATE '[123]' = ? AND `c` COLLATE '[123]' > ?)) AND ((`a` COLLATE '[123]' < ?) OR (`a` COLLATE '[123]' = ? AND `b` COLLATE '[123]' < ?) OR (`a` COLLATE '[123]' = ? AND `b` COLLATE '[123]' = ? 
AND `c` COLLATE '[123]' <= ?))) AND ([sdfds fsd fd gd]))") + require.Equal(t, chunks[0].Args, []interface{}{"1", "1", "3", "1", "3", "5", "2", "2", "4", "2", "4", "6"}) + require.Equal(t, chunks[0].Type, Others) + InitChunk(chunks[1], Others, 2, 2, "[456]", "[dsfsdf]") + require.Equal(t, chunks[1].Where, "((((`a` COLLATE '[456]' > ?) OR (`a` COLLATE '[456]' = ? AND `b` COLLATE '[456]' > ?) OR (`a` COLLATE '[456]' = ? AND `b` COLLATE '[456]' = ? AND `c` COLLATE '[456]' > ?)) AND ((`a` COLLATE '[456]' < ?) OR (`a` COLLATE '[456]' = ? AND `b` COLLATE '[456]' < ?) OR (`a` COLLATE '[456]' = ? AND `b` COLLATE '[456]' = ? AND `c` COLLATE '[456]' <= ?))) AND ([dsfsdf]))") + require.Equal(t, chunks[1].Args, []interface{}{"2", "2", "4", "2", "4", "6", "3", "3", "5", "3", "5", "7"}) + require.Equal(t, chunks[1].Type, Others) +} + +func TestChunkCopyAndUpdate(t *testing.T) { + chunk := NewChunkRange() + chunk.Update("a", "1", "2", true, true) + chunk.Update("a", "2", "3", true, true) + chunk.Update("a", "324", "5435", false, false) + chunk.Update("b", "4", "5", true, false) + chunk.Update("b", "8", "9", false, true) + chunk.Update("c", "6", "7", false, true) + chunk.Update("c", "10", "11", true, false) + + conditions, args := chunk.ToString("") + require.Equal(t, conditions, "((`a` > ?) OR (`a` = ? AND `b` > ?) OR (`a` = ? AND `b` = ? AND `c` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` < ?) OR (`a` = ? AND `b` = ? AND `c` <= ?))") + require.Equal(t, args, []interface{}{"2", "2", "4", "2", "4", "10", "3", "3", "9", "3", "9", "7"}) + + chunk2 := chunk.CopyAndUpdate("a", "4", "6", true, true) + conditions, args = chunk2.ToString("") + require.Equal(t, conditions, "((`a` > ?) OR (`a` = ? AND `b` > ?) OR (`a` = ? AND `b` = ? AND `c` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` < ?) OR (`a` = ? AND `b` = ? AND `c` <= ?))") + require.Equal(t, args, []interface{}{"4", "4", "4", "4", "4", "10", "6", "6", "9", "6", "9", "7"}) + _, args = chunk.ToString("") + // `Copy` use the same []string + require.Equal(t, args, []interface{}{"2", "2", "4", "2", "4", "10", "3", "3", "9", "3", "9", "7"}) + + InitChunk(chunk, Others, 2, 2, "[324]", "[543]") + chunk3 := chunk.Clone() + chunk3.Update("a", "2", "3", true, true) + require.Equal(t, chunk3.Where, "((((`a` COLLATE '[324]' > ?) OR (`a` COLLATE '[324]' = ? AND `b` COLLATE '[324]' > ?) OR (`a` COLLATE '[324]' = ? AND `b` COLLATE '[324]' = ? AND `c` COLLATE '[324]' > ?)) AND ((`a` COLLATE '[324]' < ?) OR (`a` COLLATE '[324]' = ? AND `b` COLLATE '[324]' < ?) OR (`a` COLLATE '[324]' = ? AND `b` COLLATE '[324]' = ? 
AND `c` COLLATE '[324]' <= ?))) AND ([543]))") + require.Equal(t, chunk3.Args, []interface{}{"2", "2", "4", "2", "4", "10", "3", "3", "9", "3", "9", "7"}) + require.Equal(t, chunk3.Type, Others) +} + +func TestChunkID(t *testing.T) { + chunkIDBase := &ChunkID{ + TableIndex: 2, + BucketIndexLeft: 2, + BucketIndexRight: 2, + ChunkIndex: 2, + ChunkCnt: 4, + } + + str := chunkIDBase.ToString() + require.Equal(t, str, "2:2-2:2:4") + chunkIDtmp := &ChunkID{} + chunkIDtmp.FromString(str) + require.Equal(t, chunkIDBase.Compare(chunkIDtmp), 0) + + chunkIDSmalls := []*ChunkID{ + { + TableIndex: 1, + BucketIndexLeft: 3, + BucketIndexRight: 3, + ChunkIndex: 4, + ChunkCnt: 5, + }, { + TableIndex: 2, + BucketIndexLeft: 1, + BucketIndexRight: 1, + ChunkIndex: 3, + ChunkCnt: 5, + }, { + TableIndex: 2, + BucketIndexLeft: 2, + BucketIndexRight: 2, + ChunkIndex: 1, + ChunkCnt: 4, + }, + } + + stringRes := []string{ + "1:3-3:4:5", + "2:1-1:3:5", + "2:2-2:1:4", + } + + for i, chunkIDSmall := range chunkIDSmalls { + require.Equal(t, chunkIDBase.Compare(chunkIDSmall), 1) + str = chunkIDSmall.ToString() + require.Equal(t, str, stringRes[i]) + chunkIDtmp = &ChunkID{} + chunkIDtmp.FromString(str) + require.Equal(t, chunkIDSmall.Compare(chunkIDtmp), 0) + } + + chunkIDLarges := []*ChunkID{ + { + TableIndex: 3, + BucketIndexLeft: 1, + BucketIndexRight: 1, + ChunkIndex: 2, + ChunkCnt: 3, + }, { + TableIndex: 2, + BucketIndexLeft: 3, + BucketIndexRight: 3, + ChunkIndex: 1, + ChunkCnt: 3, + }, { + TableIndex: 2, + BucketIndexLeft: 2, + BucketIndexRight: 2, + ChunkIndex: 3, + ChunkCnt: 4, + }, + } + + stringRes = []string{ + "3:1-1:2:3", + "2:3-3:1:3", + "2:2-2:3:4", + } + + for i, chunkIDLarge := range chunkIDLarges { + require.Equal(t, chunkIDBase.Compare(chunkIDLarge), -1) + str = chunkIDLarge.ToString() + require.Equal(t, str, stringRes[i]) + chunkIDtmp = &ChunkID{} + chunkIDtmp.FromString(str) + require.Equal(t, chunkIDLarge.Compare(chunkIDtmp), 0) + } + +} + +func TestChunkIndex(t *testing.T) { + chunkRange := NewChunkRange() + chunkRange.Index.ChunkIndex = 0 + chunkRange.Index.ChunkCnt = 3 + require.True(t, chunkRange.IsFirstChunkForBucket()) + require.False(t, chunkRange.IsLastChunkForBucket()) + chunkRange.Index.ChunkIndex = 2 + require.False(t, chunkRange.IsFirstChunkForBucket()) + require.True(t, chunkRange.IsLastChunkForBucket()) + + chunkRange.Bounds = []*Bound{ + { + Lower: "1", + HasLower: true, + }, { + Lower: "2", + HasLower: true, + }, + } + require.True(t, chunkRange.IsLastChunkForTable()) + require.False(t, chunkRange.IsFirstChunkForTable()) + chunkRange.Bounds = []*Bound{ + { + Upper: "1", + HasUpper: true, + }, { + Upper: "2", + HasUpper: true, + }, + } + require.False(t, chunkRange.IsLastChunkForTable()) + require.True(t, chunkRange.IsFirstChunkForTable()) + chunkRange.Bounds = []*Bound{ + { + Upper: "1", + HasUpper: true, + }, { + Lower: "2", + HasLower: true, + }, + } + require.False(t, chunkRange.IsLastChunkForTable()) + require.False(t, chunkRange.IsFirstChunkForTable()) +} diff --git a/sync_diff_inspector/config/config.go b/sync_diff_inspector/config/config.go new file mode 100644 index 00000000000..3ab749bc890 --- /dev/null +++ b/sync_diff_inspector/config/config.go @@ -0,0 +1,642 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "crypto/sha256" + "database/sql" + "encoding/json" + "fmt" + "net" + "net/url" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" + "time" + + "github.com/BurntSushi/toml" + "github.com/go-sql-driver/mysql" + "github.com/google/uuid" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + tidbutil "github.com/pingcap/tidb/pkg/util" + "github.com/pingcap/tidb/pkg/util/dbutil" + filter "github.com/pingcap/tidb/pkg/util/table-filter" + router "github.com/pingcap/tidb/pkg/util/table-router" + "github.com/pingcap/tiflow/dm/config/security" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + flag "github.com/spf13/pflag" + "go.uber.org/zap" +) + +const ( + LocalDirPerm os.FileMode = 0o755 + LocalFilePerm os.FileMode = 0o644 + + LogFileName = "sync_diff.log" + + baseSplitThreadCount = 3 + + UnifiedTimeZone string = "+0:00" +) + +// TableConfig is the config of table. +type TableConfig struct { + // table's filter to tell us which table should adapt to this config. + TargetTables []string `toml:"target-tables" json:"target-tables"` + // Internally used to indicate which specified table in target is using this config. + Schema string + Table string + // Internally used to distinguish different config. + ConfigIndex int + // Internally used to valid config. + HasMatched bool + + // columns be ignored, will not check this column's data + IgnoreColumns []string `toml:"ignore-columns"` + // field should be the primary key, unique key or field with index + Fields []string `toml:"index-fields"` + // select range, for example: "age > 10 AND age < 20" + Range string `toml:"range"` + + TargetTableInfo *model.TableInfo + + // collation config in mysql/tidb + Collation string `toml:"collation"` + + // specify the chunksize for the table + ChunkSize int64 `toml:"chunk-size" json:"chunk-size"` +} + +// Valid returns true if table's config is valide. +func (t *TableConfig) Valid() bool { + if len(t.TargetTables) == 0 { + log.Error("target tables can't be empty in TableConfig") + return false + } + + return true +} + +// TLS Security wrapper +type Security struct { + TLSName string `json:"tls-name"` + + CAPath string `toml:"ca-path" json:"ca-path"` + CertPath string `toml:"cert-path" json:"cert-path"` + KeyPath string `toml:"key-path" json:"key-path"` + + // raw content + CABytes string `toml:"ca-bytes" json:"ca-bytes"` + CertBytes string `toml:"cert-bytes" json:"cert-bytes"` + KeyBytes string `toml:"key-bytes" json:"key-bytes"` +} + +// DataSource represents the Source Config. 
+type DataSource struct { + Host string `toml:"host" json:"host"` + Port int `toml:"port" json:"port"` + User string `toml:"user" json:"user"` + Password utils.SecretString `toml:"password" json:"password"` + SqlMode string `toml:"sql-mode" json:"sql-mode"` + Snapshot string `toml:"snapshot" json:"snapshot"` + + Security *Security `toml:"security" json:"security"` + + RouteRules []string `toml:"route-rules" json:"route-rules"` + Router *router.Table + RouteTargetSet map[string]struct{} `json:"-"` + + Conn *sql.DB +} + +// IsAutoSnapshot returns true if the tidb_snapshot is expected to automatically +// be set from the syncpoint from the target TiDB instance. +func (d *DataSource) IsAutoSnapshot() bool { + return strings.EqualFold(d.Snapshot, "auto") +} + +// SetSnapshot changes the snapshot in configuration. This is typically +// used with the auto-snapshot feature. +func (d *DataSource) SetSnapshot(newSnapshot string) { + d.Snapshot = newSnapshot +} + +func (d *DataSource) ToDBConfig() *dbutil.DBConfig { + return &dbutil.DBConfig{ + Host: d.Host, + Port: d.Port, + User: d.User, + Password: d.Password.Plain(), + Snapshot: d.Snapshot, + } +} + +// register TLS config for driver +func (d *DataSource) RegisterTLS() error { + if d.Security == nil { + return nil + } + sec := d.Security + log.Info("try to register tls config") + tlsConfig, err := tidbutil.NewTLSConfig( + tidbutil.WithCAPath(sec.CAPath), + tidbutil.WithCertAndKeyPath(sec.CertPath, sec.KeyPath), + tidbutil.WithCAContent([]byte(sec.CABytes)), + tidbutil.WithCertAndKeyContent([]byte(sec.CertBytes), []byte(sec.KeyBytes)), + ) + if err != nil { + return errors.Trace(err) + } + + if tlsConfig == nil { + return nil + } + + log.Info("success to parse tls config") + sec.TLSName = "sync-diff-inspector-" + uuid.NewString() + err = mysql.RegisterTLSConfig(sec.TLSName, tlsConfig) + return errors.Trace(err) +} + +func (d *DataSource) ToDriverConfig() *mysql.Config { + cfg := mysql.NewConfig() + cfg.Params = make(map[string]string) + + cfg.User = d.User + cfg.Passwd = d.Password.Plain() + cfg.Net = "tcp" + cfg.Addr = net.JoinHostPort(d.Host, strconv.Itoa(d.Port)) + cfg.Params["charset"] = "utf8mb4" + cfg.InterpolateParams = true + cfg.Params["time_zone"] = fmt.Sprintf("'%s'", UnifiedTimeZone) + if len(d.Snapshot) > 0 && !d.IsAutoSnapshot() { + log.Info("create connection with snapshot", zap.String("snapshot", d.Snapshot)) + cfg.Params["tidb_snapshot"] = d.Snapshot + } + if d.Security != nil && len(d.Security.TLSName) > 0 { + cfg.TLSConfig = d.Security.TLSName + } + + return cfg +} + +type TaskConfig struct { + Source []string `toml:"source-instances" json:"source-instances"` + Routes []string `toml:"source-routes" json:"source-routes"` + Target string `toml:"target-instance" json:"target-instance"` + CheckTables []string `toml:"target-check-tables" json:"target-check-tables"` + TableConfigs []string `toml:"target-configs" json:"target-configs"` + // OutputDir include these + // 1. checkpoint Dir + // 2. fix-target-sql Dir + // 3. summary file + // 4. sync diff log file + // 5. 
fix + OutputDir string `toml:"output-dir" json:"output-dir"` + + SourceInstances []*DataSource + TargetInstance *DataSource + TargetTableConfigs []*TableConfig + TargetCheckTables filter.Filter + + FixDir string + CheckpointDir string + HashFile string +} + +func (t *TaskConfig) Init( + dataSources map[string]*DataSource, + tableConfigs map[string]*TableConfig, +) (err error) { + // Parse Source/Target + dataSourceList := make([]*DataSource, 0, len(t.Source)) + for _, si := range t.Source { + ds, ok := dataSources[si] + if !ok { + log.Error("not found source instance, please correct the config", zap.String("instance", si)) + return errors.Errorf("not found source instance, please correct the config. instance is `%s`", si) + } + // try to register tls + if err := ds.RegisterTLS(); err != nil { + return errors.Trace(err) + } + dataSourceList = append(dataSourceList, ds) + } + t.SourceInstances = dataSourceList + + ts, ok := dataSources[t.Target] + if !ok { + log.Error("not found target instance, please correct the config", zap.String("instance", t.Target)) + return errors.Errorf("not found target instance, please correct the config. instance is `%s`", t.Target) + } + // try to register tls + if err := ts.RegisterTLS(); err != nil { + return errors.Trace(err) + } + t.TargetInstance = ts + + t.TargetCheckTables, err = filter.Parse(t.CheckTables) + if err != nil { + log.Error("parse check tables failed", zap.Error(err)) + return errors.Annotate(err, "parse check tables failed") + } + + targetConfigs := t.TableConfigs + if targetConfigs != nil { + // table config can be nil + tableConfigsList := make([]*TableConfig, 0, len(targetConfigs)) + for configIndex, c := range targetConfigs { + tc, ok := tableConfigs[c] + if !ok { + log.Error("not found table config", zap.String("config", c)) + return errors.Errorf("not found table config. config is `%s`", c) + } + tc.ConfigIndex = configIndex + tableConfigsList = append(tableConfigsList, tc) + } + t.TargetTableConfigs = tableConfigsList + } + + hash, err := t.ComputeConfigHash() + if err != nil { + return errors.Trace(err) + } + + ok, err = pathExists(t.OutputDir) + if err != nil { + return errors.Trace(err) + } + if !ok { + if err = mkdirAll(t.OutputDir); err != nil { + return errors.Trace(err) + } + } + // outputDir exists, we need to check the config hash for checkpoint. + t.CheckpointDir = filepath.Join(t.OutputDir, "checkpoint") + ok, err = pathExists(t.CheckpointDir) + if err != nil { + return errors.Trace(err) + } + if !ok { + // no checkpoint, we can use this outputDir directly. + if err = mkdirAll(t.CheckpointDir); err != nil { + return errors.Trace(err) + } + // create config hash in checkpointDir. + err = os.WriteFile(filepath.Join(t.CheckpointDir, hash), []byte{}, LocalFilePerm) + if err != nil { + return errors.Trace(err) + } + } else { + // checkpoint exists, we need compare the config hash. + ok, err = pathExists(filepath.Join(t.CheckpointDir, hash)) + if err != nil { + return errors.Trace(err) + } + if !ok { + // not match, raise error + return errors.Errorf("config changes breaking the checkpoint, please use another outputDir and start over again!") + } + } + + t.FixDir = filepath.Join(t.OutputDir, fmt.Sprintf("fix-on-%s", t.Target)) + if err = mkdirAll(t.FixDir); err != nil { + return errors.Trace(err) + } + + return nil +} + +// ComputeConfigHash compute the hash according to the task +// if ConfigHash is as same as checkpoint.hash +// we think the second sync diff can use the checkpoint. 
+func (t *TaskConfig) ComputeConfigHash() (string, error) { + hash := make([]byte, 0) + // compute sources + for _, c := range t.SourceInstances { + configBytes, err := json.Marshal(c) + if err != nil { + return "", errors.Trace(err) + } + hash = append(hash, configBytes...) + } + // compute target + configBytes, err := json.Marshal(t.TargetInstance) + if err != nil { + return "", errors.Trace(err) + } + hash = append(hash, configBytes...) + // compute check-tables and table config + for _, c := range t.TargetTableConfigs { + configBytes, err = json.Marshal(c) + if err != nil { + return "", errors.Trace(err) + } + hash = append(hash, configBytes...) + } + targetCheckTables := t.CheckTables + for _, c := range targetCheckTables { + hash = append(hash, []byte(c)...) + } + + return fmt.Sprintf("%x", sha256.Sum256(hash)), nil +} + +// Config is the configuration. +type Config struct { + *flag.FlagSet `json:"-"` + + // log level + LogLevel string `toml:"-" json:"-"` + // how many goroutines are created to check data + CheckThreadCount int `toml:"check-thread-count" json:"check-thread-count"` + // how many goroutines are created to split chunk. A goroutine splits one table at a time. + SplitThreadCount int `toml:"-" json:"split-thread-count"` + // set true if want to compare rows + // set false won't compare rows. + ExportFixSQL bool `toml:"export-fix-sql" json:"export-fix-sql"` + // only check table struct without table data. + CheckStructOnly bool `toml:"check-struct-only" json:"check-struct-only"` + // experimental feature: only check table data without table struct + CheckDataOnly bool `toml:"check-data-only" json:"-"` + // skip validation for tables that don't exist upstream or downstream + SkipNonExistingTable bool `toml:"skip-non-existing-table" json:"-"` + // DMAddr is dm-master's address, the format should like "http://127.0.0.1:8261" + DMAddr string `toml:"dm-addr" json:"dm-addr"` + // DMTask string `toml:"dm-task" json:"dm-task"` + DMTask string `toml:"dm-task" json:"dm-task"` + + DataSources map[string]*DataSource `toml:"data-sources" json:"data-sources"` + + Routes map[string]*router.TableRule `toml:"routes" json:"routes"` + + TableConfigs map[string]*TableConfig `toml:"table-configs" json:"table-configs"` + + Task TaskConfig `toml:"task" json:"task"` + // config file + ConfigFile string + + // export a template config file + Template string `toml:"-" json:"-"` + + // print version if set true + PrintVersion bool +} + +// NewConfig creates a new config. 
+func NewConfig() *Config { + cfg := &Config{} + cfg.FlagSet = flag.NewFlagSet("diff", flag.ContinueOnError) + fs := cfg.FlagSet + + fs.BoolVarP(&cfg.PrintVersion, "version", "V", false, "print version of sync_diff_inspector") + fs.StringVarP(&cfg.LogLevel, "log-level", "L", "info", "log level: debug, info, warn, error, fatal") + fs.StringVarP(&cfg.ConfigFile, "config", "C", "", "Config file") + fs.StringVarP(&cfg.Template, "template", "T", "", " export a template config file") + fs.StringVar(&cfg.DMAddr, "dm-addr", "", "the address of DM") + fs.StringVar(&cfg.DMTask, "dm-task", "", "identifier of dm task") + fs.IntVar(&cfg.CheckThreadCount, "check-thread-count", 4, "how many goroutines are created to check data") + fs.BoolVar(&cfg.ExportFixSQL, "export-fix-sql", true, "set true if want to compare rows or set to false will only compare checksum") + fs.BoolVar(&cfg.CheckStructOnly, "check-struct-only", false, "ignore check table's data") + fs.BoolVar(&cfg.SkipNonExistingTable, "skip-non-existing-table", false, "skip validation for tables that don't exist upstream or downstream") + fs.BoolVar(&cfg.CheckDataOnly, "check-data-only", false, "ignore check table's struct") + + _ = fs.MarkHidden("check-data-only") + + fs.SortFlags = false + return cfg +} + +// Parse parses flag definitions from the argument list. +func (c *Config) Parse(arguments []string) error { + // Parse first to get config file. + err := c.FlagSet.Parse(arguments) + if err != nil { + return errors.Trace(err) + } + + if c.PrintVersion { + return nil + } + + if c.Template != "" { + return nil + } + + // Load config file if specified. + if c.ConfigFile == "" { + return errors.Errorf("argument --config is required") + } + err = c.configFromFile(c.ConfigFile) + if err != nil { + return errors.Trace(err) + } + + // Parse again to replace with command line options. + err = c.FlagSet.Parse(arguments) + if err != nil { + return errors.Trace(err) + } + + if len(c.FlagSet.Args()) != 0 { + return errors.Errorf("'%s' is an invalid flag", c.FlagSet.Arg(0)) + } + + // Set default value when output is empty + if c.Task.OutputDir == "" { + c.Task.OutputDir = timestampOutputDir() + if err := os.RemoveAll(c.Task.OutputDir); err != nil && !os.IsNotExist(err) { + log.Fatal("fail to remove the temp directory", zap.String("path", c.Task.OutputDir), zap.String("error", err.Error())) + } + } + + c.SplitThreadCount = baseSplitThreadCount + c.CheckThreadCount/2 + + return nil +} + +func (c *Config) String() string { + cfg, err := json.Marshal(c) + if err != nil { + return err.Error() + } + return string(cfg) +} + +// configFromFile loads config from file. 
+func (c *Config) configFromFile(path string) error { + meta, err := toml.DecodeFile(path, c) + if err != nil { + return errors.Trace(err) + } + if len(meta.Undecoded()) > 0 { + return errors.Errorf("unknown keys in config file %s: %v", path, meta.Undecoded()) + } + return nil +} + +func parseTLSFromDMConfig(config *security.Security) *Security { + if config == nil { + return nil + } + return &Security{ + CAPath: config.SSLCA, + CertPath: config.SSLCert, + KeyPath: config.SSLKey, + + CABytes: string(config.SSLCABytes), + CertBytes: string(config.SSLCertBytes), + KeyBytes: string(config.SSLKeyBytes), + } +} + +func (c *Config) adjustConfigByDMSubTasks() (err error) { + // DM's subtask config + subTaskCfgs, err := getDMTaskCfg(c.DMAddr, c.DMTask) + if err != nil { + log.Warn("failed to get config from DM tasks") + return errors.Trace(err) + } + sqlMode := "" + if subTaskCfgs[0].EnableANSIQuotes { + sqlMode = "ANSI_QUOTES" + } + dataSources := make(map[string]*DataSource) + dataSources["target"] = &DataSource{ + Host: subTaskCfgs[0].To.Host, + Port: subTaskCfgs[0].To.Port, + User: subTaskCfgs[0].To.User, + Password: utils.SecretString(subTaskCfgs[0].To.Password), + SqlMode: sqlMode, + Security: parseTLSFromDMConfig(subTaskCfgs[0].To.Security), + } + for _, subTaskCfg := range subTaskCfgs { + tableRouter, err := router.NewTableRouter(subTaskCfg.CaseSensitive, []*router.TableRule{}) + routeTargetSet := make(map[string]struct{}) + if err != nil { + return errors.Trace(err) + } + for _, rule := range subTaskCfg.RouteRules { + err := tableRouter.AddRule(rule) + if err != nil { + return errors.Trace(err) + } + routeTargetSet[dbutil.TableName(rule.TargetSchema, rule.TargetTable)] = struct{}{} + } + dataSources[subTaskCfg.SourceID] = &DataSource{ + Host: subTaskCfg.From.Host, + Port: subTaskCfg.From.Port, + User: subTaskCfg.From.User, + Password: utils.SecretString(subTaskCfg.From.Password), + SqlMode: sqlMode, + Security: parseTLSFromDMConfig(subTaskCfg.From.Security), + Router: tableRouter, + + RouteTargetSet: routeTargetSet, + } + } + c.DataSources = dataSources + c.Task.Target = "target" + for id := range dataSources { + if id == "target" { + continue + } + c.Task.Source = append(c.Task.Source, id) + } + return nil +} + +func (c *Config) Init() (err error) { + if len(c.DMAddr) > 0 { + err := c.adjustConfigByDMSubTasks() + if err != nil { + return errors.Annotate(err, "failed to init Task") + } + err = c.Task.Init(c.DataSources, c.TableConfigs) + if err != nil { + return errors.Annotate(err, "failed to init Task") + } + return nil + } + for _, d := range c.DataSources { + routeRuleList := make([]*router.TableRule, 0, len(c.Routes)) + d.RouteTargetSet = make(map[string]struct{}) + // if we had rules + for _, r := range d.RouteRules { + rr, ok := c.Routes[r] + if !ok { + return errors.Errorf("not found source routes for rule %s, please correct the config", r) + } + d.RouteTargetSet[dbutil.TableName(rr.TargetSchema, rr.TargetTable)] = struct{}{} + routeRuleList = append(routeRuleList, rr) + } + // t.SourceRoute can be nil, the caller should check it. 
+ d.Router, err = router.NewTableRouter(false, routeRuleList) + if err != nil { + return errors.Annotate(err, "failed to build route config") + } + } + + err = c.Task.Init(c.DataSources, c.TableConfigs) + if err != nil { + return errors.Annotate(err, "failed to init Task") + } + return nil +} + +func (c *Config) CheckConfig() bool { + if c.CheckThreadCount <= 0 { + log.Error("check-thread-count must greater than 0!") + return false + } + if len(c.DMAddr) != 0 { + u, err := url.Parse(c.DMAddr) + if err != nil || u.Scheme == "" || u.Host == "" { + log.Error("dm-addr's format should like 'http://127.0.0.1:8261'") + return false + } + + if len(c.DMTask) == 0 { + log.Error("must set the `dm-task` if set `dm-addr`") + return false + } + } + return true +} + +func timestampOutputDir() string { + return filepath.Join(os.TempDir(), time.Now().Format("sync-diff.output.2006-01-02T15.04.05Z0700")) +} + +func pathExists(_path string) (bool, error) { + _, err := os.Stat(_path) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, errors.Trace(err) + } + return true, nil +} + +func mkdirAll(base string) error { + mask := syscall.Umask(0) + err := os.MkdirAll(base, LocalDirPerm) + syscall.Umask(mask) + return errors.Trace(err) +} diff --git a/sync_diff_inspector/config/config.toml b/sync_diff_inspector/config/config.toml new file mode 100644 index 00000000000..b5557216188 --- /dev/null +++ b/sync_diff_inspector/config/config.toml @@ -0,0 +1,73 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources] +[data-sources.mysql1] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" +# MySQL doesn't has snapshot config + +[data-sources.tidb0] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + +# Support tls connection + # security.ca-path = "..." + # security.cert-path = "..." + # security.key-path = "..." + +# Remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + # snapshot = "386902609362944000" +# When using TiCDC syncpoint source and target can be set to auto + # snapshot = "auto" + +######################### Task config ######################### +# Required +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/output/config" + + source-instances = ["mysql1"] + + target-instance = "tidb0" + + # tables need to check. *Include `schema` and `table`. Use `.` to split* + target-check-tables = ["schema*.table*", "!c.*", "test2.t2"] + + # extra table config + target-configs= ["config1"] + +######################### Tables config ######################### +# Optional +[table-configs] +[table-configs.config1] +# tables need to use this specified config. +# if use this config. 
target-tables should be a subset of #target-check-tables +target-tables = ["schema*.table*", "test2.t2"] + +range = "age > 10 AND age < 20" +index-fields = [""] +ignore-columns = ["",""] +chunk-size = 0 +collation = "" diff --git a/sync_diff_inspector/config/config_conflict.toml b/sync_diff_inspector/config/config_conflict.toml new file mode 100644 index 00000000000..416c29b1f9e --- /dev/null +++ b/sync_diff_inspector/config/config_conflict.toml @@ -0,0 +1,72 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources] +[data-sources.mysql1] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + # mysql doesn't has snapshot config + +[data-sources.tidb0] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + # snapshot = "386902609362944000" + +######################### Task config ######################### +# Required +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/output/config_conflict" + + source-instances = ["mysql1"] + + target-instance = "tidb0" + + # tables need to check. *Include `schema` and `table`. Use `.` to split* + target-check-tables = ["schema*.table*", "!c.*", "test2.t2"] + + # extra table config + target-configs= ["config1", "config2"] + +# Optional +[table-configs] +[table-configs.config1] +# tables need to use this specified config. +# if use this config. target-tables should be a subset of #target-check-tables +target-tables = ["schema*.table*", "test2.t2"] +range = "age > 10 AND age < 20" +index-fields = [""] +ignore-columns = ["",""] +chunk-size = 0 +collation = "" + +[table-configs.config2] +# conflict config with config1 +target-tables = ["schema*.table*", "test2.t2"] +range = "age > 10 AND age < 20" +index-fields = [""] +ignore-columns = ["",""] +chunk-size = 0 +collation = "" diff --git a/sync_diff_inspector/config/config_dm.toml b/sync_diff_inspector/config/config_dm.toml new file mode 100644 index 00000000000..39d5b9eff0e --- /dev/null +++ b/sync_diff_inspector/config/config_dm.toml @@ -0,0 +1,31 @@ +# Diff Configuration. This config file shows how to check data for DM's task. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. 
+export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +# dm-master's address, the format should like "http://127.0.0.1:8261" +dm-addr = "http://127.0.0.1:8261" + +# the DM's task name which is willing to check data +dm-task = "test" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/output/config" + + # tables need to check. *Include `schema` and `table`. Use `.` to split* + target-check-tables = ["hb_test.*"] + diff --git a/sync_diff_inspector/config/config_sharding.toml b/sync_diff_inspector/config/config_sharding.toml new file mode 100644 index 00000000000..59a70c5b115 --- /dev/null +++ b/sync_diff_inspector/config/config_sharding.toml @@ -0,0 +1,99 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + + route-rules = ["rule1", "rule2"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.mysql2] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + + route-rules = ["rule1", "rule2"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.mysql3] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + + route-rules = ["rule1", "rule3"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb0] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[routes.rule1] +schema-pattern = "test_*" # schema to match. Support wildcard characters * and ?. +table-pattern = "t_*" # table to match. Support wildcard characters * and ?. +target-schema = "test" # target schema +target-table = "t" # target table + +[routes.rule2] +schema-pattern = "test2_*" # schema to match. Support wildcard characters * and ?. +table-pattern = "t2_*" # table to match. Support wildcard characters * and ?. +target-schema = "test2" # target schema +target-table = "t2" # target table + +[routes.rule3] +schema-pattern = "test2_*" # schema to match. Support wildcard characters * and ?. +table-pattern = "t2_*" # table to match. Support wildcard characters * and ?. +target-schema = "test" # target schema +target-table = "t" # target table + + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/output/config" + + source-instances = ["mysql1", "mysql2", "mysql3"] + + target-instance = "tidb0" + + # tables need to check. *Include `schema` and `table`. Use `.` to split* + target-check-tables = ["schema*.table*", "!c.*", "test2.t2"] + + # extra table config + target-configs= ["config1"] + +[table-configs.config1] +# tables need to use this specified config. +# if use this config. 
target-tables should be a subset of #target-check-tables +target-tables = ["schema*.table*", "test2.t2"] + +range = "age > 10 AND age < 20" +index-fields = [""] +ignore-columns = ["",""] \ No newline at end of file diff --git a/sync_diff_inspector/config/config_test.go b/sync_diff_inspector/config/config_test.go new file mode 100644 index 00000000000..7c12c260bb2 --- /dev/null +++ b/sync_diff_inspector/config/config_test.go @@ -0,0 +1,112 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "encoding/json" + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestParseConfig(t *testing.T) { + cfg := NewConfig() + require.Nil(t, cfg.Parse([]string{"-L", "info", "--config", "config.toml"})) + cfg = NewConfig() + require.Contains(t, cfg.Parse([]string{"-L", "info"}).Error(), "argument --config is required") + + unknownFlag := []string{"--LL", "info"} + err := cfg.Parse(unknownFlag) + require.Contains(t, err.Error(), "LL") + + require.Nil(t, cfg.Parse([]string{"--config", "config.toml"})) + require.Nil(t, cfg.Init()) + require.Nil(t, cfg.Task.Init(cfg.DataSources, cfg.TableConfigs)) + + require.Nil(t, cfg.Parse([]string{"--config", "config_sharding.toml"})) + // we change the config from config.toml to config_sharding.toml + // this action will raise error. + require.Contains(t, cfg.Init().Error(), "failed to init Task: config changes breaking the checkpoint, please use another outputDir and start over again!") + + require.NoError(t, os.RemoveAll(cfg.Task.OutputDir)) + require.Nil(t, cfg.Parse([]string{"--config", "config_sharding.toml"})) + // this time will be ok, because we remove the last outputDir. + require.Nil(t, cfg.Init()) + require.Nil(t, cfg.Task.Init(cfg.DataSources, cfg.TableConfigs)) + + require.True(t, cfg.CheckConfig()) + + // we might not use the same config to run this test. e.g. 
MYSQL_PORT can be 4000 + require.JSONEq(t, cfg.String(), + "{\"check-thread-count\":4,\"split-thread-count\":5,\"export-fix-sql\":true,\"check-struct-only\":false,\"dm-addr\":\"\",\"dm-task\":\"\",\"data-sources\":{\"mysql1\":{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null},\"mysql2\":{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null},\"mysql3\":{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule3\"],\"Router\":{\"Selector\":{}},\"Conn\":null},\"tidb0\":{\"host\":\"127.0.0.1\",\"port\":4000,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":null,\"Router\":{\"Selector\":{}},\"Conn\":null}},\"routes\":{\"rule1\":{\"schema-pattern\":\"test_*\",\"table-pattern\":\"t_*\",\"target-schema\":\"test\",\"target-table\":\"t\"},\"rule2\":{\"schema-pattern\":\"test2_*\",\"table-pattern\":\"t2_*\",\"target-schema\":\"test2\",\"target-table\":\"t2\"},\"rule3\":{\"schema-pattern\":\"test2_*\",\"table-pattern\":\"t2_*\",\"target-schema\":\"test\",\"target-table\":\"t\"}},\"table-configs\":{\"config1\":{\"target-tables\":[\"schema*.table*\",\"test2.t2\"],\"Schema\":\"\",\"Table\":\"\",\"ConfigIndex\":0,\"HasMatched\":false,\"IgnoreColumns\":[\"\",\"\"],\"Fields\":[\"\"],\"Range\":\"age \\u003e 10 AND age \\u003c 20\",\"TargetTableInfo\":null,\"Collation\":\"\",\"chunk-size\":0}},\"task\":{\"source-instances\":[\"mysql1\",\"mysql2\",\"mysql3\"],\"source-routes\":null,\"target-instance\":\"tidb0\",\"target-check-tables\":[\"schema*.table*\",\"!c.*\",\"test2.t2\"],\"target-configs\":[\"config1\"],\"output-dir\":\"/tmp/output/config\",\"SourceInstances\":[{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null},{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule2\"],\"Router\":{\"Selector\":{}},\"Conn\":null},{\"host\":\"127.0.0.1\",\"port\":3306,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":[\"rule1\",\"rule3\"],\"Router\":{\"Selector\":{}},\"Conn\":null}],\"TargetInstance\":{\"host\":\"127.0.0.1\",\"port\":4000,\"user\":\"root\",\"password\":\"******\",\"sql-mode\":\"\",\"snapshot\":\"\",\"security\":null,\"route-rules\":null,\"Router\":{\"Selector\":{}},\"Conn\":null},\"TargetTableConfigs\":[{\"target-tables\":[\"schema*.table*\",\"test2.t2\"],\"Schema\":\"\",\"Table\":\"\",\"ConfigIndex\":0,\"HasMatched\":false,\"IgnoreColumns\":[\"\",\"\"],\"Fields\":[\"\"],\"Range\":\"age \\u003e 10 AND age \\u003c 20\",\"TargetTableInfo\":null,\"Collation\":\"\",\"chunk-size\":0}],\"TargetCheckTables\":[{},{},{}],\"FixDir\":\"/tmp/output/config/fix-on-tidb0\",\"CheckpointDir\":\"/tmp/output/config/checkpoint\",\"HashFile\":\"\"},\"ConfigFile\":\"config_sharding.toml\",\"PrintVersion\":false}") + hash, err := cfg.Task.ComputeConfigHash() + require.NoError(t, err) + require.Equal(t, hash, 
"c080f9894ec24aadb4aaec1109cd1951454f09a1233f2034bc3b06e0903cb289") + + require.True(t, cfg.TableConfigs["config1"].Valid()) + + require.NoError(t, os.RemoveAll(cfg.Task.OutputDir)) + +} + +func TestError(t *testing.T) { + tableConfig := &TableConfig{} + require.False(t, tableConfig.Valid()) + tableConfig.TargetTables = []string{"123", "234"} + require.True(t, tableConfig.Valid()) + + cfg := NewConfig() + // Parse + require.Contains(t, cfg.Parse([]string{"--config", "no_exist.toml"}).Error(), "no_exist.toml: no such file or directory") + + // CheckConfig + cfg.CheckThreadCount = 0 + require.False(t, cfg.CheckConfig()) + cfg.CheckThreadCount = 1 + require.True(t, cfg.CheckConfig()) + + // Init + cfg.DataSources = make(map[string]*DataSource) + cfg.DataSources["123"] = &DataSource{ + RouteRules: []string{"111"}, + } + err := cfg.Init() + require.Contains(t, err.Error(), "not found source routes for rule 111, please correct the config") +} + +func TestNoSecretLeak(t *testing.T) { + source := &DataSource{ + Host: "127.0.0.1", + Port: 5432, + User: "postgres", + Password: "AVeryV#ryStr0ngP@ssw0rd", + SqlMode: "MYSQL", + Snapshot: "2022/10/24", + } + cfg := &Config{} + cfg.DataSources = map[string]*DataSource{"pg-1": source} + require.NotContains(t, cfg.String(), "AVeryV#ryStr0ngP@ssw0rd", "%s", cfg.String()) + sourceJSON := []byte(` + { + "host": "127.0.0.1", + "port": 5432, + "user": "postgres", + "password": "meow~~~" + } + `) + s := DataSource{} + json.Unmarshal(sourceJSON, &s) + require.Equal(t, string(s.Password), "meow~~~") +} diff --git a/sync_diff_inspector/config/dm.go b/sync_diff_inspector/config/dm.go new file mode 100644 index 00000000000..68564910e4b --- /dev/null +++ b/sync_diff_inspector/config/dm.go @@ -0,0 +1,264 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package config + +import ( + "bytes" + "crypto/aes" + "crypto/cipher" + "encoding/base64" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "net/http" + + "github.com/BurntSushi/toml" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/util/filter" + router "github.com/pingcap/tidb/pkg/util/table-router" + "github.com/pingcap/tiflow/dm/config/security" + "github.com/pingcap/tiflow/dm/pb" + "github.com/pingcap/tiflow/dm/pkg/terror" + "github.com/pingcap/tiflow/pkg/column-mapping" + flag "github.com/spf13/pflag" + "go.uber.org/zap" +) + +const ( + // dm's http api version, define in https://github.com/pingcap/dm/blob/master/dm/proto/dmmaster.proto + apiVersion = "v1alpha1" +) + +func getDMTaskCfgURL(dmAddr, task string) string { + return fmt.Sprintf("%s/apis/%s/subtasks/%s", dmAddr, apiVersion, task) +} + +// getDMTaskCfg gets dm's sub task config +func getDMTaskCfg(dmAddr, task string) ([]*SubTaskConfig, error) { + tr := &http.Transport{ + // TODO: support tls + //TLSClientConfig: tlsCfg, + } + client := &http.Client{Transport: tr} + req, err := http.NewRequest("GET", getDMTaskCfgURL(dmAddr, task), nil) + if err != nil { + return nil, err + } + resp, err := client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + getSubTaskCfgResp := &pb.GetSubTaskCfgResponse{} + err = json.Unmarshal(body, getSubTaskCfgResp) + if err != nil { + return nil, err + } + + if !getSubTaskCfgResp.Result { + return nil, errors.Errorf("fail to get sub task config from DM, %s", getSubTaskCfgResp.Msg) + } + + subTaskCfgs := make([]*SubTaskConfig, 0, len(getSubTaskCfgResp.Cfgs)) + for _, cfgBytes := range getSubTaskCfgResp.Cfgs { + subtaskCfg := &SubTaskConfig{} + err = subtaskCfg.Decode(cfgBytes, false) + if err != nil { + return nil, err + } + subtaskCfg.To.Password = DecryptOrPlaintext(subtaskCfg.To.Password) + subtaskCfg.From.Password = DecryptOrPlaintext(subtaskCfg.From.Password) + subTaskCfgs = append(subTaskCfgs, subtaskCfg) + } + + log.Info("dm sub task configs", zap.Reflect("cfgs", subTaskCfgs)) + return subTaskCfgs, nil +} + +// SubTaskConfig is the configuration for SubTask. 
+type SubTaskConfig struct { + // BurntSushi/toml seems have a bug for flag "-" + // when doing encoding, if we use `toml:"-"`, it still try to encode it + // and it will panic because of unsupported type (reflect.Func) + // so we should not export flagSet + flagSet *flag.FlagSet + + // when in sharding, multi dm-workers do one task + IsSharding bool `toml:"is-sharding" json:"is-sharding"` + ShardMode string `toml:"shard-mode" json:"shard-mode"` + StrictOptimisticShardMode bool `toml:"strict-optimistic-shard-mode" json:"strict-optimistic-shard-mode"` + OnlineDDL bool `toml:"online-ddl" json:"online-ddl"` + + // pt/gh-ost name rule, support regex + ShadowTableRules []string `yaml:"shadow-table-rules" toml:"shadow-table-rules" json:"shadow-table-rules"` + TrashTableRules []string `yaml:"trash-table-rules" toml:"trash-table-rules" json:"trash-table-rules"` + + // deprecated + OnlineDDLScheme string `toml:"online-ddl-scheme" json:"online-ddl-scheme"` + + // handle schema/table name mode, and only for schema/table name/pattern + // if case insensitive, we would convert schema/table name/pattern to lower case + CaseSensitive bool `toml:"case-sensitive" json:"case-sensitive"` + + // default "loose" handle create sql by original sql, will not add default collation as upstream + // "strict" will add default collation as upstream, and downstream will occur error when downstream don't support + CollationCompatible string `yaml:"collation_compatible" toml:"collation_compatible" json:"collation_compatible"` + + Name string `toml:"name" json:"name"` + Mode string `toml:"mode" json:"mode"` + // treat it as hidden configuration + IgnoreCheckingItems []string `toml:"ignore-checking-items" json:"ignore-checking-items"` + // it represents a MySQL/MariaDB instance or a replica group + SourceID string `toml:"source-id" json:"source-id"` + ServerID uint32 `toml:"server-id" json:"server-id"` + Flavor string `toml:"flavor" json:"flavor"` + MetaSchema string `toml:"meta-schema" json:"meta-schema"` + // deprecated + HeartbeatUpdateInterval int `toml:"heartbeat-update-interval" json:"heartbeat-update-interval"` + // deprecated + HeartbeatReportInterval int `toml:"heartbeat-report-interval" json:"heartbeat-report-interval"` + // deprecated + EnableHeartbeat bool `toml:"enable-heartbeat" json:"enable-heartbeat"` + Timezone string `toml:"timezone" json:"timezone"` + + // RelayDir get value from dm-worker config + RelayDir string `toml:"relay-dir" json:"relay-dir"` + + // UseRelay get value from dm-worker's relayEnabled + UseRelay bool `toml:"use-relay" json:"use-relay"` + From DBConfig `toml:"from" json:"from"` + To DBConfig `toml:"to" json:"to"` + + RouteRules []*router.TableRule `toml:"route-rules" json:"route-rules"` + // FilterRules []*bf.BinlogEventRule `toml:"filter-rules" json:"filter-rules"` + // deprecated + ColumnMappingRules []*column.Rule `toml:"mapping-rule" json:"mapping-rule"` + // ExprFilter []*ExpressionFilter `yaml:"expression-filter" toml:"expression-filter" json:"expression-filter"` + + // black-white-list is deprecated, use block-allow-list instead + BWList *filter.Rules `toml:"black-white-list" json:"black-white-list"` + BAList *filter.Rules `toml:"block-allow-list" json:"block-allow-list"` + + // compatible with standalone dm unit + LogLevel string `toml:"log-level" json:"log-level"` + LogFile string `toml:"log-file" json:"log-file"` + LogFormat string `toml:"log-format" json:"log-format"` + LogRotate string `toml:"log-rotate" json:"log-rotate"` + + PprofAddr string `toml:"pprof-addr" 
json:"pprof-addr"` + StatusAddr string `toml:"status-addr" json:"status-addr"` + + ConfigFile string `toml:"-" json:"config-file"` + + CleanDumpFile bool `toml:"clean-dump-file" json:"clean-dump-file"` + + // deprecated, will auto discover SQL mode + EnableANSIQuotes bool `toml:"ansi-quotes" json:"ansi-quotes"` + + // still needed by Syncer / Loader bin + printVersion bool + + // which DM worker is running the subtask, this will be injected when the real worker starts running the subtask(StartSubTask). + WorkerName string `toml:"-" json:"-"` + // task experimental configs + Experimental struct { + AsyncCheckpointFlush bool `yaml:"async-checkpoint-flush" toml:"async-checkpoint-flush" json:"async-checkpoint-flush"` + } `yaml:"experimental" toml:"experimental" json:"experimental"` +} + +// DBConfig is the DB configuration. +type DBConfig struct { + Host string `toml:"host" json:"host" yaml:"host"` + Port int `toml:"port" json:"port" yaml:"port"` + User string `toml:"user" json:"user" yaml:"user"` + Password string `toml:"password" json:"-" yaml:"password"` // omit it for privacy + // deprecated, mysql driver could automatically fetch this value + MaxAllowedPacket *int `toml:"max-allowed-packet" json:"max-allowed-packet" yaml:"max-allowed-packet"` + Session map[string]string `toml:"session" json:"session" yaml:"session"` + + // security config + Security *security.Security `toml:"security" json:"security" yaml:"security"` + + // RawDBCfg *RawDBConfig `toml:"-" json:"-" yaml:"-"` + // Net string `toml:"-" json:"-" yaml:"-"` +} + +// Decode loads config from file data. +func (c *SubTaskConfig) Decode(data string, verifyDecryptPassword bool) error { + if _, err := toml.Decode(data, c); err != nil { + return errors.New("decode subtask config from data") + } + + return nil +} + +// DecryptOrPlaintext tries to decrypt base64 encoded ciphertext to plaintext or return plaintext. +func DecryptOrPlaintext(ciphertextB64 string) string { + plaintext, err := Decrypt(ciphertextB64) + if err != nil { + return ciphertextB64 + } + return plaintext +} + +// Decrypt tries to decrypt base64 encoded ciphertext to plaintext. +func Decrypt(ciphertextB64 string) (string, error) { + ciphertext, err := base64.StdEncoding.DecodeString(ciphertextB64) + if err != nil { + return "", err + } + + plaintext, err := decrypt(ciphertext) + if err != nil { + return "", err + } + return string(plaintext), nil +} + +var ( + secretKey, _ = hex.DecodeString("a529b7665997f043a30ac8fadcb51d6aa032c226ab5b7750530b12b8c1a16a48") + ivSep = []byte("@") // ciphertext format: iv + ivSep + encrypted-plaintext +) + +// decrypt decrypts ciphertext to plaintext. 
+func decrypt(ciphertext []byte) ([]byte, error) { + block, err := aes.NewCipher(secretKey) + if err != nil { + return nil, err + } + + if len(ciphertext) < block.BlockSize()+len(ivSep) { + return nil, terror.ErrCiphertextLenNotValid.Generate(block.BlockSize()+len(ivSep), len(ciphertext)) + } + + if !bytes.Equal(ciphertext[block.BlockSize():block.BlockSize()+len(ivSep)], ivSep) { + return nil, terror.ErrCiphertextContextNotValid.Generate() + } + + iv := ciphertext[:block.BlockSize()] + ciphertext = ciphertext[block.BlockSize()+len(ivSep):] + plaintext := make([]byte, len(ciphertext)) + + stream := cipher.NewCFBDecrypter(block, iv) + stream.XORKeyStream(plaintext, ciphertext) + + return plaintext, nil +} diff --git a/sync_diff_inspector/config/dm_test.go b/sync_diff_inspector/config/dm_test.go new file mode 100644 index 00000000000..83587f6274f --- /dev/null +++ b/sync_diff_inspector/config/dm_test.go @@ -0,0 +1,72 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "fmt" + "net/http" + "net/http/httptest" + "testing" + + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "github.com/stretchr/testify/require" +) + +func testHandler(w http.ResponseWriter, req *http.Request) { + fmt.Fprintln(w, `{"result":true,"cfgs":["import-mode = \"logical\"\nis-sharding = true\nshard-mode = \"pessimistic\"\nonline-ddl-scheme = \"\"\ncase-sensitive = false\nname = \"test\"\nmode = \"all\"\nsource-id = \"mysql-replica-01\"\nserver-id = 0\nflavor = \"\"\nmeta-schema = \"dm_meta\"\nheartbeat-update-interval = 1\nheartbeat-report-interval = 10\nenable-heartbeat = false\ntimezone = \"Asia/Shanghai\"\nrelay-dir = \"\"\nuse-relay = false\nfilter-rules = []\nmydumper-path = \"./bin/mydumper\"\nthreads = 4\nchunk-filesize = \"64\"\nstatement-size = 0\nrows = 0\nwhere = \"\"\nskip-tz-utc = true\nextra-args = \"\"\npool-size = 16\ndir = \"./dumped_data.test\"\nmeta-file = \"\"\nworker-count = 16\nbatch = 100\nqueue-size = 1024\ncheckpoint-flush-interval = 30\nmax-retry = 0\nauto-fix-gtid = false\nenable-gtid = false\ndisable-detect = false\nsafe-mode = false\nenable-ansi-quotes = false\nlog-level = \"\"\nlog-file = \"\"\nlog-format = \"\"\nlog-rotate = \"\"\npprof-addr = \"\"\nstatus-addr = \"\"\nclean-dump-file = true\n\n[from]\n host = \"127.0.0.1\"\n port = 3306\n user = \"root\"\n password = \"/Q7B9DizNLLTTfiZHv9WoEAKamfpIUs=\"\n max-allowed-packet = 67108864\n\n[to]\n host = \"127.0.0.1\"\n port = 4000\n user = \"root\"\n password = \"\"\n max-allowed-packet = 67108864\n\n[[route-rules]]\n schema-pattern = \"sharding*\"\n table-pattern = \"t*\"\n target-schema = \"db_target\"\n target-table = \"t_target\"\n\n[[route-rules]]\n schema-pattern = \"sharding*\"\n table-pattern = \"\"\n target-schema = \"db_target\"\n target-table = \"\"\n\n[block-allow-list]\n do-dbs = [\"~^sharding[\\\\d]+\"]\n\n [[block-allow-list.do-tables]]\n db-name = \"~^sharding[\\\\d]+\"\n tbl-name = \"~^t[\\\\d]+\"\n","is-sharding = true\nshard-mode = \"pessimistic\"\nonline-ddl-scheme = \"\"\ncase-sensitive = false\nname = \"test\"\nmode = 
\"all\"\nsource-id = \"mysql-replica-02\"\nserver-id = 0\nflavor = \"\"\nmeta-schema = \"dm_meta\"\nheartbeat-update-interval = 1\nheartbeat-report-interval = 10\nenable-heartbeat = false\ntimezone = \"Asia/Shanghai\"\nrelay-dir = \"\"\nuse-relay = false\nfilter-rules = []\nmydumper-path = \"./bin/mydumper\"\nthreads = 4\nchunk-filesize = \"64\"\nstatement-size = 0\nrows = 0\nwhere = \"\"\nskip-tz-utc = true\nextra-args = \"\"\npool-size = 16\ndir = \"./dumped_data.test\"\nmeta-file = \"\"\nworker-count = 16\nbatch = 100\nqueue-size = 1024\ncheckpoint-flush-interval = 30\nmax-retry = 0\nauto-fix-gtid = false\nenable-gtid = false\ndisable-detect = false\nsafe-mode = false\nenable-ansi-quotes = false\nlog-level = \"\"\nlog-file = \"\"\nlog-format = \"\"\nlog-rotate = \"\"\npprof-addr = \"\"\nstatus-addr = \"\"\nclean-dump-file = true\n\n[from]\n host = \"127.0.0.1\"\n port = 3307\n user = \"root\"\n password = \"/Q7B9DizNLLTTfiZHv9WoEAKamfpIUs=\"\n max-allowed-packet = 67108864\n\n[to]\n host = \"127.0.0.1\"\n port = 4000\n user = \"root\"\n password = \"\"\n max-allowed-packet = 67108864\n\n[[route-rules]]\n schema-pattern = \"sharding*\"\n table-pattern = \"t*\"\n target-schema = \"db_target\"\n target-table = \"t_target\"\n\n[[route-rules]]\n schema-pattern = \"sharding*\"\n table-pattern = \"\"\n target-schema = \"db_target\"\n target-table = \"\"\n\n[block-allow-list]\n do-dbs = [\"~^sharding[\\\\d]+\"]\n\n [[block-allow-list.do-tables]]\n db-name = \"~^sharding[\\\\d]+\"\n tbl-name = \"~^t[\\\\d]+\"\n"]}`) +} + +func equal(a *DataSource, b *DataSource) bool { + return a.Host == b.Host && a.Port == b.Port && a.Password == b.Password && a.User == b.User +} + +func TestGetDMTaskCfg(t *testing.T) { + mockServer := httptest.NewServer(http.HandlerFunc(testHandler)) + defer mockServer.Close() + + dmTaskCfg, err := getDMTaskCfg(mockServer.URL, "test") + require.NoError(t, err) + require.Equal(t, len(dmTaskCfg), 2) + require.Equal(t, dmTaskCfg[0].SourceID, "mysql-replica-01") + require.Equal(t, dmTaskCfg[1].SourceID, "mysql-replica-02") + + cfg := NewConfig() + cfg.DMAddr = mockServer.URL + cfg.DMTask = "test" + err = cfg.adjustConfigByDMSubTasks() + require.NoError(t, err) + + // after adjust config, will generate source tables for target table + require.Equal(t, len(cfg.DataSources), 3) + require.True(t, equal(cfg.DataSources["target"], &DataSource{ + Host: dmTaskCfg[0].To.Host, + Port: dmTaskCfg[0].To.Port, + Password: utils.SecretString(dmTaskCfg[0].To.Password), + User: dmTaskCfg[0].To.User, + })) + + require.True(t, equal(cfg.DataSources["mysql-replica-01"], &DataSource{ + Host: dmTaskCfg[0].From.Host, + Port: dmTaskCfg[0].From.Port, + Password: utils.SecretString(dmTaskCfg[0].From.Password), + User: dmTaskCfg[0].From.User, + })) + + require.True(t, equal(cfg.DataSources["mysql-replica-02"], &DataSource{ + Host: dmTaskCfg[1].From.Host, + Port: dmTaskCfg[1].From.Port, + Password: utils.SecretString(dmTaskCfg[1].From.Password), + User: dmTaskCfg[1].From.User, + })) +} diff --git a/sync_diff_inspector/config/template.go b/sync_diff_inspector/config/template.go new file mode 100644 index 00000000000..0296856520e --- /dev/null +++ b/sync_diff_inspector/config/template.go @@ -0,0 +1,124 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "fmt" + + "github.com/pingcap/errors" +) + +const ( + dmConfig = `# Diff Configuration. + +######################### Global config ######################### + +check-thread-count = 4 + +export-fix-sql = true + +check-struct-only = false + +dm-addr = "http://127.0.0.1:8261" + +dm-task = "test" + +######################### Task config ######################### +[task] + output-dir = "./output" + + target-check-tables = ["hb_test.*"] + +` + + normConfig = `# Diff Configuration. + +######################### Global config ######################### + +check-thread-count = 4 + +export-fix-sql = true + +check-struct-only = false + + +######################### Datasource config ######################### +[data-sources] +[data-sources.mysql1] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + + route-rules = ["rule1", "rule2"] + +[data-sources.tidb0] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # security.ca-path = "..." + # security.cert-path = "..." + # security.key-path = "..." + # snapshot = "386902609362944000" + +########################### Routes ########################### +[routes] +[routes.rule1] +schema-pattern = "test_*" +table-pattern = "t_*" +target-schema = "test" +target-table = "t" + +[routes.rule2] +schema-pattern = "test2_*" +table-pattern = "t2_*" +target-schema = "test2" +target-table = "t2" + +######################### Task config ######################### +[task] + output-dir = "./output" + + source-instances = ["mysql1"] + + target-instance = "tidb0" + + target-check-tables = ["schema*.table*", "!c.*", "test2.t2"] + + target-configs = ["config1"] + +######################### Table config ######################### +[table-configs.config1] +target-tables = ["schema*.test*", "test2.t2"] +range = "age > 10 AND age < 20" +index-fields = ["col1","col2"] +ignore-columns = ["",""] +chunk-size = 0 +collation = "" + +` +) + +func ExportTemplateConfig(configType string) error { + switch configType { + case "dm", "DM", "Dm", "dM": + fmt.Print(dmConfig) + case "norm", "normal", "Norm", "Normal": + fmt.Print(normConfig) + default: + return errors.Errorf("Error: unexpect template name: %s\n-T dm: export a dm config\n-T norm: export a normal config\n", configType) + } + return nil +} diff --git a/sync_diff_inspector/diff/diff.go b/sync_diff_inspector/diff/diff.go new file mode 100644 index 00000000000..6ebc69f65cd --- /dev/null +++ b/sync_diff_inspector/diff/diff.go @@ -0,0 +1,844 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package diff + +import ( + "bytes" + "context" + "database/sql" + "fmt" + "io/fs" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "github.com/BurntSushi/toml" + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + "github.com/pingcap/log" + tidbconfig "github.com/pingcap/tidb/pkg/config" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/checkpoints" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/progress" + "github.com/pingcap/tiflow/sync_diff_inspector/report" + "github.com/pingcap/tiflow/sync_diff_inspector/source" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/splitter" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "github.com/siddontang/go/ioutil2" + "go.uber.org/zap" +) + +const ( + // checkpointFile represents the checkpoints' file name which used for save and loads chunks + checkpointFile = "sync_diff_checkpoints.pb" +) + +// ChunkDML SQL struct for each chunk +type ChunkDML struct { + node *checkpoints.Node + sqls []string + rowAdd int + rowDelete int +} + +// Diff contains two sql DB, used for comparing. +type Diff struct { + // we may have multiple sources in dm sharding sync. + upstream source.Source + downstream source.Source + + // workSource is one of upstream/downstream by some policy in #pickSource. + workSource source.Source + + checkThreadCount int + splitThreadCount int + exportFixSQL bool + sqlWg sync.WaitGroup + checkpointWg sync.WaitGroup + + FixSQLDir string + CheckpointDir string + + sqlCh chan *ChunkDML + cp *checkpoints.Checkpoint + startRange *splitter.RangeInfo + report *report.Report +} + +// NewDiff returns a Diff instance. +func NewDiff(ctx context.Context, cfg *config.Config) (diff *Diff, err error) { + diff = &Diff{ + checkThreadCount: cfg.CheckThreadCount, + splitThreadCount: cfg.SplitThreadCount, + exportFixSQL: cfg.ExportFixSQL, + sqlCh: make(chan *ChunkDML, splitter.DefaultChannelBuffer), + cp: new(checkpoints.Checkpoint), + report: report.NewReport(&cfg.Task), + } + if err = diff.init(ctx, cfg); err != nil { + diff.Close() + return nil, errors.Trace(err) + } + + return diff, nil +} + +func (df *Diff) PrintSummary(ctx context.Context) bool { + // Stop updating progress bar so that summary won't be flushed. 
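+	// (progress.Close sends a close operation to the printer goroutine and blocks
+	// on finishCh until the final progress state has been flushed.)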
+	progress.Close()
+	df.report.CalculateTotalSize(ctx, df.downstream.GetDB())
+	err := df.report.CommitSummary()
+	if err != nil {
+		log.Fatal("failed to commit report", zap.Error(err))
+	}
+	df.report.Print(os.Stdout)
+	return df.report.Result == report.Pass
+}
+
+func (df *Diff) Close() {
+	if df.upstream != nil {
+		df.upstream.Close()
+	}
+	if df.downstream != nil {
+		df.downstream.Close()
+	}
+
+	failpoint.Inject("wait-for-checkpoint", func() {
+		log.Info("failpoint wait-for-checkpoint injected, skip delete checkpoint file.")
+		failpoint.Return()
+	})
+
+	if err := os.Remove(filepath.Join(df.CheckpointDir, checkpointFile)); err != nil && !os.IsNotExist(err) {
+		log.Fatal("fail to remove the checkpoint file", zap.String("error", err.Error()))
+	}
+}
+
+func (df *Diff) init(ctx context.Context, cfg *config.Config) (err error) {
+	// TODO adjust config
+	setTiDBCfg()
+
+	df.downstream, df.upstream, err = source.NewSources(ctx, cfg)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	df.workSource = df.pickSource(ctx)
+	df.FixSQLDir = cfg.Task.FixDir
+	df.CheckpointDir = cfg.Task.CheckpointDir
+
+	sourceConfigs, targetConfig, err := getConfigsForReport(cfg)
+	if err != nil {
+		return errors.Trace(err)
+	}
+	df.report.Init(df.downstream.GetTables(), sourceConfigs, targetConfig)
+	if err := df.initCheckpoint(); err != nil {
+		return errors.Trace(err)
+	}
+	return nil
+}
+
+func (df *Diff) initCheckpoint() error {
+	df.cp.Init()
+
+	finishTableNums := 0
+	path := filepath.Join(df.CheckpointDir, checkpointFile)
+	if ioutil2.FileExists(path) {
+		node, reportInfo, err := df.cp.LoadChunk(path)
+		if err != nil {
+			return errors.Annotate(err, "the checkpoint load process failed")
+		} else {
+			// this need not be synchronized, because at the moment there is only one thread accessing this section
+			log.Info("load checkpoint",
+				zap.Any("chunk index", node.GetID()),
+				zap.Reflect("chunk", node),
+				zap.String("state", node.GetState()))
+			df.cp.InitCurrentSavedID(node)
+		}
+
+		if node != nil {
+			// remove the SQL files whose IDs are bigger than the node's,
+			// because we will generate these SQLs again.
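+			// (removeSQLFiles moves any fix-SQL files past this checkpoint into a
+			// temporary .trash directory, which is cleaned up once the walk finishes.)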
+ err = df.removeSQLFiles(node.GetID()) + if err != nil { + return errors.Trace(err) + } + df.startRange = splitter.FromNode(node) + df.report.LoadReport(reportInfo) + finishTableNums = df.startRange.GetTableIndex() + if df.startRange.ChunkRange.Type == chunk.Empty { + // chunk_iter will skip this table directly + finishTableNums++ + } + } + } else { + log.Info("not found checkpoint file, start from beginning") + id := &chunk.ChunkID{TableIndex: -1, BucketIndexLeft: -1, BucketIndexRight: -1, ChunkIndex: -1, ChunkCnt: 0} + err := df.removeSQLFiles(id) + if err != nil { + return errors.Trace(err) + } + } + progress.Init(len(df.workSource.GetTables()), finishTableNums) + return nil +} + +func encodeReportConfig(config *report.ReportConfig) ([]byte, error) { + buf := new(bytes.Buffer) + if err := toml.NewEncoder(buf).Encode(config); err != nil { + return nil, errors.Trace(err) + } + return buf.Bytes(), nil +} + +func getConfigsForReport(cfg *config.Config) ([][]byte, []byte, error) { + sourceConfigs := make([]*report.ReportConfig, len(cfg.Task.SourceInstances)) + for i := 0; i < len(cfg.Task.SourceInstances); i++ { + instance := cfg.Task.SourceInstances[i] + + sourceConfigs[i] = &report.ReportConfig{ + Host: instance.Host, + Port: instance.Port, + User: instance.User, + Snapshot: instance.Snapshot, + SqlMode: instance.SqlMode, + } + } + instance := cfg.Task.TargetInstance + targetConfig := &report.ReportConfig{ + Host: instance.Host, + Port: instance.Port, + User: instance.User, + Snapshot: instance.Snapshot, + SqlMode: instance.SqlMode, + } + sourceBytes := make([][]byte, len(sourceConfigs)) + var err error + for i := range sourceBytes { + sourceBytes[i], err = encodeReportConfig(sourceConfigs[i]) + if err != nil { + return nil, nil, errors.Trace(err) + } + } + targetBytes, err := encodeReportConfig(targetConfig) + if err != nil { + return nil, nil, errors.Trace(err) + } + return sourceBytes, targetBytes, nil +} + +// Equal tests whether two database have same data and schema. 
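+// It pulls chunk ranges from the work source's range iterator, checks each chunk
+// with a pool of checkThreadCount consumer workers, and hands generated fix SQLs
+// and checkpoints to the writeSQLs and handleCheckpoints background goroutines.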
+func (df *Diff) Equal(ctx context.Context) error { + chunksIter, err := df.generateChunksIterator(ctx) + if err != nil { + return errors.Trace(err) + } + defer chunksIter.Close() + pool := utils.NewWorkerPool(uint(df.checkThreadCount), "consumer") + stopCh := make(chan struct{}) + + df.checkpointWg.Add(1) + go df.handleCheckpoints(ctx, stopCh) + df.sqlWg.Add(1) + go df.writeSQLs(ctx) + + defer func() { + pool.WaitFinished() + log.Debug("all consume tasks finished") + // close the sql channel + close(df.sqlCh) + df.sqlWg.Wait() + stopCh <- struct{}{} + df.checkpointWg.Wait() + }() + + for { + c, err := chunksIter.Next(ctx) + if err != nil { + return errors.Trace(err) + } + if c == nil { + // finish read the tables + break + } + log.Info("global consume chunk info", zap.Any("chunk index", c.ChunkRange.Index), zap.Any("chunk bound", c.ChunkRange.Bounds)) + pool.Apply(func() { + isEqual := df.consume(ctx, c) + if !isEqual { + progress.FailTable(c.ProgressID) + } + progress.Inc(c.ProgressID) + }) + } + + return nil +} + +func (df *Diff) StructEqual(ctx context.Context) error { + tables := df.downstream.GetTables() + tableIndex := 0 + if df.startRange != nil { + tableIndex = df.startRange.ChunkRange.Index.TableIndex + } + for ; tableIndex < len(tables); tableIndex++ { + isEqual, isSkip, isAllTableExist := false, true, tables[tableIndex].TableLack + if common.AllTableExist(isAllTableExist) { + var err error + isEqual, isSkip, err = df.compareStruct(ctx, tableIndex) + if err != nil { + return errors.Trace(err) + } + } + progress.RegisterTable(dbutil.TableName(tables[tableIndex].Schema, tables[tableIndex].Table), !isEqual, isSkip, isAllTableExist) + df.report.SetTableStructCheckResult(tables[tableIndex].Schema, tables[tableIndex].Table, isEqual, isSkip, isAllTableExist) + } + return nil +} + +func (df *Diff) compareStruct(ctx context.Context, tableIndex int) (isEqual bool, isSkip bool, err error) { + sourceTableInfos, err := df.upstream.GetSourceStructInfo(ctx, tableIndex) + if err != nil { + return false, true, errors.Trace(err) + } + table := df.downstream.GetTables()[tableIndex] + isEqual, isSkip = utils.CompareStruct(sourceTableInfos, table.Info) + table.IgnoreDataCheck = isSkip + return isEqual, isSkip, nil +} + +func (df *Diff) startGCKeeperForTiDB(ctx context.Context, db *sql.DB, snap string) { + pdCli, _ := utils.GetPDClientForGC(ctx, db) + if pdCli != nil { + // Get latest snapshot + latestSnap, err := utils.GetSnapshot(ctx, db) + if err != nil { + log.Info("failed to get snapshot, user should guarantee the GC stopped during diff progress.") + return + } + + if len(latestSnap) == 1 { + if len(snap) == 0 { + snap = latestSnap[0] + } + // compare the snapshot and choose the small one to lock + if strings.Compare(latestSnap[0], snap) < 0 { + snap = latestSnap[0] + } + } + + err = utils.StartGCSavepointUpdateService(ctx, pdCli, db, snap) + if err != nil { + log.Info("failed to keep snapshot, user should guarantee the GC stopped during diff progress.") + } else { + log.Info("start update service to keep GC stopped automatically") + } + } +} + +// pickSource pick one proper source to do some work. e.g. generate chunks +func (df *Diff) pickSource(ctx context.Context) source.Source { + workSource := df.downstream + if ok, _ := dbutil.IsTiDB(ctx, df.upstream.GetDB()); ok { + log.Info("The upstream is TiDB. 
pick it as work source candidate") + df.startGCKeeperForTiDB(ctx, df.upstream.GetDB(), df.upstream.GetSnapshot()) + workSource = df.upstream + } + if ok, _ := dbutil.IsTiDB(ctx, df.downstream.GetDB()); ok { + log.Info("The downstream is TiDB. pick it as work source first") + df.startGCKeeperForTiDB(ctx, df.downstream.GetDB(), df.downstream.GetSnapshot()) + workSource = df.downstream + } + return workSource +} + +func (df *Diff) generateChunksIterator(ctx context.Context) (source.RangeIterator, error) { + return df.workSource.GetRangeIterator(ctx, df.startRange, df.workSource.GetTableAnalyzer(), df.splitThreadCount) +} + +func (df *Diff) handleCheckpoints(ctx context.Context, stopCh chan struct{}) { + // a background goroutine which will insert the verified chunk, + // and periodically save checkpoint + log.Info("start handleCheckpoint goroutine") + defer func() { + log.Info("close handleCheckpoint goroutine") + df.checkpointWg.Done() + }() + flush := func() { + chunk := df.cp.GetChunkSnapshot() + if chunk != nil { + tableDiff := df.downstream.GetTables()[chunk.GetTableIndex()] + schema, table := tableDiff.Schema, tableDiff.Table + r, err := df.report.GetSnapshot(chunk.GetID(), schema, table) + if err != nil { + log.Warn("fail to save the report", zap.Error(err)) + } + _, err = df.cp.SaveChunk(ctx, filepath.Join(df.CheckpointDir, checkpointFile), chunk, r) + if err != nil { + log.Warn("fail to save the chunk", zap.Error(err)) + // maybe we should panic, because SaveChunk method should not failed. + } + } + } + defer flush() + for { + select { + case <-ctx.Done(): + log.Info("Stop do checkpoint by context done") + return + case <-stopCh: + log.Info("Stop do checkpoint") + return + case <-time.After(10 * time.Second): + flush() + } + } +} + +func (df *Diff) consume(ctx context.Context, rangeInfo *splitter.RangeInfo) bool { + dml := &ChunkDML{ + node: rangeInfo.ToNode(), + } + defer func() { df.sqlCh <- dml }() + tableDiff := df.downstream.GetTables()[rangeInfo.GetTableIndex()] + schema, table := tableDiff.Schema, tableDiff.Table + id := rangeInfo.ChunkRange.Index + if rangeInfo.ChunkRange.Type == chunk.Empty { + dml.node.State = checkpoints.IgnoreState + // for tables that don't exist upstream or downstream + if !common.AllTableExist(tableDiff.TableLack) { + upCount := df.upstream.GetCountForLackTable(ctx, rangeInfo) + downCount := df.downstream.GetCountForLackTable(ctx, rangeInfo) + df.report.SetTableDataCheckResult(schema, table, false, int(upCount), int(downCount), upCount, downCount, id) + return false + } + return true + } + + var state string = checkpoints.SuccessState + + isEqual, upCount, downCount, err := df.compareChecksumAndGetCount(ctx, rangeInfo) + if err != nil { + // If an error occurs during the checksum phase, skip the data compare phase. 
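+		// The chunk is still marked as failed and the error is recorded on the
+		// table result so it appears in the final report.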
+ state = checkpoints.FailedState + df.report.SetTableMeetError(schema, table, err) + } else if !isEqual && df.exportFixSQL { + state = checkpoints.FailedState + // if the chunk's checksum differ, try to do binary check + info := rangeInfo + if upCount > splitter.SplitThreshold { + log.Debug("count greater than threshold, start do bingenerate", zap.Any("chunk id", rangeInfo.ChunkRange.Index), zap.Int64("upstream chunk size", upCount)) + info, err = df.BinGenerate(ctx, df.workSource, rangeInfo, upCount) + if err != nil { + log.Error("fail to do binary search.", zap.Error(err)) + df.report.SetTableMeetError(schema, table, err) + // reuse rangeInfo to compare data + info = rangeInfo + } else { + log.Debug("bin generate finished", zap.Reflect("chunk", info.ChunkRange), zap.Any("chunk id", info.ChunkRange.Index)) + } + } + isDataEqual, err := df.compareRows(ctx, info, dml) + if err != nil { + df.report.SetTableMeetError(schema, table, err) + } + isEqual = isDataEqual + } + dml.node.State = state + df.report.SetTableDataCheckResult(schema, table, isEqual, dml.rowAdd, dml.rowDelete, upCount, downCount, id) + return isEqual +} + +func (df *Diff) BinGenerate(ctx context.Context, targetSource source.Source, tableRange *splitter.RangeInfo, count int64) (*splitter.RangeInfo, error) { + if count <= splitter.SplitThreshold { + return tableRange, nil + } + tableDiff := targetSource.GetTables()[tableRange.GetTableIndex()] + indices := dbutil.FindAllIndex(tableDiff.Info) + // if no index, do not split + if len(indices) == 0 { + log.Warn("cannot found an index to split and disable the BinGenerate", + zap.String("table", dbutil.TableName(tableDiff.Schema, tableDiff.Table))) + return tableRange, nil + } + var index *model.IndexInfo + // using the index + for _, i := range indices { + if tableRange.IndexID == i.ID { + index = i + break + } + } + if index == nil { + log.Warn("have indices but cannot found a proper index to split and disable the BinGenerate", + zap.String("table", dbutil.TableName(tableDiff.Schema, tableDiff.Table))) + return tableRange, nil + } + // TODO use selectivity from utils.GetBetterIndex + // only support PK/UK + if !(index.Primary || index.Unique) { + log.Warn("BinGenerate only support PK/UK") + return tableRange, nil + } + + log.Debug("index for BinGenerate", zap.String("index", index.Name.O)) + indexColumns := utils.GetColumnsFromIndex(index, tableDiff.Info) + if len(indexColumns) == 0 { + log.Warn("fail to get columns of the selected index, directly return the origin chunk") + return tableRange, nil + } + + return df.binSearch(ctx, targetSource, tableRange, count, tableDiff, indexColumns) +} + +func (df *Diff) binSearch(ctx context.Context, targetSource source.Source, tableRange *splitter.RangeInfo, count int64, tableDiff *common.TableDiff, indexColumns []*model.ColumnInfo) (*splitter.RangeInfo, error) { + if count <= splitter.SplitThreshold { + return tableRange, nil + } + var ( + isEqual1, isEqual2 bool + count1, count2 int64 + ) + tableRange1 := tableRange.Copy() + tableRange2 := tableRange.Copy() + + chunkLimits, args := tableRange.ChunkRange.ToString(tableDiff.Collation) + limitRange := fmt.Sprintf("(%s) AND (%s)", chunkLimits, tableDiff.Range) + midValues, err := utils.GetApproximateMidBySize(ctx, targetSource.GetDB(), tableDiff.Schema, tableDiff.Table, indexColumns, limitRange, args, count) + if err != nil { + return nil, errors.Trace(err) + } + if midValues == nil { + // TODO Since the count is from upstream, + // the midValues may be empty when downstream has much less rows 
in this chunk. + return tableRange, nil + } + log.Debug("mid values", zap.Reflect("mid values", midValues), zap.Reflect("indices", indexColumns), zap.Reflect("bounds", tableRange.ChunkRange.Bounds)) + log.Debug("table ranges", zap.Reflect("original range", tableRange)) + for i := range indexColumns { + log.Debug("update tableRange", zap.String("field", indexColumns[i].Name.O), zap.String("value", midValues[indexColumns[i].Name.O])) + tableRange1.Update(indexColumns[i].Name.O, "", midValues[indexColumns[i].Name.O], false, true, tableDiff.Collation, tableDiff.Range) + tableRange2.Update(indexColumns[i].Name.O, midValues[indexColumns[i].Name.O], "", true, false, tableDiff.Collation, tableDiff.Range) + } + log.Debug("table ranges", zap.Reflect("tableRange 1", tableRange1), zap.Reflect("tableRange 2", tableRange2)) + isEqual1, count1, _, err = df.compareChecksumAndGetCount(ctx, tableRange1) + if err != nil { + return nil, errors.Trace(err) + } + isEqual2, count2, _, err = df.compareChecksumAndGetCount(ctx, tableRange2) + if err != nil { + return nil, errors.Trace(err) + } + if count1+count2 != count { + log.Fatal("the count is not correct", + zap.Int64("count1", count1), + zap.Int64("count2", count2), + zap.Int64("count", count)) + } + log.Info("chunk split successfully", + zap.Any("chunk id", tableRange.ChunkRange.Index), + zap.Int64("count1", count1), + zap.Int64("count2", count2)) + + // If there is a count zero, we think the range is very small. + if (!isEqual1 && !isEqual2) || (count1 == 0 || count2 == 0) { + return tableRange, nil + } else if !isEqual1 { + c, err := df.binSearch(ctx, targetSource, tableRange1, count1, tableDiff, indexColumns) + if err != nil { + return nil, errors.Trace(err) + } + return c, nil + } else if !isEqual2 { + c, err := df.binSearch(ctx, targetSource, tableRange2, count2, tableDiff, indexColumns) + if err != nil { + return nil, errors.Trace(err) + } + return c, nil + } else { + // TODO: handle the error to foreground + log.Fatal("the isEqual1 and isEqual2 cannot be both true") + return nil, nil + } +} + +func (df *Diff) compareChecksumAndGetCount(ctx context.Context, tableRange *splitter.RangeInfo) (bool, int64, int64, error) { + var wg sync.WaitGroup + var upstreamInfo, downstreamInfo *source.ChecksumInfo + wg.Add(1) + go func() { + defer wg.Done() + upstreamInfo = df.upstream.GetCountAndMd5(ctx, tableRange) + }() + downstreamInfo = df.downstream.GetCountAndMd5(ctx, tableRange) + wg.Wait() + + if upstreamInfo.Err != nil { + log.Warn("failed to compare upstream checksum") + return false, -1, -1, errors.Trace(upstreamInfo.Err) + } + if downstreamInfo.Err != nil { + log.Warn("failed to compare downstream checksum") + return false, -1, -1, errors.Trace(downstreamInfo.Err) + + } + + if upstreamInfo.Count == downstreamInfo.Count && upstreamInfo.Checksum == downstreamInfo.Checksum { + return true, upstreamInfo.Count, downstreamInfo.Count, nil + } + log.Debug("checksum doesn't match", zap.Any("chunk id", tableRange.ChunkRange.Index), zap.String("table", df.workSource.GetTables()[tableRange.GetTableIndex()].Table), zap.Int64("upstream chunk size", upstreamInfo.Count), zap.Int64("downstream chunk size", downstreamInfo.Count), zap.Uint64("upstream checksum", upstreamInfo.Checksum), zap.Uint64("downstream checksum", downstreamInfo.Checksum)) + return false, upstreamInfo.Count, downstreamInfo.Count, nil +} + +func (df *Diff) compareRows(ctx context.Context, rangeInfo *splitter.RangeInfo, dml *ChunkDML) (bool, error) { + rowsAdd, rowsDelete := 0, 0 + upstreamRowsIterator, 
err := df.upstream.GetRowsIterator(ctx, rangeInfo) + if err != nil { + return false, errors.Trace(err) + } + defer upstreamRowsIterator.Close() + downstreamRowsIterator, err := df.downstream.GetRowsIterator(ctx, rangeInfo) + if err != nil { + return false, errors.Trace(err) + } + defer downstreamRowsIterator.Close() + + var lastUpstreamData, lastDownstreamData map[string]*dbutil.ColumnData + equal := true + + tableInfo := df.workSource.GetTables()[rangeInfo.GetTableIndex()].Info + _, orderKeyCols := dbutil.SelectUniqueOrderKey(tableInfo) + for { + if lastUpstreamData == nil { + lastUpstreamData, err = upstreamRowsIterator.Next() + if err != nil { + return false, err + } + } + + if lastDownstreamData == nil { + lastDownstreamData, err = downstreamRowsIterator.Next() + if err != nil { + return false, err + } + } + + if lastUpstreamData == nil { + // don't have source data, so all the targetRows's data is redundant, should be deleted + for lastDownstreamData != nil { + sql := df.downstream.GenerateFixSQL(source.Delete, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) + rowsDelete++ + log.Debug("[delete]", zap.String("sql", sql)) + + dml.sqls = append(dml.sqls, sql) + equal = false + lastDownstreamData, err = downstreamRowsIterator.Next() + if err != nil { + return false, err + } + } + break + } + + if lastDownstreamData == nil { + // target lack some data, should insert the last source datas + for lastUpstreamData != nil { + sql := df.downstream.GenerateFixSQL(source.Insert, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) + rowsAdd++ + log.Debug("[insert]", zap.String("sql", sql)) + + dml.sqls = append(dml.sqls, sql) + equal = false + + lastUpstreamData, err = upstreamRowsIterator.Next() + if err != nil { + return false, err + } + } + break + } + + eq, cmp, err := utils.CompareData(lastUpstreamData, lastDownstreamData, orderKeyCols, tableInfo.Columns) + if err != nil { + return false, errors.Trace(err) + } + if eq { + lastDownstreamData = nil + lastUpstreamData = nil + continue + } + + equal = false + sql := "" + + switch cmp { + case 1: + // delete + sql = df.downstream.GenerateFixSQL(source.Delete, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) + rowsDelete++ + log.Debug("[delete]", zap.String("sql", sql)) + lastDownstreamData = nil + case -1: + // insert + sql = df.downstream.GenerateFixSQL(source.Insert, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) + rowsAdd++ + log.Debug("[insert]", zap.String("sql", sql)) + lastUpstreamData = nil + case 0: + // update + sql = df.downstream.GenerateFixSQL(source.Replace, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) + rowsAdd++ + rowsDelete++ + log.Debug("[update]", zap.String("sql", sql)) + lastUpstreamData = nil + lastDownstreamData = nil + } + + dml.sqls = append(dml.sqls, sql) + } + dml.rowAdd = rowsAdd + dml.rowDelete = rowsDelete + return equal, nil +} + +// WriteSQLs write sqls to file +func (df *Diff) writeSQLs(ctx context.Context) { + log.Info("start writeSQLs goroutine") + defer func() { + log.Info("close writeSQLs goroutine") + df.sqlWg.Done() + }() + for { + select { + case <-ctx.Done(): + return + case dml, ok := <-df.sqlCh: + if !ok && dml == nil { + log.Info("write sql channel closed") + return + } + if len(dml.sqls) > 0 { + tableDiff := df.downstream.GetTables()[dml.node.GetTableIndex()] + fileName := fmt.Sprintf("%s:%s:%s.sql", tableDiff.Schema, tableDiff.Table, utils.GetSQLFileName(dml.node.GetID())) + fixSQLPath := filepath.Join(df.FixSQLDir, 
fileName) + if ok := ioutil2.FileExists(fixSQLPath); ok { + // unreachable + log.Fatal("write sql failed: repeat sql happen", zap.Strings("sql", dml.sqls)) + } + fixSQLFile, err := os.Create(fixSQLPath) + if err != nil { + log.Fatal("write sql failed: cannot create file", zap.Strings("sql", dml.sqls), zap.Error(err)) + continue + } + // write chunk meta + chunkRange := dml.node.ChunkRange + fixSQLFile.WriteString(fmt.Sprintf("-- table: %s.%s\n-- %s\n", tableDiff.Schema, tableDiff.Table, chunkRange.ToMeta())) + if tableDiff.NeedUnifiedTimeZone { + fixSQLFile.WriteString(fmt.Sprintf("set @@session.time_zone = \"%s\";\n", config.UnifiedTimeZone)) + } + for _, sql := range dml.sqls { + _, err = fixSQLFile.WriteString(fmt.Sprintf("%s\n", sql)) + if err != nil { + log.Fatal("write sql failed", zap.String("sql", sql), zap.Error(err)) + } + } + fixSQLFile.Close() + } + log.Debug("insert node", zap.Any("chunk index", dml.node.GetID())) + df.cp.Insert(dml.node) + } + } +} + +func (df *Diff) removeSQLFiles(checkPointId *chunk.ChunkID) error { + ts := time.Now().Format("2006-01-02T15:04:05Z07:00") + dirName := fmt.Sprintf(".trash-%s", ts) + folderPath := filepath.Join(df.FixSQLDir, dirName) + + if _, err := os.Stat(folderPath); os.IsNotExist(err) { + err = os.MkdirAll(folderPath, os.ModePerm) + if err != nil { + return errors.Trace(err) + } + defer os.RemoveAll(folderPath) + } + + err := filepath.Walk(df.FixSQLDir, func(path string, f fs.FileInfo, err error) error { + if os.IsNotExist(err) { + // if path not exists, we should return nil to continue. + return nil + } + if err != nil { + return errors.Trace(err) + } + + if f == nil || f.IsDir() { + return nil + } + + name := f.Name() + // in mac osx, the path parameter is absolute path; in linux, the path is relative path to execution base dir, + // so use Rel to convert to relative path to l.base + relPath, _ := filepath.Rel(df.FixSQLDir, path) + oldPath := filepath.Join(df.FixSQLDir, relPath) + newPath := filepath.Join(folderPath, relPath) + if strings.Contains(oldPath, ".trash") { + return nil + } + + if strings.HasSuffix(name, ".sql") { + fileIDStr := strings.TrimRight(name, ".sql") + fileIDSubstrs := strings.SplitN(fileIDStr, ":", 3) + if len(fileIDSubstrs) != 3 { + return nil + } + tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, err := utils.GetChunkIDFromSQLFileName(fileIDSubstrs[2]) + if err != nil { + return errors.Trace(err) + } + fileID := &chunk.ChunkID{ + TableIndex: tableIndex, BucketIndexLeft: bucketIndexLeft, BucketIndexRight: bucketIndexRight, ChunkIndex: chunkIndex, ChunkCnt: 0, + } + if err != nil { + return errors.Trace(err) + } + if fileID.Compare(checkPointId) > 0 { + // move to trash + err = os.Rename(oldPath, newPath) + if err != nil { + return errors.Trace(err) + } + } + } + return nil + }) + if err != nil { + return errors.Trace(err) + } + return nil +} + +func setTiDBCfg() { + // to support long index key in TiDB + tidbCfg := tidbconfig.GetGlobalConfig() + // 3027 * 4 is the max value the MaxIndexLength can be set + tidbCfg.MaxIndexLength = tidbconfig.DefMaxOfMaxIndexLength + tidbconfig.StoreGlobalConfig(tidbCfg) + + log.Debug("set tidb cfg") +} diff --git a/sync_diff_inspector/main.go b/sync_diff_inspector/main.go new file mode 100644 index 00000000000..761fe3f026e --- /dev/null +++ b/sync_diff_inspector/main.go @@ -0,0 +1,150 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + _ "github.com/go-sql-driver/mysql" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/charset" + "github.com/pingcap/tidb/pkg/util" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/diff" + flag "github.com/spf13/pflag" + "go.uber.org/zap" +) + +func init() { + c := &charset.Charset{ + Name: "gbk", + DefaultCollation: "gbk_chinese_ci", + Collations: map[string]*charset.Collation{}, + Maxlen: 2, + } + charset.AddCharset(c) + for _, coll := range charset.GetSupportedCollations() { + if strings.EqualFold(coll.CharsetName, c.Name) { + charset.AddCollation(coll) + } + } +} + +func main() { + cfg := config.NewConfig() + err := cfg.Parse(os.Args[1:]) + switch errors.Cause(err) { + case nil: + case flag.ErrHelp: + os.Exit(0) + default: + fmt.Printf("Error: %s\n", err.Error()) + cfg.FlagSet.PrintDefaults() + os.Exit(2) + } + + if cfg.PrintVersion { + fmt.Print(util.GetRawInfo("sync_diff_inspector")) + return + } + + if cfg.Template != "" { + if err := config.ExportTemplateConfig(cfg.Template); err != nil { + fmt.Printf("%s\n", err.Error()) + os.Exit(2) + } + return + } + + conf := new(log.Config) + conf.Level = cfg.LogLevel + + conf.File.Filename = filepath.Join(cfg.Task.OutputDir, config.LogFileName) + lg, p, e := log.InitLogger(conf) + if e != nil { + log.Error("Log init failed!", zap.String("error", e.Error())) + os.Exit(2) + } + log.ReplaceGlobals(lg, p) + + util.PrintInfo("sync_diff_inspector") + + // Initial config + err = cfg.Init() + if err != nil { + fmt.Printf("Fail to initialize config.\n%s\n", err.Error()) + os.Exit(2) + } + + ok := cfg.CheckConfig() + if !ok { + fmt.Printf("There is something wrong with your config, please check log info in %s\n", conf.File.Filename) + os.Exit(2) + } + + log.Info("", zap.Stringer("config", cfg)) + + ctx := context.Background() + if !checkSyncState(ctx, cfg) { + log.Warn("check failed!!!") + os.Exit(1) + } + log.Info("check pass!!!") +} + +func checkSyncState(ctx context.Context, cfg *config.Config) bool { + beginTime := time.Now() + defer func() { + log.Info("check data finished", zap.Duration("cost", time.Since(beginTime))) + }() + + d, err := diff.NewDiff(ctx, cfg) + if err != nil { + fmt.Printf("An error occured while initializing diff: %s, please check log info in %s for full details\n", + err, filepath.Join(cfg.Task.OutputDir, config.LogFileName)) + log.Fatal("failed to initialize diff process", zap.Error(err)) + return false + } + defer d.Close() + + if !cfg.CheckDataOnly { + err = d.StructEqual(ctx) + if err != nil { + fmt.Printf("An error occured while comparing table structure: %s, please check log info in %s for full details\n", + err, filepath.Join(cfg.Task.OutputDir, config.LogFileName)) + log.Fatal("failed to check structure difference", zap.Error(err)) + return false + } + } else { + log.Info("Check table data only, skip struct check") + } + if !cfg.CheckStructOnly { + err = d.Equal(ctx) + if err != nil { + fmt.Printf("An error occured while comparing table data: %s, please check log info in %s for full 
details\n", + err, filepath.Join(cfg.Task.OutputDir, config.LogFileName)) + log.Fatal("failed to check data difference", zap.Error(err)) + return false + } + } else { + log.Info("Check table struct only, skip data check") + } + return d.PrintSummary(ctx) +} diff --git a/sync_diff_inspector/progress/progress.go b/sync_diff_inspector/progress/progress.go new file mode 100644 index 00000000000..b559ac85937 --- /dev/null +++ b/sync_diff_inspector/progress/progress.go @@ -0,0 +1,480 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package progress + +import ( + "container/list" + "fmt" + "io" + "os" + "strings" + "time" + + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" +) + +type TableProgressPrinter struct { + tableList *list.List + tableFailList *list.List + tableMap map[string]*list.Element + output io.Writer + lines int + + progressTableNums int + finishTableNums int + tableNums int + + progress int + total int + + optCh chan Operator + finishCh chan struct{} +} + +type table_state_t int + +const ( + TABLE_STATE_REGISTER table_state_t = 0x1 + TABLE_STATE_PRESTART table_state_t = 0x2 + TABLE_STATE_COMPARING table_state_t = 0x4 + TABLE_STATE_FINISH table_state_t = 0x8 + TABLE_STATE_RESULT_OK table_state_t = 0x00 + TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE table_state_t = 0x10 + TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE table_state_t = 0x20 + TABLE_STATE_RESULT_FAIL_STRUCTURE_PASS table_state_t = 0x40 + TABLE_STATE_RESULT_DIFFERENT table_state_t = 0x80 + TABLE_STATE_HEAD table_state_t = 0xff + TABLE_STATE_RESULT_MASK table_state_t = 0xff0 + TABLE_STATE_NOT_EXSIT_UPSTREAM table_state_t = 0x100 + TABLE_STATE_NOT_EXSIT_DOWNSTREAM table_state_t = 0x200 +) + +type TableProgress struct { + name string + progress int + total int + state table_state_t + totalStopUpdate bool +} + +type progress_opt_t int + +const ( + PROGRESS_OPT_INC progress_opt_t = iota + PROGRESS_OPT_UPDATE + PROGRESS_OPT_REGISTER + PROGRESS_OPT_START + PROGRESS_OPT_FAIL + PROGRESS_OPT_CLOSE + PROGRESS_OPT_ERROR +) + +type Operator struct { + optType progress_opt_t + name string + total int + state table_state_t + totalStopUpdate bool +} + +func NewTableProgressPrinter(tableNums int, finishTableNums int) *TableProgressPrinter { + tpp := &TableProgressPrinter{ + tableList: list.New(), + tableFailList: list.New(), + tableMap: make(map[string]*list.Element), + lines: 0, + + progressTableNums: 0, + finishTableNums: finishTableNums, + tableNums: tableNums, + + progress: 0, + total: 0, + + optCh: make(chan Operator, 16), + finishCh: make(chan struct{}), + } + tpp.init() + go tpp.serve() + fmt.Fprintf(tpp.output, "A total of %d tables need to be compared\n\n\n", tableNums) + return tpp +} + +func (tpp *TableProgressPrinter) SetOutput(output io.Writer) { + tpp.output = output +} + +func (tpp *TableProgressPrinter) Inc(name string) { + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_INC, + name: name, + } +} + +func (tpp *TableProgressPrinter) UpdateTotal(name string, total int, stopUpdate bool) { + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_UPDATE, + 
name: name, + total: total, + totalStopUpdate: stopUpdate, + } +} + +func (tpp *TableProgressPrinter) RegisterTable(name string, isFailed bool, isDone bool, isExist int) { + var state table_state_t + if isFailed { + if isDone { + switch isExist { + case common.UpstreamTableLackFlag: + state = TABLE_STATE_NOT_EXSIT_UPSTREAM | TABLE_STATE_REGISTER + case common.DownstreamTableLackFlag: + state = TABLE_STATE_NOT_EXSIT_DOWNSTREAM | TABLE_STATE_REGISTER + default: + state = TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE | TABLE_STATE_REGISTER + } + } else { + state = TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE | TABLE_STATE_REGISTER + } + } else { + state = TABLE_STATE_REGISTER + } + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_REGISTER, + name: name, + state: state, + } +} + +func (tpp *TableProgressPrinter) StartTable(name string, total int, stopUpdate bool) { + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_START, + name: name, + total: total, + state: TABLE_STATE_PRESTART, + totalStopUpdate: stopUpdate, + } +} + +func (tpp *TableProgressPrinter) FailTable(name string) { + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_FAIL, + name: name, + state: TABLE_STATE_RESULT_DIFFERENT, + } +} + +func (tpp *TableProgressPrinter) Close() { + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_CLOSE, + } + <-tpp.finishCh +} + +func (tpp *TableProgressPrinter) PrintSummary() { + var cleanStr, fixStr string + cleanStr = "\x1b[1A\x1b[J" + fixStr = "\nSummary:\n\n" + if tpp.tableFailList.Len() == 0 { + fixStr = fmt.Sprintf( + "%sA total of %d tables have been compared and all are equal.\nYou can view the comparison details through './output_dir/sync_diff_inspector.log'\n", + fixStr, + tpp.tableNums, + ) + } else { + SkippedNum := 0 + for p := tpp.tableFailList.Front(); p != nil; p = p.Next() { + tp := p.Value.(*TableProgress) + if tp.state&(TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE|TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE) != 0 { + fixStr = fmt.Sprintf("%sThe structure of %s is not equal.\n", fixStr, tp.name) + } + if tp.state&(TABLE_STATE_RESULT_DIFFERENT) != 0 { + fixStr = fmt.Sprintf("%sThe data of %s is not equal.\n", fixStr, tp.name) + } + if tp.state&(TABLE_STATE_NOT_EXSIT_DOWNSTREAM) != 0 { + fixStr = fmt.Sprintf("%sThe data of %s does not exist in downstream database.\n", fixStr, tp.name) + SkippedNum++ + } + if tp.state&(TABLE_STATE_NOT_EXSIT_UPSTREAM) != 0 { + fixStr = fmt.Sprintf("%sThe data of %s does not exist in upstream database.\n", fixStr, tp.name) + SkippedNum++ + } + } + fixStr = fmt.Sprintf( + "%s\nThe rest of the tables are all equal.\nA total of %d tables have been compared, %d tables finished, %d tables failed, %d tables skipped.\nThe patch file has been generated to './output_dir/patch.sql'\nYou can view the comparison details through './output_dir/sync_diff_inspector.log'\n", + fixStr, tpp.tableNums, tpp.tableNums-tpp.tableFailList.Len(), tpp.tableFailList.Len()-SkippedNum, SkippedNum, + ) + } + + fmt.Fprintf(tpp.output, "%s%s\n", cleanStr, fixStr) + +} + +func (tpp *TableProgressPrinter) Error(err error) { + tpp.optCh <- Operator{ + optType: PROGRESS_OPT_ERROR, + } + <-tpp.finishCh + var cleanStr, fixStr string + cleanStr = "\x1b[1A\x1b[J" + fixStr = fmt.Sprintf("\nError in comparison process:\n%v\n\nYou can view the comparison details through './output_dir/sync_diff_inspector.log'\n", err) + fmt.Fprintf(tpp.output, "%s%s", cleanStr, fixStr) +} + +func (tpp *TableProgressPrinter) init() { + tpp.tableList.PushBack(&TableProgress{ + state: TABLE_STATE_HEAD, + }) + + tpp.output = os.Stdout +} + 
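+// serve is the printer's event loop: it consumes Operator messages from optCh
+// and redraws the progress output on a 200ms ticker; Close and Error stop it
+// through finishCh.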
+func (tpp *TableProgressPrinter) serve() { + tick := time.NewTicker(200 * time.Millisecond) + + for { + select { + case <-tick.C: + tpp.flush(false) + case opt := <-tpp.optCh: + switch opt.optType { + case PROGRESS_OPT_CLOSE: + tpp.flush(false) + tpp.finishCh <- struct{}{} + return + case PROGRESS_OPT_ERROR: + tpp.finishCh <- struct{}{} + return + case PROGRESS_OPT_INC: + if e, ok := tpp.tableMap[opt.name]; ok { + tp := e.Value.(*TableProgress) + tp.progress++ + tpp.progress++ + if tp.progress >= tp.total && tp.totalStopUpdate { + tp.state = (tp.state & TABLE_STATE_RESULT_MASK) | TABLE_STATE_FINISH + tpp.progress -= tp.progress + tpp.total -= tp.total + delete(tpp.tableMap, opt.name) + tpp.flush(true) + } + } + case PROGRESS_OPT_REGISTER: + if _, ok := tpp.tableMap[opt.name]; !ok { + e := tpp.tableList.PushBack(&TableProgress{ + name: opt.name, + progress: 0, + total: opt.total, + state: opt.state, + totalStopUpdate: opt.totalStopUpdate, + }) + tpp.tableMap[opt.name] = e + } + case PROGRESS_OPT_START: + e, ok := tpp.tableMap[opt.name] + if !ok { + e = tpp.tableList.PushBack(&TableProgress{ + name: opt.name, + progress: 0, + total: opt.total, + state: opt.state | TABLE_STATE_RESULT_FAIL_STRUCTURE_PASS, + totalStopUpdate: opt.totalStopUpdate, + }) + tpp.tableMap[opt.name] = e + } else { + tp := e.Value.(*TableProgress) + tp.state ^= TABLE_STATE_REGISTER | opt.state + tp.progress = 0 + tp.total = opt.total + tp.totalStopUpdate = opt.totalStopUpdate + } + if e.Value.(*TableProgress).state&TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE == 0 { + tpp.total += opt.total + } else { + delete(tpp.tableMap, opt.name) + } + tpp.flush(true) + case PROGRESS_OPT_UPDATE: + if e, ok := tpp.tableMap[opt.name]; ok { + tp := e.Value.(*TableProgress) + tpp.total += opt.total + tp.total += opt.total + tp.totalStopUpdate = opt.totalStopUpdate + } + case PROGRESS_OPT_FAIL: + if e, ok := tpp.tableMap[opt.name]; ok { + tp := e.Value.(*TableProgress) + tp.state |= opt.state + // continue to increment chunk + } + } + } + } +} + +// flush flush info +func (tpp *TableProgressPrinter) flush(stateIsChanged bool) { + /* + * A total of 15 tables need to be compared + * + * Comparing the table structure of `schema1.table1` ... equivalent + * Comparing the table data of `schema1.table1` ... equivalent + * Comparing the table structure of `schema2.table2` ... equivalent + * Comparing the table data of `schema2.table2` ... + * _____________________________________________________________________________ + * Progress [===================>-----------------------------------------] 35% + * + */ + + if stateIsChanged { + var cleanStr, fixStr, dynStr string + cleanStr = fmt.Sprintf("\x1b[%dA\x1b[J", tpp.lines) + tpp.lines = 2 + /* PRESTART/COMPARING/FINISH OK/DIFFERENT */ + for p := tpp.tableList.Front(); p != nil; p = p.Next() { + tp := p.Value.(*TableProgress) + // There are 5 situations: + // 1. structure is same and data is same + // 2. structure is same and data is different + // 3. structure is different and we won't compare data + // 4. structure is different and data is same + // 5. structure is different and data is different + switch tp.state & 0xf { + case TABLE_STATE_PRESTART: + switch tp.state & TABLE_STATE_RESULT_MASK { + case TABLE_STATE_RESULT_OK: + fixStr = fmt.Sprintf("%sComparing the table structure of %s ... 
equivalent\n", fixStr, tp.name) + dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) + tpp.lines++ + tpp.progressTableNums++ + tp.state = TABLE_STATE_COMPARING + case TABLE_STATE_NOT_EXSIT_UPSTREAM, TABLE_STATE_NOT_EXSIT_DOWNSTREAM: + dynStr = fmt.Sprintf("%sComparing the table data of %s ...skipped\n", dynStr, tp.name) + tpp.tableFailList.PushBack(tp) + preNode := p.Prev() + tpp.tableList.Remove(p) + p = preNode + tpp.finishTableNums++ + case TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE: + fixStr = fmt.Sprintf("%sComparing the table structure of %s ... failure\n", fixStr, tp.name) + tpp.tableFailList.PushBack(tp) + // we have empty node as list head, so p is not nil + preNode := p.Prev() + tpp.tableList.Remove(p) + p = preNode + tpp.finishTableNums++ + case TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE: + fixStr = fmt.Sprintf("%sComparing the table structure of %s ... failure\n", fixStr, tp.name) + dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) + tpp.lines++ + tpp.progressTableNums++ + tp.state ^= TABLE_STATE_COMPARING | TABLE_STATE_PRESTART + case TABLE_STATE_RESULT_FAIL_STRUCTURE_PASS: + fixStr = fmt.Sprintf("%sComparing the table structure of %s ... skip\n", fixStr, tp.name) + dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) + tpp.lines++ + tpp.progressTableNums++ + tp.state ^= TABLE_STATE_COMPARING | TABLE_STATE_PRESTART + } + case TABLE_STATE_COMPARING: + dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) + tpp.lines++ + case TABLE_STATE_FINISH: + if tp.state&TABLE_STATE_RESULT_DIFFERENT == 0 { + fixStr = fmt.Sprintf("%sComparing the table data of %s ... equivalent\n", fixStr, tp.name) + } else { + fixStr = fmt.Sprintf("%sComparing the table data of %s ... 
failure\n", fixStr, tp.name) + } + if tp.state&TABLE_STATE_RESULT_MASK != 0 { + tpp.tableFailList.PushBack(tp) + } + // we have empty node as list head, so p is not nil + preNode := p.Prev() + tpp.tableList.Remove(p) + p = preNode + tpp.progressTableNums-- + tpp.finishTableNums++ + } + } + + dynStr = fmt.Sprintf("%s_____________________________________________________________________________\n", dynStr) + fmt.Fprintf(tpp.output, "%s%s%s", cleanStr, fixStr, dynStr) + } else { + fmt.Fprint(tpp.output, "\x1b[1A\x1b[J") + } + // show bar + // 60 '='+'-' + coe := float32(tpp.progressTableNums*tpp.progress)/float32(tpp.tableNums*(tpp.total+1)) + float32(tpp.finishTableNums)/float32(tpp.tableNums) + numLeft := int(60 * coe) + percent := int(100 * coe) + fmt.Fprintf(tpp.output, "Progress [%s>%s] %d%% %d/%d\n", strings.Repeat("=", numLeft), strings.Repeat("-", 60-numLeft), percent, tpp.progress, tpp.total) +} + +var progress_ *TableProgressPrinter = nil + +func Init(tableNums, finishTableNums int) { + progress_ = NewTableProgressPrinter(tableNums, finishTableNums) +} + +func Inc(name string) { + if progress_ != nil { + progress_.Inc(name) + } +} + +func UpdateTotal(name string, total int, stopUpdate bool) { + if progress_ != nil { + progress_.UpdateTotal(name, total, stopUpdate) + } +} + +func RegisterTable(name string, isFailed bool, isDone bool, isExist int) { + if progress_ != nil { + progress_.RegisterTable(name, isFailed, isDone, isExist) + } +} + +func StartTable(name string, total int, stopUpdate bool) { + if progress_ != nil { + progress_.StartTable(name, total, stopUpdate) + } +} + +func FailTable(name string) { + if progress_ != nil { + progress_.FailTable(name) + } +} + +func Close() { + if progress_ != nil { + progress_.Close() + } +} + +func PrintSummary() { + if progress_ != nil { + progress_.PrintSummary() + } +} + +func Error(err error) { + if progress_ != nil { + progress_.Error(err) + } +} + +func SetOutput(output io.Writer) { + if progress_ != nil { + progress_.SetOutput(output) + } +} diff --git a/sync_diff_inspector/progress/progress_test.go b/sync_diff_inspector/progress/progress_test.go new file mode 100644 index 00000000000..7393f93c022 --- /dev/null +++ b/sync_diff_inspector/progress/progress_test.go @@ -0,0 +1,108 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package progress + +import ( + "bytes" + "errors" + "testing" + "time" + + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/stretchr/testify/require" +) + +func TestProgress(t *testing.T) { + p := NewTableProgressPrinter(6, 0) + p.RegisterTable("1", true, true, common.AllTableExistFlag) + p.StartTable("1", 50, true) + p.RegisterTable("2", true, false, common.AllTableExistFlag) + p.StartTable("2", 2, true) + p.Inc("2") + p.RegisterTable("3", false, false, common.AllTableExistFlag) + p.StartTable("3", 1, false) + p.Inc("2") + p.Inc("3") + p.UpdateTotal("3", 1, true) + p.Inc("3") + p.StartTable("4", 1, true) + p.FailTable("4") + p.Inc("3") + p.Inc("4") + p.RegisterTable("5", true, true, common.UpstreamTableLackFlag) + p.StartTable("5", 1, true) + p.RegisterTable("6", true, true, common.DownstreamTableLackFlag) + p.StartTable("6", 1, true) + time.Sleep(500 * time.Millisecond) + p.Close() + buffer := new(bytes.Buffer) + p.SetOutput(buffer) + p.PrintSummary() + require.Equal( + t, + "\x1b[1A\x1b[J\nSummary:\n\nThe structure of 1 is not equal.\nThe structure of 2 is not equal.\nThe data of 4 is not equal.\nThe data of 5 does not exist in upstream database.\nThe data of 6 does not exist in downstream database.\n"+ + "\nThe rest of the tables are all equal.\nA total of 6 tables have been compared, 1 tables finished, 3 tables failed, 2 tables skipped.\nThe patch file has been generated to './output_dir/patch.sql'\n"+ + "You can view the comparison details through './output_dir/sync_diff_inspector.log'\n\n", + buffer.String(), + ) +} + +func TestTableError(t *testing.T) { + p := NewTableProgressPrinter(4, 0) + p.RegisterTable("1", true, true, common.AllTableExistFlag) + p.StartTable("1", 50, true) + p.RegisterTable("2", true, true, common.AllTableExistFlag) + p.StartTable("2", 1, true) + p.RegisterTable("3", true, true, common.DownstreamTableLackFlag) + p.StartTable("3", 1, true) + + p.Inc("2") + buffer := new(bytes.Buffer) + p.SetOutput(buffer) + p.Error(errors.New("[aaa]")) + time.Sleep(500 * time.Millisecond) + require.Equal( + t, + "\x1b[0A\x1b[JComparing the table structure of 1 ... failure\n"+ + "_____________________________________________________________________________\n"+ + "Progress [===============>---------------------------------------------] 25% 0/0\n"+ + "\x1b[2A\x1b[JComparing the table structure of 2 ... 
failure\n"+ + "_____________________________________________________________________________\n"+ + "Progress [==============================>------------------------------] 50% 0/0\n"+ + "\x1b[2A\x1b[JComparing the table data of 3 ...skipped\n"+ + "_____________________________________________________________________________\n"+ + "Progress [=============================================>---------------] 75% 0/1\n"+ + "\x1b[1A\x1b[J\nError in comparison process:\n[aaa]\n\n"+ + "You can view the comparison details through './output_dir/sync_diff_inspector.log'\n", + buffer.String(), + ) +} + +func TestAllSuccess(t *testing.T) { + Init(2, 0) + RegisterTable("1", false, false, common.AllTableExistFlag) + StartTable("1", 1, true) + RegisterTable("2", false, false, common.AllTableExistFlag) + StartTable("2", 1, true) + Inc("1") + Inc("2") + Close() + buf := new(bytes.Buffer) + SetOutput(buf) + PrintSummary() + require.Equal(t, buf.String(), "\x1b[1A\x1b[J\nSummary:\n\n"+ + "A total of 2 tables have been compared and all are equal.\n"+ + "You can view the comparison details through './output_dir/sync_diff_inspector.log'\n\n", + ) +} diff --git a/sync_diff_inspector/report/report.go b/sync_diff_inspector/report/report.go new file mode 100644 index 00000000000..ba58878e8a5 --- /dev/null +++ b/sync_diff_inspector/report/report.go @@ -0,0 +1,419 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package report + +import ( + "context" + "database/sql" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + "sync" + "time" + + "github.com/olekukonko/tablewriter" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +const ( + // Pass means all data and struct of tables are equal + Pass = "pass" + // Fail means not all data or struct of tables are equal + Fail = "fail" + Error = "error" +) + +// ReportConfig stores the config information for the user +type ReportConfig struct { + Host string `toml:"host"` + Port int `toml:"port"` + User string `toml:"user"` + Snapshot string `toml:"snapshot,omitempty"` + SqlMode string `toml:"sql-mode,omitempty"` +} + +// TableResult saves the check result for every table. 
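+// A table is counted as passed only when both StructEqual and DataEqual are true;
+// tables missing on one side are tracked via TableLack and counted as skipped in the summary.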
+type TableResult struct { + Schema string `json:"schema"` + Table string `json:"table"` + StructEqual bool `json:"struct-equal"` + DataSkip bool `json:"data-skip"` + DataEqual bool `json:"data-equal"` + MeetError error `json:"-"` + ChunkMap map[string]*ChunkResult `json:"chunk-result"` // `ChunkMap` stores the `ChunkResult` of each chunk of the table + UpCount int64 `json:"up-count"` // `UpCount` is the number of rows in the table from upstream + DownCount int64 `json:"down-count"` // `DownCount` is the number of rows in the table from downstream + TableLack int `json:"table-lack"` +} + +// ChunkResult save the necessarily information to provide summary information +type ChunkResult struct { + RowsAdd int `json:"rows-add"` // `RowsAdd` is the number of rows needed to add + RowsDelete int `json:"rows-delete"` // `RowsDelete` is the number of rows needed to delete +} + +// Report saves the check results. +type Report struct { + sync.RWMutex + Result string `json:"-"` // Result is pass or fail + PassNum int32 `json:"-"` // The pass number of tables + FailedNum int32 `json:"-"` // The failed number of tables + SkippedNum int32 `json:"-"` // The skipped number of tables + TableResults map[string]map[string]*TableResult `json:"table-results"` // TableResult saved the map of `schema` => `table` => `tableResult` + StartTime time.Time `json:"start-time"` + Duration time.Duration `json:"time-duration"` + TotalSize int64 `json:"-"` // Total size of the checked tables + SourceConfig [][]byte `json:"-"` + TargetConfig []byte `json:"-"` + + task *config.TaskConfig `json:"-"` +} + +// LoadReport loads the report from the checkpoint +func (r *Report) LoadReport(reportInfo *Report) { + r.StartTime = time.Now() + r.Duration = reportInfo.Duration + r.TotalSize = reportInfo.TotalSize + for schema, tableMap := range reportInfo.TableResults { + if _, ok := r.TableResults[schema]; !ok { + r.TableResults[schema] = make(map[string]*TableResult) + } + for table, result := range tableMap { + r.TableResults[schema][table] = result + } + } +} + +func (r *Report) getSortedTables() [][]string { + equalTables := make([][]string, 0) + for schema, tableMap := range r.TableResults { + for table, result := range tableMap { + if result.StructEqual && result.DataEqual { + equalRow := make([]string, 0, 3) + equalRow = append(equalRow, dbutil.TableName(schema, table)) + equalRow = append(equalRow, strconv.FormatInt(result.UpCount, 10)) + equalRow = append(equalRow, strconv.FormatInt(result.DownCount, 10)) + equalTables = append(equalTables, equalRow) + } + } + } + sort.Slice(equalTables, func(i, j int) bool { return equalTables[i][0] < equalTables[j][0] }) + return equalTables +} + +func (r *Report) getDiffRows() [][]string { + diffRows := make([][]string, 0) + for schema, tableMap := range r.TableResults { + for table, result := range tableMap { + if result.StructEqual && result.DataEqual { + continue + } + diffRow := make([]string, 0) + diffRow = append(diffRow, dbutil.TableName(schema, table)) + if !common.AllTableExist(result.TableLack) { + diffRow = append(diffRow, "skipped") + } else { + diffRow = append(diffRow, "succeed") + } + if !result.StructEqual { + diffRow = append(diffRow, "false") + } else { + diffRow = append(diffRow, "true") + } + rowsAdd, rowsDelete := 0, 0 + for _, chunkResult := range result.ChunkMap { + rowsAdd += chunkResult.RowsAdd + rowsDelete += chunkResult.RowsDelete + } + diffRow = append(diffRow, fmt.Sprintf("+%d/-%d", rowsAdd, rowsDelete), strconv.FormatInt(result.UpCount, 10), 
strconv.FormatInt(result.DownCount, 10)) + diffRows = append(diffRows, diffRow) + } + } + return diffRows +} + +// CalculateTotalSize calculate the total size of all the checked tables +// Notice, user should run the analyze table first, when some of tables' size are zero. +func (r *Report) CalculateTotalSize(ctx context.Context, db *sql.DB) { + for schema, tableMap := range r.TableResults { + for table := range tableMap { + size, err := utils.GetTableSize(ctx, db, schema, table) + if size == 0 || err != nil { + log.Warn("fail to get the correct size of table, if you want to get the correct size, please analyze the corresponding tables", zap.String("table", dbutil.TableName(schema, table)), zap.Error(err)) + } else { + r.TotalSize += size + } + } + } +} + +// CommitSummary commit summary info +func (r *Report) CommitSummary() error { + passNum, failedNum, skippedNum := int32(0), int32(0), int32(0) + for _, tableMap := range r.TableResults { + for _, result := range tableMap { + if result.StructEqual && result.DataEqual { + passNum++ + } else if !common.AllTableExist(result.TableLack) { + skippedNum++ + } else { + failedNum++ + } + } + } + r.PassNum = passNum + r.FailedNum = failedNum + r.SkippedNum = skippedNum + summaryPath := filepath.Join(r.task.OutputDir, "summary.txt") + summaryFile, err := os.Create(summaryPath) + if err != nil { + return errors.Trace(err) + } + defer summaryFile.Close() + summaryFile.WriteString("Summary\n\n\n\n") + summaryFile.WriteString("Source Database\n\n\n\n") + for i := 0; i < len(r.SourceConfig); i++ { + summaryFile.Write(r.SourceConfig[i]) + summaryFile.WriteString("\n") + } + summaryFile.WriteString("Target Databases\n\n\n\n") + summaryFile.Write(r.TargetConfig) + summaryFile.WriteString("\n") + + summaryFile.WriteString("Comparison Result\n\n\n\n") + summaryFile.WriteString("The table structure and data in following tables are equivalent\n\n") + equalTables := r.getSortedTables() + if len(equalTables) > 0 { + tableString := &strings.Builder{} + table := tablewriter.NewWriter(tableString) + table.SetHeader([]string{"Table", "UpCount", "DownCount"}) + for _, v := range equalTables { + table.Append(v) + } + table.Render() + summaryFile.WriteString(tableString.String()) + summaryFile.WriteString("\n\n") + } + if r.Result == Fail || r.SkippedNum != 0 { + summaryFile.WriteString("The following tables contains inconsistent data\n\n") + tableString := &strings.Builder{} + table := tablewriter.NewWriter(tableString) + table.SetHeader([]string{"Table", "Result", "Structure equality", "Data diff rows", "UpCount", "DownCount"}) + diffRows := r.getDiffRows() + for _, v := range diffRows { + table.Append(v) + } + table.Render() + summaryFile.WriteString(tableString.String()) + } + duration := r.Duration + time.Since(r.StartTime) + summaryFile.WriteString(fmt.Sprintf("\nTime Cost: %s\n", duration)) + summaryFile.WriteString(fmt.Sprintf("Average Speed: %fMB/s\n", float64(r.TotalSize)/(1024.0*1024.0*duration.Seconds()))) + return nil +} + +func (r *Report) Print(w io.Writer) error { + var summary strings.Builder + if r.Result == Pass && r.SkippedNum == 0 { + summary.WriteString(fmt.Sprintf("A total of %d table have been compared and all are equal.\n", r.FailedNum+r.PassNum+r.SkippedNum)) + summary.WriteString(fmt.Sprintf("You can view the comparison details through '%s/%s'\n", r.task.OutputDir, config.LogFileName)) + } else if r.Result == Fail || r.SkippedNum != 0 { + for schema, tableMap := range r.TableResults { + for table, result := range tableMap { + if 
!result.StructEqual { + if result.DataSkip { + switch result.TableLack { + case common.UpstreamTableLackFlag: + summary.WriteString(fmt.Sprintf("The data of %s does not exist in upstream database\n", dbutil.TableName(schema, table))) + case common.DownstreamTableLackFlag: + summary.WriteString(fmt.Sprintf("The data of %s does not exist in downstream database\n", dbutil.TableName(schema, table))) + default: + summary.WriteString(fmt.Sprintf("The structure of %s is not equal, and data-check is skipped\n", dbutil.TableName(schema, table))) + } + } else { + summary.WriteString(fmt.Sprintf("The structure of %s is not equal\n", dbutil.TableName(schema, table))) + } + } + if !result.DataEqual && common.AllTableExist(result.TableLack) { + summary.WriteString(fmt.Sprintf("The data of %s is not equal\n", dbutil.TableName(schema, table))) + } + } + } + summary.WriteString("\n") + summary.WriteString("The rest of tables are all equal.\n") + summary.WriteString("\n") + summary.WriteString(fmt.Sprintf("A total of %d tables have been compared, %d tables finished, %d tables failed, %d tables skipped.\n", r.FailedNum+r.PassNum+r.SkippedNum, r.PassNum, r.FailedNum, r.SkippedNum)) + summary.WriteString(fmt.Sprintf("The patch file has been generated in \n\t'%s/'\n", r.task.FixDir)) + summary.WriteString(fmt.Sprintf("You can view the comparison details through '%s/%s'\n", r.task.OutputDir, config.LogFileName)) + } else { + summary.WriteString("Error in comparison process:\n") + for schema, tableMap := range r.TableResults { + for table, result := range tableMap { + if result.MeetError != nil { + summary.WriteString(fmt.Sprintf("%s error occured in %s\n", result.MeetError.Error(), dbutil.TableName(schema, table))) + } + } + } + summary.WriteString(fmt.Sprintf("You can view the comparison details through '%s/%s'\n", r.task.OutputDir, config.LogFileName)) + } + fmt.Fprint(w, summary.String()) + return nil +} + +// NewReport returns a new Report. +func NewReport(task *config.TaskConfig) *Report { + return &Report{ + TableResults: make(map[string]map[string]*TableResult), + Result: Pass, + task: task, + } +} + +func (r *Report) Init(tableDiffs []*common.TableDiff, sourceConfig [][]byte, targetConfig []byte) { + r.StartTime = time.Now() + r.SourceConfig = sourceConfig + r.TargetConfig = targetConfig + for _, tableDiff := range tableDiffs { + schema, table := tableDiff.Schema, tableDiff.Table + if _, ok := r.TableResults[schema]; !ok { + r.TableResults[schema] = make(map[string]*TableResult) + } + r.TableResults[schema][table] = &TableResult{ + Schema: schema, + Table: table, + StructEqual: true, + DataEqual: true, + MeetError: nil, + ChunkMap: make(map[string]*ChunkResult), + } + } +} + +// SetTableStructCheckResult sets the struct check result for table. +func (r *Report) SetTableStructCheckResult(schema, table string, equal bool, skip bool, exist int) { + r.Lock() + defer r.Unlock() + tableResult := r.TableResults[schema][table] + tableResult.StructEqual = equal + tableResult.DataSkip = skip + tableResult.TableLack = exist + if !equal && common.AllTableExist(tableResult.TableLack) && r.Result != Error { + r.Result = Fail + } +} + +// SetTableDataCheckResult sets the data check result for table. 
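+// (Editorial sketch) Assumed call pattern from the checking loop, with
+// illustrative arguments: the struct check is recorded once per table and the
+// data check once per compared chunk, and the counts accumulate on the
+// table's TableResult (chunkID here stands for the current *chunk.ChunkID):
+//
+//	r.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag)
+//	r.SetTableDataCheckResult("test", "tbl", false, 2 /*rowsAdd*/, 1 /*rowsDelete*/, 100, 99, chunkID)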
+func (r *Report) SetTableDataCheckResult(schema, table string, equal bool, rowsAdd, rowsDelete int, upCount, downCount int64, id *chunk.ChunkID) { + r.Lock() + defer r.Unlock() + result := r.TableResults[schema][table] + result.UpCount += upCount + result.DownCount += downCount + if !equal { + result.DataEqual = equal + if _, ok := result.ChunkMap[id.ToString()]; !ok { + result.ChunkMap[id.ToString()] = &ChunkResult{ + RowsAdd: 0, + RowsDelete: 0, + } + } + result.ChunkMap[id.ToString()].RowsAdd += rowsAdd + result.ChunkMap[id.ToString()].RowsDelete += rowsDelete + if r.Result != Error && common.AllTableExist(result.TableLack) { + r.Result = Fail + } + } + if !equal && common.AllTableExist(result.TableLack) && r.Result != Error { + r.Result = Fail + } +} + +// SetTableMeetError sets meet error when check the table. +func (r *Report) SetTableMeetError(schema, table string, err error) { + r.Lock() + defer r.Unlock() + if _, ok := r.TableResults[schema]; !ok { + r.TableResults[schema] = make(map[string]*TableResult) + r.TableResults[schema][table] = &TableResult{ + MeetError: err, + } + return + } + + r.TableResults[schema][table].MeetError = err + r.Result = Error +} + +// GetSnapshot get the snapshot of the current state of the report, then we can restart the +// sync-diff and get the correct report state. +func (r *Report) GetSnapshot(chunkID *chunk.ChunkID, schema, table string) (*Report, error) { + r.RLock() + defer r.RUnlock() + targetID := utils.UniqueID(schema, table) + reserveMap := make(map[string]map[string]*TableResult) + for schema, tableMap := range r.TableResults { + reserveMap[schema] = make(map[string]*TableResult) + for table, result := range tableMap { + reportID := utils.UniqueID(schema, table) + if reportID >= targetID { + chunkRes := make(map[string]*ChunkResult) + reserveMap[schema][table] = &TableResult{ + Schema: result.Schema, + Table: result.Table, + StructEqual: result.StructEqual, + DataEqual: result.DataEqual, + MeetError: result.MeetError, + } + for id, chunkResult := range result.ChunkMap { + sid := new(chunk.ChunkID) + err := sid.FromString(id) + if err != nil { + return nil, errors.Trace(err) + } + if sid.Compare(chunkID) <= 0 { + chunkRes[id] = chunkResult + } + } + reserveMap[schema][table].ChunkMap = chunkRes + } + } + } + + result := r.Result + totalSize := r.TotalSize + duration := time.Since(r.StartTime) + task := r.task + return &Report{ + PassNum: 0, + FailedNum: 0, + Result: result, + TableResults: reserveMap, + StartTime: r.StartTime, + Duration: duration, + TotalSize: totalSize, + + task: task, + }, nil +} diff --git a/sync_diff_inspector/report/report_test.go b/sync_diff_inspector/report/report_test.go new file mode 100644 index 00000000000..ce49d1a8016 --- /dev/null +++ b/sync_diff_inspector/report/report_test.go @@ -0,0 +1,526 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package report + +import ( + "bytes" + "context" + "errors" + "os" + "path" + "testing" + + "github.com/BurntSushi/toml" + "github.com/DATA-DOG/go-sqlmock" + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/stretchr/testify/require" +) + +var task *config.TaskConfig = &config.TaskConfig{ + OutputDir: "output_dir", + FixDir: "output_dir/123456/fix-on-tidb1", + CheckpointDir: "output_dir/123456/checkpoint", +} + +func TestReport(t *testing.T) { + ctx := context.Background() + + db, mock, err := sqlmock.New() + require.NoError(t, err) + + report := NewReport(task) + createTableSQL1 := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + createTableSQL2 := "create table `atest`.`atbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + tableDiffs := []*common.TableDiff{ + { + Schema: "test", + Table: "tbl", + Info: tableInfo1, + Collation: "[123]", + }, + { + Schema: "atest", + Table: "atbl", + Info: tableInfo2, + Collation: "[123]", + }, + { + Schema: "ctest", + Table: "atbl", + Info: tableInfo2, + Collation: "[123]", + }, + { + Schema: "dtest", + Table: "atbl", + Info: tableInfo2, + Collation: "[123]", + }, + } + configs := []*ReportConfig{ + { + Host: "127.0.0.1", + Port: 3306, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 3307, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 4000, + User: "root", + }, + } + + configsBytes := make([][]byte, 3) + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + err := toml.NewEncoder(buf).Encode(configs[i]) + require.NoError(t, err) + configsBytes[i] = buf.Bytes() + } + report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) + + // Test CalculateTotal + mock.ExpectQuery("select sum.*").WillReturnRows(sqlmock.NewRows([]string{"data"}).AddRow("123")) + mock.ExpectQuery("select sum.*where table_schema=.*").WillReturnRows(sqlmock.NewRows([]string{"data"}).AddRow("456")) + report.CalculateTotalSize(ctx, db) + + // Test Table Report + report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("test", "tbl", true, 100, 200, 222, 222, &chunk.ChunkID{1, 1, 1, 1, 2}) + report.SetTableMeetError("test", "tbl", errors.New("eeee")) + + new_report := NewReport(task) + new_report.LoadReport(report) + + require.Equal(t, new_report.TotalSize, int64(579)) + result, ok := new_report.TableResults["test"]["tbl"] + require.True(t, ok) + require.Equal(t, result.MeetError.Error(), "eeee") + require.True(t, result.DataEqual) + require.True(t, result.StructEqual) + + require.Equal(t, new_report.getSortedTables(), [][]string{{"`atest`.`atbl`", "0", "0"}, {"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) + require.Equal(t, new_report.getDiffRows(), [][]string{}) + + new_report.SetTableStructCheckResult("atest", "atbl", true, false, common.AllTableExistFlag) + new_report.SetTableDataCheckResult("atest", "atbl", false, 111, 222, 333, 333, &chunk.ChunkID{1, 1, 1, 1, 2}) + require.Equal(t, new_report.getSortedTables(), [][]string{{"`ctest`.`atbl`", "0", "0"}, 
{"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) + require.Equal(t, new_report.getDiffRows(), [][]string{{"`atest`.`atbl`", "succeed", "true", "+111/-222", "333", "333"}}) + + new_report.SetTableStructCheckResult("atest", "atbl", false, false, common.AllTableExistFlag) + require.Equal(t, new_report.getSortedTables(), [][]string{{"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) + require.Equal(t, new_report.getDiffRows(), [][]string{{"`atest`.`atbl`", "succeed", "false", "+111/-222", "333", "333"}}) + + new_report.SetTableStructCheckResult("ctest", "atbl", false, true, common.AllTableExistFlag) + + new_report.SetTableStructCheckResult("dtest", "atbl", false, true, common.DownstreamTableLackFlag) + + buf := new(bytes.Buffer) + new_report.Print(buf) + info := buf.String() + require.Contains(t, info, "The structure of `atest`.`atbl` is not equal\n") + require.Contains(t, info, "The data of `atest`.`atbl` is not equal\n") + require.Contains(t, info, "The structure of `ctest`.`atbl` is not equal, and data-check is skipped\n") + require.Contains(t, info, "The data of `dtest`.`atbl` does not exist in downstream database\n") + require.Contains(t, info, "\n"+ + "The rest of tables are all equal.\n\n"+ + "A total of 0 tables have been compared, 0 tables finished, 0 tables failed, 0 tables skipped.\n"+ + "The patch file has been generated in \n\t'output_dir/123456/fix-on-tidb1/'\n"+ + "You can view the comparison details through 'output_dir/sync_diff.log'\n") +} + +func TestCalculateTotal(t *testing.T) { + ctx := context.Background() + + db, mock, err := sqlmock.New() + require.NoError(t, err) + + report := NewReport(task) + createTableSQL := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + tableDiffs := []*common.TableDiff{ + { + Schema: "test", + Table: "tbl", + Info: tableInfo, + Collation: "[123]", + }, + } + configs := []*ReportConfig{ + { + Host: "127.0.0.1", + Port: 3306, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 3307, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 4000, + User: "root", + }, + } + + configsBytes := make([][]byte, 3) + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + err := toml.NewEncoder(buf).Encode(configs[i]) + require.NoError(t, err) + configsBytes[i] = buf.Bytes() + } + report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) + + // Normal + mock.ExpectQuery("select sum.*").WillReturnRows(sqlmock.NewRows([]string{"data"}).AddRow("123")) + report.CalculateTotalSize(ctx, db) + require.Equal(t, report.TotalSize, int64(123)) +} + +func TestPrint(t *testing.T) { + report := NewReport(task) + createTableSQL := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + tableDiffs := []*common.TableDiff{ + { + Schema: "test", + Table: "tbl", + Info: tableInfo, + Collation: "[123]", + }, + { + Schema: "test", + Table: "tbl1", + Info: tableInfo, + Collation: "[123]", + }, + } + configs := []*ReportConfig{ + { + Host: "127.0.0.1", + Port: 3306, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 3307, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 4000, + User: "root", + }, + } + + configsBytes := make([][]byte, 3) + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + err := 
toml.NewEncoder(buf).Encode(configs[i]) + require.NoError(t, err) + configsBytes[i] = buf.Bytes() + } + report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) + + var buf *bytes.Buffer + // All Pass + report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("test", "tbl", true, 0, 0, 22, 22, &chunk.ChunkID{0, 0, 0, 0, 1}) + buf = new(bytes.Buffer) + report.Print(buf) + require.Equal(t, buf.String(), "A total of 0 table have been compared and all are equal.\n"+ + "You can view the comparison details through 'output_dir/sync_diff.log'\n") + + // Error + report.SetTableMeetError("test", "tbl1", errors.New("123")) + report.SetTableStructCheckResult("test", "tbl1", false, false, common.AllTableExistFlag) + buf = new(bytes.Buffer) + report.Print(buf) + require.Equal(t, buf.String(), "Error in comparison process:\n"+ + "123 error occured in `test`.`tbl1`\n"+ + "You can view the comparison details through 'output_dir/sync_diff.log'\n") +} + +func TestGetSnapshot(t *testing.T) { + report := NewReport(task) + createTableSQL1 := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + createTableSQL2 := "create table `atest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + createTableSQL3 := "create table `xtest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo3, err := dbutil.GetTableInfoBySQL(createTableSQL3, parser.New()) + require.NoError(t, err) + + tableDiffs := []*common.TableDiff{ + { + Schema: "test", + Table: "tbl", + Info: tableInfo1, + Collation: "[123]", + }, { + Schema: "atest", + Table: "tbl", + Info: tableInfo2, + Collation: "[123]", + }, { + Schema: "xtest", + Table: "tbl", + Info: tableInfo3, + Collation: "[123]", + }, + } + configs := []*ReportConfig{ + { + Host: "127.0.0.1", + Port: 3306, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 3307, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 4000, + User: "root", + }, + } + + configsBytes := make([][]byte, 3) + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + err := toml.NewEncoder(buf).Encode(configs[i]) + require.NoError(t, err) + configsBytes[i] = buf.Bytes() + } + report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) + + report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("test", "tbl", false, 100, 100, 200, 300, &chunk.ChunkID{0, 0, 0, 1, 10}) + report.SetTableDataCheckResult("test", "tbl", true, 0, 0, 300, 300, &chunk.ChunkID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("test", "tbl", false, 200, 200, 400, 500, &chunk.ChunkID{0, 0, 0, 3, 10}) + + report.SetTableStructCheckResult("atest", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("atest", "tbl", false, 100, 100, 500, 600, &chunk.ChunkID{0, 0, 0, 0, 10}) + report.SetTableDataCheckResult("atest", "tbl", true, 0, 0, 600, 600, &chunk.ChunkID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("atest", "tbl", false, 200, 200, 700, 800, &chunk.ChunkID{0, 0, 0, 3, 10}) + + report.SetTableStructCheckResult("xtest", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("xtest", "tbl", false, 100, 100, 800, 900, &chunk.ChunkID{0, 0, 0, 0, 10}) + 
report.SetTableDataCheckResult("xtest", "tbl", true, 0, 0, 900, 900, &chunk.ChunkID{0, 0, 0, 1, 10}) + report.SetTableDataCheckResult("xtest", "tbl", false, 200, 200, 1000, 1100, &chunk.ChunkID{0, 0, 0, 3, 10}) + + report_snap, err := report.GetSnapshot(&chunk.ChunkID{0, 0, 0, 1, 10}, "test", "tbl") + require.NoError(t, err) + require.Equal(t, report_snap.TotalSize, report.TotalSize) + require.Equal(t, report_snap.Result, report.Result) + for key, value := range report.TableResults { + if _, ok := report_snap.TableResults[key]; !ok { + v, ok := value["tbl"] + require.True(t, ok) + require.Equal(t, v.Schema, "atest") + continue + } + + if _, ok := report_snap.TableResults[key]["tbl"]; !ok { + require.Equal(t, key, "atest") + continue + } + + v1 := value["tbl"] + v2 := report_snap.TableResults[key]["tbl"] + require.Equal(t, v1.Schema, v2.Schema) + require.Equal(t, v1.Table, v2.Table) + require.Equal(t, v1.StructEqual, v2.StructEqual) + require.Equal(t, v1.DataEqual, v2.DataEqual) + require.Equal(t, v1.MeetError, v2.MeetError) + + chunkMap1 := v1.ChunkMap + chunkMap2 := v2.ChunkMap + for id, r1 := range chunkMap1 { + sid := new(chunk.ChunkID) + if _, ok := chunkMap2[id]; !ok { + require.NoError(t, sid.FromString(id)) + require.Equal(t, sid.Compare(&chunk.ChunkID{0, 0, 0, 3, 10}), 0) + continue + } + require.NoError(t, sid.FromString(id)) + require.True(t, sid.Compare(&chunk.ChunkID{0, 0, 0, 1, 10}) <= 0) + r2 := chunkMap2[id] + require.Equal(t, r1.RowsAdd, r2.RowsAdd) + require.Equal(t, r1.RowsDelete, r2.RowsDelete) + } + + } +} + +func TestCommitSummary(t *testing.T) { + outputDir := "./" + report := NewReport(&config.TaskConfig{OutputDir: outputDir, FixDir: task.FixDir}) + createTableSQL1 := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + createTableSQL2 := "create table `atest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + createTableSQL3 := "create table `xtest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo3, err := dbutil.GetTableInfoBySQL(createTableSQL3, parser.New()) + require.NoError(t, err) + createTableSQL4 := "create table `xtest`.`tb1`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo4, err := dbutil.GetTableInfoBySQL(createTableSQL4, parser.New()) + require.NoError(t, err) + tableDiffs := []*common.TableDiff{ + { + Schema: "test", + Table: "tbl", + Info: tableInfo1, + Collation: "[123]", + }, { + Schema: "atest", + Table: "tbl", + Info: tableInfo2, + Collation: "[123]", + }, { + Schema: "xtest", + Table: "tbl", + Info: tableInfo3, + Collation: "[123]", + }, { + Schema: "ytest", + Table: "tbl", + Info: tableInfo3, + Collation: "[123]", + }, { + Schema: "xtest", + Table: "tb1", + Info: tableInfo4, + Collation: "[123]", + }, { + Schema: "xtest", + Table: "tb2", + Info: tableInfo4, + Collation: "[123]", + }, + } + configs := []*ReportConfig{ + { + Host: "127.0.0.1", + Port: 3306, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 3307, + User: "root", + }, + { + Host: "127.0.0.1", + Port: 4000, + User: "root", + }, + } + + configsBytes := make([][]byte, 3) + for i := 0; i < 3; i++ { + buf := new(bytes.Buffer) + err := toml.NewEncoder(buf).Encode(configs[i]) + require.NoError(t, err) + configsBytes[i] = 
buf.Bytes() + } + report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) + + report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("test", "tbl", true, 100, 200, 400, 400, &chunk.ChunkID{0, 0, 0, 1, 10}) + + report.SetTableStructCheckResult("atest", "tbl", true, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("atest", "tbl", false, 100, 200, 500, 600, &chunk.ChunkID{0, 0, 0, 2, 10}) + + report.SetTableStructCheckResult("xtest", "tbl", false, false, common.AllTableExistFlag) + report.SetTableDataCheckResult("xtest", "tbl", false, 100, 200, 600, 700, &chunk.ChunkID{0, 0, 0, 3, 10}) + + report.SetTableStructCheckResult("xtest", "tb1", false, true, common.UpstreamTableLackFlag) + report.SetTableDataCheckResult("xtest", "tb1", false, 0, 200, 0, 200, &chunk.ChunkID{0, 0, 0, 4, 10}) + + report.SetTableStructCheckResult("xtest", "tb2", false, true, common.DownstreamTableLackFlag) + report.SetTableDataCheckResult("xtest", "tb2", false, 100, 0, 100, 0, &chunk.ChunkID{0, 0, 0, 5, 10}) + + err = report.CommitSummary() + require.NoError(t, err) + filename := path.Join(outputDir, "summary.txt") + file, err := os.Open(filename) + require.NoError(t, err) + + p := make([]byte, 2048) + file.Read(p) + str := string(p) + require.Contains(t, str, "Summary\n\n\n\n"+ + "Source Database\n\n\n\n"+ + "host = \"127.0.0.1\"\n"+ + "port = 3306\n"+ + "user = \"root\"\n\n"+ + "host = \"127.0.0.1\"\n"+ + "port = 3307\n"+ + "user = \"root\"\n\n"+ + "Target Databases\n\n\n\n"+ + "host = \"127.0.0.1\"\n"+ + "port = 4000\n"+ + "user = \"root\"\n\n"+ + "Comparison Result\n\n\n\n"+ + "The table structure and data in following tables are equivalent\n\n"+ + "+---------------+---------+-----------+\n"+ + "| TABLE | UPCOUNT | DOWNCOUNT |\n"+ + "+---------------+---------+-----------+\n"+ + "| `test`.`tbl` | 400 | 400 |\n"+ + "| `ytest`.`tbl` | 0 | 0 |\n"+ + "+---------------+---------+-----------+\n\n\n"+ + "The following tables contains inconsistent data\n\n"+ + "+---------------+---------+--------------------+----------------+---------+-----------+\n"+ + "| TABLE | RESULT | STRUCTURE EQUALITY | DATA DIFF ROWS | UPCOUNT | DOWNCOUNT |\n"+ + "+---------------+---------+--------------------+----------------+---------+-----------+\n") + require.Contains(t, str, + "| `atest`.`tbl` | succeed | true | +100/-200 | 500 | 600 |\n") + require.Contains(t, str, + "| `xtest`.`tbl` | succeed | false | +100/-200 | 600 | 700 |\n") + require.Contains(t, str, + "| `xtest`.`tb1` | skipped | false | +0/-200 | 0 | 200 |\n") + require.Contains(t, str, + "| `xtest`.`tb2` | skipped | false | +100/-0 | 100 | 0 |\n") + + file.Close() + err = os.Remove(filename) + require.NoError(t, err) +} diff --git a/sync_diff_inspector/source/chunks_iter.go b/sync_diff_inspector/source/chunks_iter.go new file mode 100644 index 00000000000..44b051fcb72 --- /dev/null +++ b/sync_diff_inspector/source/chunks_iter.go @@ -0,0 +1,189 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
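+// (Editorial sketch) ChunksIterator below is a producer/consumer pipeline:
+// produceChunks splits every table on a worker pool and feeds
+// splitter.RangeInfo values into an internal channel, and callers drain it
+// with Next until it returns nil. Assumed usage, mirroring how the diff loop
+// consumes ranges:
+//
+//	iter, err := NewChunksIterator(ctx, analyzer, tableDiffs, nil /* no checkpoint */, 4)
+//	if err != nil {
+//		return err
+//	}
+//	defer iter.Close()
+//	for {
+//		r, err := iter.Next(ctx)
+//		if err != nil || r == nil {
+//			break
+//		}
+//		// compare the chunk described by r
+//	}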
+ +package source + +import ( + "context" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/progress" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/splitter" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" +) + +// ChunksIterator is used for single mysql/tidb source. +type ChunksIterator struct { + ID *chunk.ChunkID + tableAnalyzer TableAnalyzer + + TableDiffs []*common.TableDiff + nextTableIndex int + chunksCh chan *splitter.RangeInfo + errCh chan error + splitThreadCount int + + cancel context.CancelFunc +} + +func NewChunksIterator(ctx context.Context, analyzer TableAnalyzer, tableDiffs []*common.TableDiff, startRange *splitter.RangeInfo, splitThreadCount int) (*ChunksIterator, error) { + ctxx, cancel := context.WithCancel(ctx) + iter := &ChunksIterator{ + splitThreadCount: splitThreadCount, + tableAnalyzer: analyzer, + TableDiffs: tableDiffs, + + // reserve 30 capacity for each goroutine on average + chunksCh: make(chan *splitter.RangeInfo, 30*splitThreadCount), + errCh: make(chan error, len(tableDiffs)), + cancel: cancel, + } + go iter.produceChunks(ctxx, startRange) + return iter, nil +} + +func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter.RangeInfo) { + defer close(t.chunksCh) + pool := utils.NewWorkerPool(uint(t.splitThreadCount), "chunks producer") + t.nextTableIndex = 0 + + // If chunkRange + if startRange != nil { + curIndex := startRange.GetTableIndex() + curTable := t.TableDiffs[curIndex] + t.nextTableIndex = curIndex + 1 + // if this chunk is empty, data-check for this table should be skipped + if startRange.ChunkRange.Type != chunk.Empty { + pool.Apply(func() { + chunkIter, err := t.tableAnalyzer.AnalyzeSplitter(ctx, curTable, startRange) + if err != nil { + t.errCh <- errors.Trace(err) + return + } + defer chunkIter.Close() + for { + c, err := chunkIter.Next() + if err != nil { + t.errCh <- errors.Trace(err) + return + } + if c == nil { + break + } + c.Index.TableIndex = curIndex + select { + case <-ctx.Done(): + log.Info("Stop do produce chunks by context done") + return + case t.chunksCh <- &splitter.RangeInfo{ + ChunkRange: c, + IndexID: getCurTableIndexID(chunkIter), + ProgressID: dbutil.TableName(curTable.Schema, curTable.Table), + }: + } + } + }) + } + } + + for ; t.nextTableIndex < len(t.TableDiffs); t.nextTableIndex++ { + curTableIndex := t.nextTableIndex + // skip data-check, but still need to send a empty chunk to make checkpoint continuous + if t.TableDiffs[curTableIndex].IgnoreDataCheck || !common.AllTableExist(t.TableDiffs[curTableIndex].TableLack) { + pool.Apply(func() { + table := t.TableDiffs[curTableIndex] + progressID := dbutil.TableName(table.Schema, table.Table) + progress.StartTable(progressID, 1, true) + select { + case <-ctx.Done(): + log.Info("Stop do produce chunks by context done") + return + case t.chunksCh <- &splitter.RangeInfo{ + ChunkRange: &chunk.Range{ + Index: &chunk.ChunkID{ + TableIndex: curTableIndex, + }, + Type: chunk.Empty, + IsFirst: true, + IsLast: true, + }, + ProgressID: progressID, + }: + } + }) + continue + } + + pool.Apply(func() { + table := t.TableDiffs[curTableIndex] + chunkIter, err := t.tableAnalyzer.AnalyzeSplitter(ctx, table, nil) + if err != nil { + t.errCh <- errors.Trace(err) + return + } + defer chunkIter.Close() + for { + c, err := chunkIter.Next() + if err 
!= nil { + t.errCh <- errors.Trace(err) + return + } + if c == nil { + break + } + c.Index.TableIndex = curTableIndex + select { + case <-ctx.Done(): + log.Info("Stop do produce chunks by context done") + return + case t.chunksCh <- &splitter.RangeInfo{ + ChunkRange: c, + IndexID: getCurTableIndexID(chunkIter), + ProgressID: dbutil.TableName(table.Schema, table.Table), + }: + } + } + }) + } + pool.WaitFinished() +} + +func (t *ChunksIterator) Next(ctx context.Context) (*splitter.RangeInfo, error) { + select { + case <-ctx.Done(): + return nil, nil + case r, ok := <-t.chunksCh: + if !ok && r == nil { + return nil, nil + } + return r, nil + case err := <-t.errCh: + return nil, errors.Trace(err) + } +} + +func (t *ChunksIterator) Close() { + t.cancel() +} + +// TODO: getCurTableIndexID only used for binary search, should be optimized later. +func getCurTableIndexID(tableIter splitter.ChunkIterator) int64 { + if bt, ok := tableIter.(*splitter.BucketIterator); ok { + return bt.GetIndexID() + } + return 0 +} diff --git a/sync_diff_inspector/source/common/common_test.go b/sync_diff_inspector/source/common/common_test.go new file mode 100644 index 00000000000..467548935bf --- /dev/null +++ b/sync_diff_inspector/source/common/common_test.go @@ -0,0 +1,64 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
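+// (Editorial note) The test below exercises the ordering rules implemented by
+// RowDatas in rows.go: a NULL key column sorts before any non-NULL value,
+// string-typed key columns compare lexicographically, and numeric ones compare
+// by value, which is why the row with a NULL id is expected to pop first.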
+ +package common + +import ( + "container/heap" + "testing" + + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "github.com/stretchr/testify/require" +) + +func TestRowData(t *testing.T) { + createTableSQL := "create table test.test(id int(24), name varchar(24), age int(24), primary key(id, name));" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + _, orderKeyCols := dbutil.SelectUniqueOrderKey(tableInfo) + require.Equal(t, utils.NeedQuotes(orderKeyCols[1].FieldType.GetType()), true) + ids := []string{"3", "2", "2", "2", "4", "1", "NULL"} + names := []string{"d", "NULL", "c", "g", "b", "a", "e"} + ages := []string{"1", "2", "3", "3", "NULL", "5", "4"} + + expectIDs := []string{"NULL", "1", "2", "2", "2", "3", "4"} + expectNames := []string{"e", "a", "NULL", "c", "g", "d", "b"} + + rowDatas := &RowDatas{ + Rows: make([]RowData, 0, len(ids)), + OrderKeyCols: orderKeyCols, + } + + heap.Init(rowDatas) + for i, id := range ids { + data := map[string]*dbutil.ColumnData{ + "id": {Data: []byte(id), IsNull: (id == "NULL")}, + "name": {Data: []byte(names[i]), IsNull: (names[i] == "NULL")}, + "age": {Data: []byte(ages[i]), IsNull: (ages[i] == "NULL")}, + } + heap.Push(rowDatas, RowData{ + Data: data, + }) + } + + for i := 0; i < len(ids); i++ { + rowData := heap.Pop(rowDatas).(RowData) + id := string(rowData.Data["id"].Data) + name := string(rowData.Data["name"].Data) + require.Equal(t, id, expectIDs[i]) + require.Equal(t, name, expectNames[i]) + } +} diff --git a/sync_diff_inspector/source/common/conn.go b/sync_diff_inspector/source/common/conn.go new file mode 100755 index 00000000000..286d1f5485e --- /dev/null +++ b/sync_diff_inspector/source/common/conn.go @@ -0,0 +1,74 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package common + +import ( + "database/sql" + "encoding/base64" + + "github.com/go-sql-driver/mysql" + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + tmysql "github.com/pingcap/tidb/pkg/parser/mysql" +) + +func tryConnectMySQL(cfg *mysql.Config) (*sql.DB, error) { + failpoint.Inject("MustMySQLPassword", func(val failpoint.Value) { + pwd := val.(string) + if cfg.Passwd != pwd { + failpoint.Return(nil, &mysql.MySQLError{Number: tmysql.ErrAccessDenied, Message: "access denied"}) + } + failpoint.Return(nil, nil) + }) + c, err := mysql.NewConnector(cfg) + if err != nil { + return nil, errors.Trace(err) + } + db := sql.OpenDB(c) + if err = db.Ping(); err != nil { + _ = db.Close() + return nil, errors.Trace(err) + } + return db, nil +} + +// ConnectMySQL creates sql.DB used for select data +func ConnectMySQL(cfg *mysql.Config, num int) (db *sql.DB, err error) { + defer func() { + if err == nil && db != nil { + // SetMaxOpenConns and SetMaxIdleConns for connection to avoid error like + // `dial tcp 10.26.2.1:3306: connect: cannot assign requested address` + db.SetMaxOpenConns(num) + db.SetMaxIdleConns(num) + } + }() + // Try plain password first. 
+ db, firstErr := tryConnectMySQL(cfg) + if firstErr == nil { + return db, nil + } + // If access is denied and password is encoded by base64, try the decoded string as well. + if mysqlErr, ok := errors.Cause(firstErr).(*mysql.MySQLError); ok && mysqlErr.Number == tmysql.ErrAccessDenied { + // If password is encoded by base64, try the decoded string as well. + if password, decodeErr := base64.StdEncoding.DecodeString(cfg.Passwd); decodeErr == nil && string(password) != cfg.Passwd { + cfg.Passwd = string(password) + db2, err := tryConnectMySQL(cfg) + if err == nil { + return db2, nil + } + } + } + // If we can't connect successfully, return the first error. + return nil, errors.Trace(firstErr) +} diff --git a/sync_diff_inspector/source/common/conn_test.go b/sync_diff_inspector/source/common/conn_test.go new file mode 100644 index 00000000000..7509854152a --- /dev/null +++ b/sync_diff_inspector/source/common/conn_test.go @@ -0,0 +1,48 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package common + +import ( + "encoding/base64" + "fmt" + "testing" + + "github.com/pingcap/failpoint" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "github.com/stretchr/testify/require" +) + +func TestConnect(t *testing.T) { + plainPsw := "dQAUoDiyb1ucWZk7" + + require.NoError(t, failpoint.Enable( + "github.com/pingcap/tiflow/sync_diff_inspector/source/common/MustMySQLPassword", + fmt.Sprintf("return(\"%s\")", plainPsw))) + defer func() { + require.NoError(t, failpoint.Disable("github.com/pingcap/tiflow/sync_diff_inspector/source/common/MustMySQLPassword")) + }() + + dataSource := &config.DataSource{ + Host: "127.0.0.1", + Port: 4000, + User: "root", + Password: utils.SecretString(plainPsw), + } + _, err := ConnectMySQL(dataSource.ToDriverConfig(), 2) + require.NoError(t, err) + dataSource.Password = utils.SecretString(base64.StdEncoding.EncodeToString([]byte(plainPsw))) + _, err = ConnectMySQL(dataSource.ToDriverConfig(), 2) + require.NoError(t, err) +} diff --git a/sync_diff_inspector/source/common/rows.go b/sync_diff_inspector/source/common/rows.go new file mode 100644 index 00000000000..a97204881f2 --- /dev/null +++ b/sync_diff_inspector/source/common/rows.go @@ -0,0 +1,101 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
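+// (Editorial sketch) RowDatas implements container/heap so that rows read from
+// several shard connections can be merged in order-key order. Assumed usage,
+// close to what mysql_shard.go does (orderKeyCols would come from
+// dbutil.SelectUniqueOrderKey):
+//
+//	rd := &RowDatas{Rows: make([]RowData, 0, n), OrderKeyCols: orderKeyCols}
+//	heap.Init(rd)
+//	heap.Push(rd, RowData{Data: row, Source: 0})
+//	smallest := heap.Pop(rd).(RowData) // row with the smallest order key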
+ +package common + +import ( + "strconv" + + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +type RowData struct { + Data map[string]*dbutil.ColumnData + Source int +} + +// RowDatas is a heap of MergeItems. +type RowDatas struct { + Rows []RowData + OrderKeyCols []*model.ColumnInfo +} + +func (r RowDatas) Len() int { return len(r.Rows) } +func (r RowDatas) Less(i, j int) bool { + for _, col := range r.OrderKeyCols { + col1, ok := r.Rows[i].Data[col.Name.O] + if !ok { + log.Fatal("data don't have column", zap.String("column", col.Name.O), zap.Reflect("data", r.Rows[i].Data)) + } + col2, ok := r.Rows[j].Data[col.Name.O] + if !ok { + log.Fatal("data don't have column", zap.String("column", col.Name.O), zap.Reflect("data", r.Rows[j].Data)) + } + + switch { + case col1.IsNull && col2.IsNull: + continue + case col1.IsNull: + return true + case col2.IsNull: + return false + } + + strData1 := string(col1.Data) + strData2 := string(col2.Data) + + if utils.NeedQuotes(col.FieldType.GetType()) { + if strData1 == strData2 { + continue + } + return strData1 < strData2 + } + + num1, err1 := strconv.ParseFloat(strData1, 64) + if err1 != nil { + log.Fatal("convert string to float failed", zap.String("column", col.Name.O), zap.String("data", strData1), zap.Error(err1)) + } + num2, err2 := strconv.ParseFloat(strData2, 64) + if err2 != nil { + log.Fatal("convert string to float failed", zap.String("column", col.Name.O), zap.String("data", strData2), zap.Error(err2)) + } + + if num1 == num2 { + continue + } + return num1 < num2 + + } + + return false +} +func (r RowDatas) Swap(i, j int) { r.Rows[i], r.Rows[j] = r.Rows[j], r.Rows[i] } + +// Push implements heap.Interface's Push function +func (r *RowDatas) Push(x interface{}) { + r.Rows = append(r.Rows, x.(RowData)) +} + +// Pop implements heap.Interface's Pop function +func (r *RowDatas) Pop() (x interface{}) { + if len(r.Rows) == 0 { + return nil + } + + r.Rows, x = r.Rows[:len(r.Rows)-1], r.Rows[len(r.Rows)-1] + return +} diff --git a/sync_diff_inspector/source/common/table_diff.go b/sync_diff_inspector/source/common/table_diff.go new file mode 100644 index 00000000000..1d8befb7a11 --- /dev/null +++ b/sync_diff_inspector/source/common/table_diff.go @@ -0,0 +1,83 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package common + +import ( + "database/sql" + + "github.com/pingcap/tidb/pkg/parser/model" +) + +// TableShardSource represents the origin schema and table and DB connection before router. +// It used for MySQL Shard source. +type TableShardSource struct { + TableSource + // DBConn represents the origin DB connection for this TableSource. + // This TableSource may exists in different MySQL shard. + DBConn *sql.DB +} + +// TableSource represents the origin schema and table before router. +// It used for TiDB/MySQL source. 
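+// For example (illustrative shard names), a shard table `shard_01`.`t_0001`
+// that is routed to the target table `test`.`t` keeps OriginSchema = "shard_01"
+// and OriginTable = "t_0001" here, so checksum and row queries can still be
+// issued against the original shard.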
+type TableSource struct { + OriginSchema string + OriginTable string +} + +// TableDiff saves config for diff table +type TableDiff struct { + // Schema represents the database name. + Schema string `json:"schema"` + + // Table represents the table name. + Table string `json:"table"` + + // Info is the parser.TableInfo, include some meta infos for this table. + // It used for TiDB/MySQL/MySQL Shard sources. + Info *model.TableInfo `json:"info"` + + // columns be ignored + IgnoreColumns []string `json:"-"` + + // field should be the primary key, unique key or field with index + Fields string `json:"fields"` + + // select range, for example: "age > 10 AND age < 20" + Range string `json:"range"` + + // ignore check table's data + IgnoreDataCheck bool `json:"-"` + + // the table has column timestamp, which need to reset time_zone. + NeedUnifiedTimeZone bool `json:"-"` + + Collation string `json:"collation"` + + ChunkSize int64 `json:"chunk-size"` + + // TableLack = 1: the table only exists downstream, + // TableLack = -1: the table only exists upstream, + // TableLack = 0: the table exists both upstream and downstream. + TableLack int `json:"-"` +} + +const ( + AllTableExistFlag = 0 + DownstreamTableLackFlag = -1 + UpstreamTableLackFlag = 1 +) + +func AllTableExist(tableLack int) bool { + return tableLack == AllTableExistFlag +} diff --git a/sync_diff_inspector/source/mysql_shard.go b/sync_diff_inspector/source/mysql_shard.go new file mode 100644 index 00000000000..2a43f48081f --- /dev/null +++ b/sync_diff_inspector/source/mysql_shard.go @@ -0,0 +1,390 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package source + +import ( + "container/heap" + "context" + "database/sql" + "fmt" + "time" + + tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/filter" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/splitter" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +type MySQLTableAnalyzer struct { + sourceTableMap map[string][]*common.TableShardSource +} + +func (a *MySQLTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common.TableDiff, startRange *splitter.RangeInfo) (splitter.ChunkIterator, error) { + matchedSources := getMatchedSourcesForTable(a.sourceTableMap, table) + + // It's useful we are not able to pick shard merge source as workSource to generate ChunksIterator. 
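+	// (Editorial note) A shard-merge (multi-source) table is never chosen as
+	// the work source, so exactly one matched source is expected here; hitting
+	// the branch below would indicate a programming error.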
+ if len(matchedSources) > 1 { + log.Fatal("unreachable, shard merge table cannot generate splitter for now.") + } + // Shallow Copy + originTable := *table + originTable.Schema = matchedSources[0].OriginSchema + originTable.Table = matchedSources[0].OriginTable + progressID := dbutil.TableName(table.Schema, table.Table) + // use random splitter if we cannot use bucket splitter, then we can simply choose target table to generate chunks. + randIter, err := splitter.NewRandomIteratorWithCheckpoint(ctx, progressID, &originTable, matchedSources[0].DBConn, startRange) + if err != nil { + return nil, errors.Trace(err) + } + return randIter, nil +} + +type MySQLSources struct { + tableDiffs []*common.TableDiff + + sourceTablesMap map[string][]*common.TableShardSource +} + +func getMatchedSourcesForTable(sourceTablesMap map[string][]*common.TableShardSource, table *common.TableDiff) []*common.TableShardSource { + if sourceTablesMap == nil { + log.Fatal("unreachable, source tables map shouldn't be nil.") + } + matchSources, ok := sourceTablesMap[utils.UniqueID(table.Schema, table.Table)] + if !ok && common.AllTableExist(table.TableLack) { + log.Fatal("unreachable, no match source tables in mysql shard source.") + } + return matchSources +} + +func (s *MySQLSources) GetTableAnalyzer() TableAnalyzer { + return &MySQLTableAnalyzer{ + s.sourceTablesMap, + } +} + +func (s *MySQLSources) GetRangeIterator(ctx context.Context, r *splitter.RangeInfo, analyzer TableAnalyzer, splitThreadCount int) (RangeIterator, error) { + return NewChunksIterator(ctx, analyzer, s.tableDiffs, r, splitThreadCount) +} + +func (s *MySQLSources) Close() { + for _, t := range s.sourceTablesMap { + for _, db := range t { + db.DBConn.Close() + } + } +} + +func (s *MySQLSources) GetCountAndMd5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { + beginTime := time.Now() + table := s.tableDiffs[tableRange.GetTableIndex()] + chunk := tableRange.GetChunk() + + matchSources := getMatchedSourcesForTable(s.sourceTablesMap, table) + infoCh := make(chan *ChecksumInfo, len(s.sourceTablesMap)) + + for _, ms := range matchSources { + go func(ms *common.TableShardSource) { + count, checksum, err := utils.GetCountAndMd5Checksum(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, table.Info, chunk.Where, chunk.Args) + infoCh <- &ChecksumInfo{ + Checksum: checksum, + Count: count, + Err: err, + } + }(ms) + } + defer close(infoCh) + + var ( + err error + totalCount int64 + totalChecksum uint64 + ) + + for range matchSources { + info := <-infoCh + // catch the first error + if err == nil && info.Err != nil { + err = info.Err + } + totalCount += info.Count + totalChecksum ^= info.Checksum + } + + cost := time.Since(beginTime) + return &ChecksumInfo{ + Checksum: totalChecksum, + Count: totalCount, + Err: err, + Cost: cost, + } +} + +func (s *MySQLSources) GetCountForLackTable(ctx context.Context, tableRange *splitter.RangeInfo) int64 { + table := s.tableDiffs[tableRange.GetTableIndex()] + var totalCount int64 + + matchSources := getMatchedSourcesForTable(s.sourceTablesMap, table) + if matchSources != nil { + for _, ms := range matchSources { + count, _ := dbutil.GetRowCount(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, "", nil) + totalCount += count + } + } + return totalCount +} + +func (s *MySQLSources) GetTables() []*common.TableDiff { + return s.tableDiffs +} + +func (s *MySQLSources) GenerateFixSQL(t DMLType, upstreamData, downstreamData map[string]*dbutil.ColumnData, tableIndex int) string { + switch t { + case Insert: + return 
utils.GenerateReplaceDML(upstreamData, s.tableDiffs[tableIndex].Info, s.tableDiffs[tableIndex].Schema) + case Delete: + return utils.GenerateDeleteDML(downstreamData, s.tableDiffs[tableIndex].Info, s.tableDiffs[tableIndex].Schema) + case Replace: + return utils.GenerateReplaceDMLWithAnnotation(upstreamData, downstreamData, s.tableDiffs[tableIndex].Info, s.tableDiffs[tableIndex].Schema) + default: + log.Fatal("Don't support this type", zap.Any("dml type", t)) + } + return "" +} + +func (s *MySQLSources) GetRowsIterator(ctx context.Context, tableRange *splitter.RangeInfo) (RowDataIterator, error) { + chunk := tableRange.GetChunk() + + sourceRows := make(map[int]*sql.Rows) + + table := s.tableDiffs[tableRange.GetTableIndex()] + // for tables that do not exist upstream or downstream + if !common.AllTableExist(table.TableLack) { + return nil, nil + } + matchSources := getMatchedSourcesForTable(s.sourceTablesMap, table) + + var rowsQuery string + var orderKeyCols []*model.ColumnInfo + for i, ms := range matchSources { + rowsQuery, orderKeyCols = utils.GetTableRowsQueryFormat(ms.OriginSchema, ms.OriginTable, table.Info, table.Collation) + query := fmt.Sprintf(rowsQuery, chunk.Where) + rows, err := ms.DBConn.QueryContext(ctx, query, chunk.Args...) + if err != nil { + return nil, errors.Trace(err) + } + sourceRows[i] = rows + } + + sourceRowDatas := &common.RowDatas{ + Rows: make([]common.RowData, 0, len(sourceRows)), + OrderKeyCols: orderKeyCols, + } + heap.Init(sourceRowDatas) + // first push one row from all the sources into heap + for source, sourceRow := range sourceRows { + rowData, err := getRowData(sourceRow) + if err != nil { + return nil, errors.Trace(err) + } + if rowData != nil { + heap.Push(sourceRowDatas, common.RowData{ + Data: rowData, + Source: source, + }) + } else { + if sourceRow.Err() != nil { + return nil, sourceRow.Err() + } + } + } + + return &MultiSourceRowsIterator{ + sourceRows: sourceRows, + sourceRowDatas: sourceRowDatas, + }, nil +} + +func (s *MySQLSources) GetDB() *sql.DB { + // return any of them is ok + for _, st := range s.sourceTablesMap { + for _, db := range st { + return db.DBConn + } + } + log.Warn("the source has no DB connection.") + return nil +} + +func (s *MySQLSources) GetSnapshot() string { + log.Fatal("unreachable!, mysql doesn't have the snapshot") + return "" +} + +func (s *MySQLSources) GetSourceStructInfo(ctx context.Context, tableIndex int) ([]*model.TableInfo, error) { + tableDiff := s.GetTables()[tableIndex] + // for tables that do not exist upstream or downstream + if !common.AllTableExist(tableDiff.TableLack) { + return nil, nil + } + tableSources := getMatchedSourcesForTable(s.sourceTablesMap, tableDiff) + sourceTableInfos := make([]*model.TableInfo, len(tableSources)) + for i, tableSource := range tableSources { + sourceSchema, sourceTable := tableSource.OriginSchema, tableSource.OriginTable + sourceTableInfo, err := utils.GetTableInfo(ctx, tableSource.DBConn, sourceSchema, sourceTable) + if err != nil { + return nil, errors.Trace(err) + } + sourceTableInfo, _ = utils.ResetColumns(sourceTableInfo, tableDiff.IgnoreColumns) + sourceTableInfos[i] = sourceTableInfo + } + return sourceTableInfos, nil +} + +type MultiSourceRowsIterator struct { + sourceRows map[int]*sql.Rows + sourceRowDatas *common.RowDatas +} + +func getRowData(rows *sql.Rows) (rowData map[string]*dbutil.ColumnData, err error) { + for rows.Next() { + rowData, err = dbutil.ScanRow(rows) + return + } + return +} + +func (ms *MultiSourceRowsIterator) Next() 
(map[string]*dbutil.ColumnData, error) { + // Before running getSourceRow, heap save one row from all the sources, + // otherwise this source has read to the end. Each row should be the smallest in each source. + // Once there is one row popped, we need to immediately push one row, which is from the same source, into the heap. + // all the sources had read to the end, no data to return + if len(ms.sourceRowDatas.Rows) == 0 { + return nil, nil + } + rowData := heap.Pop(ms.sourceRowDatas).(common.RowData) + newRowData, err := getRowData(ms.sourceRows[rowData.Source]) + if err != nil { + return nil, err + } + if newRowData != nil { + heap.Push(ms.sourceRowDatas, common.RowData{ + Data: newRowData, + Source: rowData.Source, + }) + } else { + if ms.sourceRows[rowData.Source].Err() != nil { + return nil, ms.sourceRows[rowData.Source].Err() + } + } + return rowData.Data, nil +} + +func (ms *MultiSourceRowsIterator) Close() { + for _, s := range ms.sourceRows { + s.Close() + } +} + +func NewMySQLSources(ctx context.Context, tableDiffs []*common.TableDiff, ds []*config.DataSource, threadCount int, f tableFilter.Filter, skipNonExistingTable bool) (Source, error) { + sourceTablesMap := make(map[string][]*common.TableShardSource) + // we should get the real table name + // and real table row query from sourceDB. + targetUniqueTableMap := make(map[string]struct{}) + for _, tableDiff := range tableDiffs { + targetUniqueTableMap[utils.UniqueID(tableDiff.Schema, tableDiff.Table)] = struct{}{} + } + + // only used for check + sourceTablesAfterRoute := make(map[string]struct{}) + + for i, sourceDB := range ds { + sourceSchemas, err := dbutil.GetSchemas(ctx, sourceDB.Conn) + if err != nil { + return nil, errors.Annotatef(err, "get schemas from %d source", i) + } + + // use this map to record max Connection for this source. + maxSourceRouteTableCount := make(map[string]int) + for _, schema := range sourceSchemas { + // Skip system schema. + if filter.IsSystemSchema(schema) { + continue + } + allTables, err := dbutil.GetTables(ctx, sourceDB.Conn, schema) + if err != nil { + return nil, errors.Annotatef(err, "get tables from %d source %s", i, schema) + } + for _, table := range allTables { + targetSchema, targetTable := schema, table + if sourceDB.Router != nil { + targetSchema, targetTable, err = sourceDB.Router.Route(schema, table) + if err != nil { + return nil, errors.Errorf("get route result for %d source %s.%s failed, error %v", i, schema, table, err) + } + } + uniqueId := utils.UniqueID(targetSchema, targetTable) + isMatched := f.MatchTable(targetSchema, targetTable) + if isMatched { + // if match the filter, we should respect it and check target has this table later. 
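+				// (Editorial note) Record every routed table that matches the filter;
+				// just below, a table whose routed target does not exist is dropped
+				// unless skipNonExistingTable is set, in which case it is kept and
+				// reconciled later by checkTableMatched.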
+ sourceTablesAfterRoute[uniqueId] = struct{}{} + } + if _, ok := targetUniqueTableMap[uniqueId]; !ok && !(isMatched && skipNonExistingTable) { + continue + } + maxSourceRouteTableCount[uniqueId]++ + if _, ok := sourceTablesMap[uniqueId]; !ok { + sourceTablesMap[uniqueId] = make([]*common.TableShardSource, 0) + } + sourceTablesMap[uniqueId] = append(sourceTablesMap[uniqueId], &common.TableShardSource{ + TableSource: common.TableSource{ + OriginSchema: schema, + OriginTable: table, + }, + DBConn: sourceDB.Conn, + }) + } + } + maxConn := 0 + for _, c := range maxSourceRouteTableCount { + if c > maxConn { + maxConn = c + } + } + log.Info("will increase connection configurations for DB of instance", + zap.Int("connection limit", maxConn*threadCount+1)) + // Set this conn to max + sourceDB.Conn.SetMaxOpenConns(maxConn*threadCount + 1) + sourceDB.Conn.SetMaxIdleConns(maxConn*threadCount + 1) + + } + + tableDiffs, err := checkTableMatched(tableDiffs, targetUniqueTableMap, sourceTablesAfterRoute, skipNonExistingTable) + if err != nil { + return nil, errors.Annotatef(err, "please make sure the filter is correct.") + } + + mss := &MySQLSources{ + tableDiffs: tableDiffs, + sourceTablesMap: sourceTablesMap, + } + return mss, nil +} diff --git a/sync_diff_inspector/source/source.go b/sync_diff_inspector/source/source.go new file mode 100644 index 00000000000..5e615488886 --- /dev/null +++ b/sync_diff_inspector/source/source.go @@ -0,0 +1,429 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package source + +import ( + "context" + "database/sql" + "sort" + "strings" + "time" + + "github.com/go-sql-driver/mysql" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/filter" + tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" + router "github.com/pingcap/tidb/pkg/util/table-router" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/splitter" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +type DMLType int32 + +const ( + Insert DMLType = iota + 1 + Delete + Replace +) + +const ( + ShieldDBName = "_no__exists__db_" + ShieldTableName = "_no__exists__table_" + GetSyncPointQuery = "SELECT primary_ts, secondary_ts FROM tidb_cdc.syncpoint_v1 ORDER BY primary_ts DESC LIMIT 1" +) + +type ChecksumInfo struct { + Checksum uint64 + Count int64 + Err error + Cost time.Duration +} + +// RowDataIterator represents the row data in source. +type RowDataIterator interface { + // Next seeks the next row data, it used when compared rows. + Next() (map[string]*dbutil.ColumnData, error) + // Close release the resource. + Close() +} + +// TableAnalyzer represents the method in different source. +// each source has its own analyze function. +type TableAnalyzer interface { + // AnalyzeSplitter picks the proper splitter.ChunkIterator according to table and source. 
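+	// (Editorial note) Implementations differ per source: the MySQL shard
+	// analyzer in this package falls back to the random splitter, while the
+	// TiDB analyzer is expected to prefer the statistics-bucket splitter in
+	// splitter/bucket.go when buckets are usable.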
+ AnalyzeSplitter(context.Context, *common.TableDiff, *splitter.RangeInfo) (splitter.ChunkIterator, error) +} + +type Source interface { + // GetTableAnalyzer pick the proper analyzer for different source. + // the implement of this function is different in mysql/tidb. + GetTableAnalyzer() TableAnalyzer + + // GetRangeIterator generates the range iterator with the checkpoint(*splitter.RangeInfo) and analyzer. + // this is the mainly iterator across the whole sync diff. + // One source has one range iterator to produce the range to channel. + // there are many workers consume the range from the channel to compare. + GetRangeIterator(context.Context, *splitter.RangeInfo, TableAnalyzer, int) (RangeIterator, error) + + // GetCountAndMd5 gets the md5 result and the count from given range. + GetCountAndMd5(context.Context, *splitter.RangeInfo) *ChecksumInfo + + // GetCountForLackTable gets the count for tables that don't exist upstream or downstream. + GetCountForLackTable(context.Context, *splitter.RangeInfo) int64 + + // GetRowsIterator gets the row data iterator from given range. + GetRowsIterator(context.Context, *splitter.RangeInfo) (RowDataIterator, error) + + // GenerateFixSQL generates the fix sql with given type. + GenerateFixSQL(DMLType, map[string]*dbutil.ColumnData, map[string]*dbutil.ColumnData, int) string + + // GetTables represents the tableDiffs. + GetTables() []*common.TableDiff + + // GetSourceStructInfo get the source table info from a given target table + GetSourceStructInfo(context.Context, int) ([]*model.TableInfo, error) + + // GetDB represents the db connection. + GetDB() *sql.DB + + // GetSnapshot represents the snapshot of source. + // only TiDB source has the snapshot. + // TODO refine the interface. + GetSnapshot() string + + // Close ... + Close() +} + +func NewSources(ctx context.Context, cfg *config.Config) (downstream Source, upstream Source, err error) { + // init db connection for upstream / downstream. + err = initDBConn(ctx, cfg) + if err != nil { + return nil, nil, errors.Trace(err) + } + tablesToBeCheck, err := initTables(ctx, cfg) + if err != nil { + return nil, nil, errors.Trace(err) + } + + tableDiffs := make([]*common.TableDiff, 0, len(tablesToBeCheck)) + for _, tableConfig := range tablesToBeCheck { + newInfo, needUnifiedTimeZone := utils.ResetColumns(tableConfig.TargetTableInfo, tableConfig.IgnoreColumns) + tableDiffs = append(tableDiffs, &common.TableDiff{ + Schema: tableConfig.Schema, + Table: tableConfig.Table, + Info: newInfo, + // TODO: field `IgnoreColumns` can be deleted. + IgnoreColumns: tableConfig.IgnoreColumns, + Fields: strings.Join(tableConfig.Fields, ","), + Range: tableConfig.Range, + NeedUnifiedTimeZone: needUnifiedTimeZone, + Collation: tableConfig.Collation, + ChunkSize: tableConfig.ChunkSize, + }) + + // When the router set case-sensitive false, + // that add rule match itself will make table case unsensitive. + for _, d := range cfg.Task.SourceInstances { + if _, ok := d.RouteTargetSet[dbutil.TableName(tableConfig.Schema, tableConfig.Table)]; ok { + // There is a user rule routing to `tableConfig.Schema`.`tableConfig.Table` + rules := d.Router.Match(tableConfig.Schema, tableConfig.Table) + + if len(rules) == 0 { + // There is no self match in these user rules. + // Need to shield the table for this source. 
+ if d.Router.AddRule(&router.TableRule{ + SchemaPattern: tableConfig.Schema, + TablePattern: tableConfig.Table, + TargetSchema: ShieldDBName, + TargetTable: ShieldTableName, + }) != nil { + return nil, nil, errors.Errorf("add shield rule failed [schema = %s] [table = %s]", tableConfig.Schema, tableConfig.Table) + } + } + } else if _, ok := d.RouteTargetSet[dbutil.TableName(tableConfig.Schema, "")]; ok { + // There is a user rule routing to `tableConfig.Schema` + rules := d.Router.Match(tableConfig.Schema, tableConfig.Table) + + if len(rules) == 0 { + // There is no self match in these user rules. + // Need to shield the table for this source. + if d.Router.AddRule(&router.TableRule{ + SchemaPattern: tableConfig.Schema, + TablePattern: tableConfig.Table, + TargetSchema: ShieldDBName, + TargetTable: ShieldTableName, + }) != nil { + return nil, nil, errors.Errorf("add shield rule failed [schema = %s] [table = %s]", tableConfig.Schema, tableConfig.Table) + } + } + } else { + // Add the default rule to match upper/lower case + if d.Router.AddRule(&router.TableRule{ + SchemaPattern: tableConfig.Schema, + TablePattern: tableConfig.Table, + TargetSchema: tableConfig.Schema, + TargetTable: tableConfig.Table, + }) != nil { + return nil, nil, errors.Errorf("add rule failed [schema = %s] [table = %s]", tableConfig.Schema, tableConfig.Table) + } + } + } + } + + // Sort TableDiff is important! + // because we compare table one by one. + sort.Slice(tableDiffs, func(i, j int) bool { + ti := utils.UniqueID(tableDiffs[i].Schema, tableDiffs[i].Table) + tj := utils.UniqueID(tableDiffs[j].Schema, tableDiffs[j].Table) + return strings.Compare(ti, tj) > 0 + }) + + // If `bucket size` is much larger than `chunk size`, + // we need to split the bucket into some chunks, which wastes much time. + // So we use WorkPool to split buckets in parallel. + // Besides, bucketSpliters of each table use shared WorkPool + bucketSpliterPool := utils.NewWorkerPool(uint(cfg.CheckThreadCount), "bucketIter") + // for mysql_shard, it needs `cfg.CheckThreadCount` + `cfg.SplitThreadCount` at most, because it cannot use bucket. + mysqlConnCount := cfg.CheckThreadCount + cfg.SplitThreadCount + upstream, err = buildSourceFromCfg(ctx, tableDiffs, mysqlConnCount, bucketSpliterPool, cfg.SkipNonExistingTable, cfg.Task.TargetCheckTables, cfg.Task.SourceInstances...) 
+ if err != nil { + return nil, nil, errors.Annotate(err, "from upstream") + } + if len(upstream.GetTables()) == 0 { + return nil, nil, errors.Errorf("no table need to be compared") + } + downstream, err = buildSourceFromCfg(ctx, upstream.GetTables(), mysqlConnCount, bucketSpliterPool, cfg.SkipNonExistingTable, cfg.Task.TargetCheckTables, cfg.Task.TargetInstance) + if err != nil { + return nil, nil, errors.Annotate(err, "from downstream") + } + return downstream, upstream, nil +} + +func buildSourceFromCfg( + ctx context.Context, + tableDiffs []*common.TableDiff, connCount int, + bucketSpliterPool *utils.WorkerPool, + skipNonExistingTable bool, + f tableFilter.Filter, dbs ...*config.DataSource, +) (Source, error) { + if len(dbs) < 1 { + return nil, errors.Errorf("no db config detected") + } + ok, err := dbutil.IsTiDB(ctx, dbs[0].Conn) + if err != nil { + return nil, errors.Annotatef(err, "connect to db failed") + } + + if ok { + if len(dbs) == 1 { + return NewTiDBSource(ctx, tableDiffs, dbs[0], bucketSpliterPool, f, skipNonExistingTable) + } else { + log.Fatal("Don't support check table in multiple tidb instance, please specify one tidb instance.") + } + } + return NewMySQLSources(ctx, tableDiffs, dbs, connCount, f, skipNonExistingTable) +} + +func getAutoSnapshotPosition(cfg *mysql.Config) (string, string, error) { + tmpConn, err := common.ConnectMySQL(cfg, 2) + if err != nil { + return "", "", errors.Annotatef(err, "connecting to auto-position tidb_snapshot failed") + } + defer tmpConn.Close() + var primaryTs, secondaryTs string + err = tmpConn.QueryRow(GetSyncPointQuery).Scan(&primaryTs, &secondaryTs) + if err != nil { + return "", "", errors.Annotatef(err, "fetching auto-position tidb_snapshot failed") + } + return primaryTs, secondaryTs, nil +} + +func initDBConn(ctx context.Context, cfg *config.Config) error { + // Fill in tidb_snapshot if it is set to AUTO + // This is only supported when set to auto on both target/source. + if cfg.Task.TargetInstance.IsAutoSnapshot() { + if len(cfg.Task.SourceInstances) > 1 { + return errors.Errorf("'auto' snapshot only supports one tidb source") + } + if !cfg.Task.SourceInstances[0].IsAutoSnapshot() { + return errors.Errorf("'auto' snapshot should be set on both target and source") + } + primaryTs, secondaryTs, err := getAutoSnapshotPosition(cfg.Task.TargetInstance.ToDriverConfig()) + if err != nil { + return err + } + cfg.Task.TargetInstance.SetSnapshot(secondaryTs) + cfg.Task.SourceInstances[0].SetSnapshot(primaryTs) + } + // we had `cfg.SplitThreadCount` producers and `cfg.CheckThreadCount` consumer to use db connections maybe and `cfg.CheckThreadCount` splitter to split buckets. + // so the connection count need to be cfg.SplitThreadCount + cfg.CheckThreadCount + cfg.CheckThreadCount. + targetConn, err := common.ConnectMySQL(cfg.Task.TargetInstance.ToDriverConfig(), cfg.SplitThreadCount+2*cfg.CheckThreadCount) + if err != nil { + return errors.Trace(err) + } + + cfg.Task.TargetInstance.Conn = targetConn + + for _, source := range cfg.Task.SourceInstances { + // If it is still set to AUTO it means it was not set on the target. + // We require it to be set to AUTO on both. 
+ if source.IsAutoSnapshot() { + return errors.Errorf("'auto' snapshot should be set on both target and source") + } + // connect source db with target db time_zone + conn, err := common.ConnectMySQL(source.ToDriverConfig(), cfg.SplitThreadCount+2*cfg.CheckThreadCount) + if err != nil { + return errors.Trace(err) + } + source.Conn = conn + } + return nil +} + +func initTables(ctx context.Context, cfg *config.Config) (cfgTables []*config.TableConfig, err error) { + downStreamConn := cfg.Task.TargetInstance.Conn + TargetTablesList := make([]*common.TableSource, 0) + targetSchemas, err := dbutil.GetSchemas(ctx, downStreamConn) + if err != nil { + return nil, errors.Annotatef(err, "get schemas from target source") + } + + for _, schema := range targetSchemas { + if filter.IsSystemSchema(schema) { + continue + } + allTables, err := dbutil.GetTables(ctx, downStreamConn, schema) + if err != nil { + return nil, errors.Annotatef(err, "get tables from target source %s", schema) + } + for _, t := range allTables { + TargetTablesList = append(TargetTablesList, &common.TableSource{ + OriginSchema: schema, + OriginTable: t, + }) + } + } + + // fill the table information. + // will add default source information, don't worry, we will use table config's info replace this later. + // cfg.Tables.Schema => cfg.Tables.Tables => target/source Schema.Table + cfgTables = make([]*config.TableConfig, 0, len(TargetTablesList)) + version := utils.TryToGetVersion(ctx, downStreamConn) + for _, tables := range TargetTablesList { + if cfg.Task.TargetCheckTables.MatchTable(tables.OriginSchema, tables.OriginTable) { + log.Debug("match target table", zap.String("table", dbutil.TableName(tables.OriginSchema, tables.OriginTable))) + + tableInfo, err := utils.GetTableInfoWithVersion(ctx, downStreamConn, tables.OriginSchema, tables.OriginTable, version) + if err != nil { + return nil, errors.Errorf("get table %s.%s's information error %s", tables.OriginSchema, tables.OriginTable, errors.ErrorStack(err)) + } + // Initialize all the tables that matches the `target-check-tables`[config.toml] and appears in downstream. + cfgTables = append(cfgTables, &config.TableConfig{ + Schema: tables.OriginSchema, + Table: tables.OriginTable, + TargetTableInfo: tableInfo, + Range: "TRUE", + }) + } + } + + // Reset fields of some tables of `cfgTables` according to `table-configs`[config.toml]. + // The table in `table-configs`[config.toml] should exist in both `target-check-tables`[config.toml] and tables from downstream. + for i, table := range cfg.Task.TargetTableConfigs { + // parse every config to find target table. + cfgFilter, err := tableFilter.Parse(table.TargetTables) + if err != nil { + return nil, errors.Errorf("unable to parse target table for the %dth config", i) + } + // iterate all target tables to make sure + // 1. one table only match at most one config. + // 2. config can miss table. + for _, cfgTable := range cfgTables { + if cfgFilter.MatchTable(cfgTable.Schema, cfgTable.Table) { + if cfgTable.HasMatched { + return nil, errors.Errorf("different config matched to same target table %s.%s", cfgTable.Schema, cfgTable.Table) + } + if table.Range != "" { + cfgTable.Range = table.Range + } + cfgTable.IgnoreColumns = table.IgnoreColumns + cfgTable.Fields = table.Fields + cfgTable.Collation = table.Collation + cfgTable.ChunkSize = table.ChunkSize + cfgTable.HasMatched = true + } + } + } + return cfgTables, nil +} + +// RangeIterator generate next chunk for the whole tables lazily. 
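+// A typical consumer loop looks like this (illustrative sketch only; error
+// handling simplified, the real consumer lives elsewhere in sync_diff_inspector):
+//
+//	for {
+//		rangeInfo, err := iter.Next(ctx)
+//		if err != nil || rangeInfo == nil {
+//			break
+//		}
+//		// hand rangeInfo to a check worker for comparison
+//	}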
+type RangeIterator interface { + // Next seeks the next chunk, return nil if seeks to end. + Next(context.Context) (*splitter.RangeInfo, error) + + Close() +} + +func checkTableMatched(tableDiffs []*common.TableDiff, targetMap map[string]struct{}, sourceMap map[string]struct{}, skipNonExistingTable bool) ([]*common.TableDiff, error) { + tableIndexMap := getIndexMapForTable(tableDiffs) + // check target exists but source not found + for tableDiff := range targetMap { + // target table have all passed in tableFilter + if _, ok := sourceMap[tableDiff]; !ok { + if !skipNonExistingTable { + return tableDiffs, errors.Errorf("the source has no table to be compared. target-table is `%s`", tableDiff) + } + index := tableIndexMap[tableDiff] + if tableDiffs[index].TableLack == 0 { + tableDiffs[index].TableLack = common.UpstreamTableLackFlag + log.Info("the source has no table to be compared", zap.String("target-table", tableDiff)) + } + } + } + // check source exists but target not found + for tableDiff := range sourceMap { + // need check source table have passd in tableFilter here + if _, ok := targetMap[tableDiff]; !ok { + if !skipNonExistingTable { + return tableDiffs, errors.Errorf("the target has no table to be compared. source-table is `%s`", tableDiff) + } + slice := strings.Split(strings.Replace(tableDiff, "`", "", -1), ".") + tableDiffs = append(tableDiffs, &common.TableDiff{ + Schema: slice[0], + Table: slice[1], + TableLack: common.DownstreamTableLackFlag, + }) + log.Info("the target has no table to be compared", zap.String("source-table", tableDiff)) + } + } + log.Info("table match check finished") + return tableDiffs, nil +} + +func getIndexMapForTable(tableDiffs []*common.TableDiff) map[string]int { + tableIndexMap := make(map[string]int) + for i := 0; i < len(tableDiffs); i++ { + tableUniqueID := utils.UniqueID(tableDiffs[i].Schema, tableDiffs[i].Table) + tableIndexMap[tableUniqueID] = i + } + return tableIndexMap +} diff --git a/sync_diff_inspector/source/source_test.go b/sync_diff_inspector/source/source_test.go new file mode 100644 index 00000000000..bb3ea1b02ab --- /dev/null +++ b/sync_diff_inspector/source/source_test.go @@ -0,0 +1,955 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package source + +import ( + "context" + "database/sql" + "database/sql/driver" + "fmt" + "os" + "regexp" + "strconv" + "testing" + "time" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/util/dbutil" + filter "github.com/pingcap/tidb/pkg/util/table-filter" + router "github.com/pingcap/tidb/pkg/util/table-router" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/splitter" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "github.com/stretchr/testify/require" + + _ "github.com/go-sql-driver/mysql" +) + +type tableCaseType struct { + schema string + table string + createTableSQL string + rangeColumns []string + rangeLeft []string + rangeRight []string + rangeInfo *splitter.RangeInfo + rowQuery string + rowColumns []string + rows [][]driver.Value +} + +type MockChunkIterator struct { + ctx context.Context + tableDiff *common.TableDiff + rangeInfo *splitter.RangeInfo + index *chunk.ChunkID +} + +const CHUNKS = 5 +const BUCKETS = 1 + +func (m *MockChunkIterator) Next() (*chunk.Range, error) { + if m.index.ChunkIndex == m.index.ChunkCnt-1 { + return nil, nil + } + m.index.ChunkIndex = m.index.ChunkIndex + 1 + return &chunk.Range{ + Index: &chunk.ChunkID{ + TableIndex: m.index.TableIndex, + BucketIndexLeft: m.index.BucketIndexLeft, + BucketIndexRight: m.index.BucketIndexRight, + ChunkIndex: m.index.ChunkIndex, + ChunkCnt: m.index.ChunkCnt, + }, + }, nil +} + +func (m *MockChunkIterator) Close() { + +} + +type MockAnalyzer struct { +} + +func (m *MockAnalyzer) AnalyzeSplitter(ctx context.Context, tableDiff *common.TableDiff, rangeInfo *splitter.RangeInfo) (splitter.ChunkIterator, error) { + i := &chunk.ChunkID{ + TableIndex: 0, + BucketIndexLeft: 0, + BucketIndexRight: 0, + ChunkIndex: -1, + ChunkCnt: CHUNKS, + } + return &MockChunkIterator{ + ctx, + tableDiff, + rangeInfo, + i, + }, nil +} + +func TestTiDBSource(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + tableCases := []*tableCaseType{ + { + schema: "source_test", + table: "test1", + createTableSQL: "CREATE TABLE `source_test`.`test1` (`a` int, `b` varchar(24), `c` float, `d` binary(1), `e` varbinary(1), PRIMARY KEY(`a`)\n)", + rangeColumns: []string{"a", "b"}, + rangeLeft: []string{"3", "b"}, + rangeRight: []string{"5", "f"}, + rowQuery: "SELECT", + rowColumns: []string{"a", "b", "c", "d", "e"}, + rows: [][]driver.Value{ + {"1", "a", "1.2", []byte{0xaa}, []byte{0xaa}}, + {"2", "b", "3.4", []byte{0xbb}, []byte{0xbb}}, + {"3", "c", "5.6", []byte{0xcc}, []byte{0xcc}}, + {"4", "d", "6.7", []byte{0xdd}, []byte{0xdd}}, + }, + }, + { + schema: "source_test", + table: "test2", + createTableSQL: "CREATE TABLE `source_test`.`test2` (`a` int, `b` varchar(24), `c` float, `d` datetime, PRIMARY KEY(`a`)\n)", + rangeColumns: []string{"a", "b"}, + rangeLeft: []string{"3", "b"}, + rangeRight: []string{"5", "f"}, + }, + } + + tableDiffs := prepareTiDBTables(t, tableCases) + + mock.ExpectQuery("SHOW DATABASES").WillReturnRows(sqlmock.NewRows([]string{"Database"}).AddRow("mysql").AddRow("source_test")) + mock.ExpectQuery("SHOW FULL TABLES*").WillReturnRows(sqlmock.NewRows([]string{"Table", "type"}).AddRow("test1", "base").AddRow("test2", "base")) + mock.ExpectQuery("SELECT 
version()*").WillReturnRows(sqlmock.NewRows([]string{"version()"}).AddRow("5.7.25-TiDB-v4.0.12")) + + f, err := filter.Parse([]string{"source_test.*"}) + require.NoError(t, err) + tidb, err := NewTiDBSource(ctx, tableDiffs, &config.DataSource{Conn: conn}, utils.NewWorkerPool(1, "bucketIter"), f, false) + require.NoError(t, err) + + caseFn := []struct { + check func(sqlmock.Sqlmock, Source) (bool, error) + }{ + { + check: func(mock sqlmock.Sqlmock, source Source) (bool, error) { + mock.ExpectQuery("SHOW CREATE TABLE*").WillReturnRows(sqlmock.NewRows([]string{"Table", "Create Table"}).AddRow(tableCases[0].table, tableCases[0].createTableSQL)) + mock.ExpectQuery("SELECT _tidb_rowid FROM*").WillReturnRows(sqlmock.NewRows([]string{"_tidb_rowid"})) + mock.ExpectQuery("SHOW VARIABLES LIKE 'sql_mode'*").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).AddRow("sql_mode", "ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION")) + tableInfo, err := source.GetSourceStructInfo(ctx, 0) + if err != nil { + return false, err + } + return !tableInfo[0].PKIsHandle, nil + }, + }, + { + check: func(mock sqlmock.Sqlmock, source Source) (bool, error) { + mock.ExpectQuery("SHOW CREATE TABLE*").WillReturnRows(sqlmock.NewRows([]string{"Table", "Create Table"}).AddRow(tableCases[1].table, tableCases[1].createTableSQL)) + mock.ExpectQuery("SELECT _tidb_rowid FROM*").WillReturnError(fmt.Errorf("ERROR 1054 (42S22): Unknown column '_tidb_rowid' in 'field list'")) + mock.ExpectQuery("SHOW VARIABLES LIKE 'sql_mode'*").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).AddRow("sql_mode", "ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION")) + tableInfo, err := source.GetSourceStructInfo(ctx, 0) + if err != nil { + return false, err + } + return tableInfo[0].PKIsHandle, nil + }, + }, + } + + for n, tableCase := range tableCases { + t.Log(n) + check, err := caseFn[n].check(mock, tidb) + require.NoError(t, err) + require.True(t, check) + require.Equal(t, n, tableCase.rangeInfo.GetTableIndex()) + countRows := sqlmock.NewRows([]string{"CNT", "CHECKSUM"}).AddRow(123, 456) + mock.ExpectQuery("SELECT COUNT.*").WillReturnRows(countRows) + checksum := tidb.GetCountAndMd5(ctx, tableCase.rangeInfo) + require.NoError(t, checksum.Err) + require.Equal(t, checksum.Count, int64(123)) + require.Equal(t, checksum.Checksum, uint64(456)) + } + + // Test ChunkIterator + iter, err := tidb.GetRangeIterator(ctx, tableCases[0].rangeInfo, &MockAnalyzer{}, 3) + require.NoError(t, err) + resRecords := [][]bool{ + {false, false, false, false, false}, + {false, false, false, false, false}, + } + for { + ch, err := iter.Next(ctx) + require.NoError(t, err) + if ch == nil { + break + } + require.Equal(t, ch.ChunkRange.Index.ChunkCnt, 5) + require.Equal(t, resRecords[ch.ChunkRange.Index.TableIndex][ch.ChunkRange.Index.ChunkIndex], false) + resRecords[ch.ChunkRange.Index.TableIndex][ch.ChunkRange.Index.ChunkIndex] = true + } + iter.Close() + require.Equal(t, resRecords, [][]bool{ + {true, true, true, true, true}, + {true, true, true, true, true}, + }) + + // Test RowIterator + tableCase := tableCases[0] + dataRows := sqlmock.NewRows(tableCase.rowColumns) + for _, row := range tableCase.rows { + dataRows.AddRow(row...) 
+ } + mock.ExpectQuery(tableCase.rowQuery).WillReturnRows(dataRows) + rowIter, err := tidb.GetRowsIterator(ctx, tableCase.rangeInfo) + require.NoError(t, err) + + row := 0 + var firstRow, secondRow map[string]*dbutil.ColumnData + for { + columns, err := rowIter.Next() + require.NoError(t, err) + if columns == nil { + require.Equal(t, row, len(tableCase.rows)) + break + } + for j, value := range tableCase.rows[row] { + require.Equal(t, columns[tableCase.rowColumns[j]].IsNull, false) + if _, ok := value.(string); ok { + require.Equal(t, columns[tableCase.rowColumns[j]].Data, []byte(value.(string))) + } + } + if row == 0 { + firstRow = columns + } else if row == 1 { + secondRow = columns + } + row++ + } + require.Equal(t, tidb.GenerateFixSQL(Insert, firstRow, secondRow, 0), "REPLACE INTO `source_test`.`test1`(`a`,`b`,`c`,`d`,`e`) VALUES (1,'a',1.2,x'aa',x'aa');") + require.Equal(t, tidb.GenerateFixSQL(Delete, firstRow, secondRow, 0), "DELETE FROM `source_test`.`test1` WHERE `a` = 2 AND `b` = 'b' AND `c` = 3.4 AND `d` = x'bb' AND `e` = x'bb' LIMIT 1;") + require.Equal(t, tidb.GenerateFixSQL(Replace, firstRow, secondRow, 0), + "/*\n"+ + " DIFF COLUMNS ╏ `A` ╏ `B` ╏ `C` ╏ `D` ╏ `E` \n"+ + "╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╍╍╋╍╍╍╍╍╍╍╍\n"+ + " source data ╏ 1 ╏ 'a' ╏ 1.2 ╏ x'aa' ╏ x'aa' \n"+ + "╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╍╍╋╍╍╍╍╍╍╍╍\n"+ + " target data ╏ 2 ╏ 'b' ╏ 3.4 ╏ x'aa' ╏ x'aa' \n"+ + "╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╍╍╋╍╍╍╍╍╍╍╍\n"+ + "*/\n"+ + "REPLACE INTO `source_test`.`test1`(`a`,`b`,`c`,`d`,`e`) VALUES (1,'a',1.2,x'aa',x'aa');") + + rowIter.Close() + + analyze := tidb.GetTableAnalyzer() + countRows := sqlmock.NewRows([]string{"Cnt"}).AddRow(0) + mock.ExpectQuery("SELECT COUNT.*").WillReturnRows(countRows) + chunkIter, err := analyze.AnalyzeSplitter(ctx, tableDiffs[0], tableCase.rangeInfo) + require.NoError(t, err) + chunkIter.Close() + tidb.Close() +} + +func TestFallbackToRandomIfRangeIsSet(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + mock.ExpectQuery("SHOW DATABASES").WillReturnRows(sqlmock.NewRows([]string{"Database"}).AddRow("mysql").AddRow("source_test")) + mock.ExpectQuery("SHOW FULL TABLES*").WillReturnRows(sqlmock.NewRows([]string{"Table", "type"}).AddRow("test1", "base")) + statsRows := sqlmock.NewRows([]string{"Db_name", "Table_name", "Column_name", "Is_index", "Bucket_id", "Count", "Repeats", "Lower_Bound", "Upper_Bound"}) + for i := 0; i < 5; i++ { + statsRows.AddRow("source_test", "test1", "PRIMARY", 1, (i+1)*64, (i+1)*64, 1, + fmt.Sprintf("(%d, %d)", i*64, i*12), fmt.Sprintf("(%d, %d)", (i+1)*64-1, (i+1)*12-1)) + } + mock.ExpectQuery("SELECT version()*").WillReturnRows(sqlmock.NewRows([]string{"version()"}).AddRow("5.7.25-TiDB-v4.0.12")) + mock.ExpectQuery(regexp.QuoteMeta("SELECT COUNT(1) cnt")).WillReturnRows(sqlmock.NewRows([]string{"cnt"}).AddRow(100)) + + f, err := filter.Parse([]string{"source_test.*"}) + require.NoError(t, err) + + createTableSQL1 := "CREATE TABLE `test1` " + + "(`id` int(11) NOT NULL AUTO_INCREMENT, " + + " `k` int(11) NOT NULL DEFAULT '0', " + + "`c` char(120) NOT NULL DEFAULT '', " + + "PRIMARY KEY (`id`), KEY `k_1` (`k`))" + + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + + table1 := &common.TableDiff{ + Schema: "source_test", + Table: "test1", + Info: tableInfo, + Range: "id < 10", // This should prevent using 
BucketIterator + } + + tidb, err := NewTiDBSource(ctx, []*common.TableDiff{table1}, &config.DataSource{Conn: conn}, utils.NewWorkerPool(1, "bucketIter"), f, false) + require.NoError(t, err) + + analyze := tidb.GetTableAnalyzer() + chunkIter, err := analyze.AnalyzeSplitter(ctx, table1, nil) + require.NoError(t, err) + require.IsType(t, &splitter.RandomIterator{}, chunkIter) + + chunkIter.Close() + tidb.Close() +} + +func TestMysqlShardSources(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + tableCases := []*tableCaseType{ + { + schema: "source_test", + table: "test1", + createTableSQL: "CREATE TABLE `source_test`.`test1` (`a` int, `b` varchar(24), `c` float, primary key(`a`, `b`))", + rangeColumns: []string{"a", "b"}, + rangeLeft: []string{"3", "b"}, + rangeRight: []string{"5", "f"}, + rowQuery: "SELECT.*", + rowColumns: []string{"a", "b", "c"}, + rows: [][]driver.Value{ + {"1", "a", "1.2"}, + {"2", "b", "2.2"}, + {"3", "c", "3.2"}, + {"4", "d", "4.2"}, + {"5", "e", "5.2"}, + {"6", "f", "6.2"}, + {"7", "g", "7.2"}, + {"8", "h", "8.2"}, + {"9", "i", "9.2"}, + {"10", "j", "10.2"}, + {"11", "k", "11.2"}, + {"12", "l", "12.2"}, + }, + }, + { + schema: "source_test", + table: "test2", + createTableSQL: "CREATE TABLE `source_test`.`test2` (`a` int, `b` varchar(24), `c` float, `d` datetime, primary key(`a`, `b`))", + rangeColumns: []string{"a", "b"}, + rangeLeft: []string{"3", "b"}, + rangeRight: []string{"5", "f"}, + }, + } + + tableDiffs := prepareTiDBTables(t, tableCases) + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + dbs := []*sql.DB{ + conn, conn, conn, conn, + } + + cs := make([]*config.DataSource, 4) + for i := range dbs { + mock.ExpectQuery("SHOW DATABASES").WillReturnRows(sqlmock.NewRows([]string{"Database"}).AddRow("mysql").AddRow("source_test")) + mock.ExpectQuery("SHOW FULL TABLES*").WillReturnRows(sqlmock.NewRows([]string{"Table", "type"}).AddRow("test1", "base").AddRow("test2", "base")) + cs[i] = &config.DataSource{Conn: conn} + } + + f, err := filter.Parse([]string{"source_test.*"}) + require.NoError(t, err) + shard, err := NewMySQLSources(ctx, tableDiffs, cs, 4, f, false) + require.NoError(t, err) + + for i := 0; i < len(dbs); i++ { + infoRows := sqlmock.NewRows([]string{"Table", "Create Table"}).AddRow("test_t", "CREATE TABLE `source_test`.`test1` (`a` int, `b` varchar(24), `c` float, primary key(`a`, `b`))") + variableRows := sqlmock.NewRows([]string{"Variable_name", "Value"}).AddRow("sql_mode", "ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION") + + mock.ExpectQuery("SHOW CREATE TABLE.*").WillReturnRows(infoRows) + mock.ExpectQuery("SHOW VARIABLE.*").WillReturnRows(variableRows) + } + info, err := shard.GetSourceStructInfo(ctx, 0) + require.NoError(t, err) + require.Equal(t, info[0].Name.O, "test1") + + for n, tableCase := range tableCases { + require.Equal(t, n, tableCase.rangeInfo.GetTableIndex()) + var resChecksum uint64 = 0 + for i := 0; i < len(dbs); i++ { + resChecksum = resChecksum + 1< 10 AND a < 20", + TargetTableInfo: nil, + Collation: "", + }, + }, + Task: config.TaskConfig{ + Source: []string{"mysql1"}, + Routes: nil, + Target: "tidb", + CheckTables: []string{"schema*.tbl"}, + TableConfigs: []string{"config1"}, + OutputDir: "./output", + SourceInstances: []*config.DataSource{ + { + Host: host, + Port: port, + User: "root", + Router: router, + }, + }, + TargetInstance: &config.DataSource{ + Host: 
host, + Port: port, + User: "root", + }, + TargetTableConfigs: []*config.TableConfig{ + { + Schema: "schema1", + Table: "tbl", + IgnoreColumns: []string{"", ""}, + Fields: []string{""}, + Range: "a > 10 AND a < 20", + TargetTableInfo: nil, + Collation: "", + }, + }, + TargetCheckTables: nil, + FixDir: "output/fix-on-tidb0", + CheckpointDir: "output/checkpoint", + HashFile: "", + }, + ConfigFile: "config.toml", + PrintVersion: false, + } + cfg.Task.TargetCheckTables, err = filter.Parse([]string{"schema*.tbl"}) + require.NoError(t, err) + + // create table + conn, err := sql.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/?charset=utf8mb4", host, port)) + require.NoError(t, err) + + conn.Exec("CREATE DATABASE IF NOT EXISTS schema1") + conn.Exec("CREATE TABLE IF NOT EXISTS `schema1`.`tbl` (`a` int, `b` varchar(24), `c` float, `d` datetime, primary key(`a`, `b`))") + // create db connections refused. + // TODO unit_test covers source.go + _, _, err = NewSources(ctx, cfg) + require.NoError(t, err) +} + +func TestRouterRules(t *testing.T) { + host, isExist := os.LookupEnv("MYSQL_HOST") + if host == "" || !isExist { + return + } + portStr, isExist := os.LookupEnv("MYSQL_PORT") + if portStr == "" || !isExist { + //return + } + port, err := strconv.Atoi(portStr) + require.NoError(t, err) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + r, err := router.NewTableRouter(false, []*router.TableRule{ + // make sure this rule works + { + SchemaPattern: "schema1", + TablePattern: "tbl", + TargetSchema: "schema2", + TargetTable: "tbl", + }, + }) + cfg := &config.Config{ + LogLevel: "debug", + CheckThreadCount: 4, + ExportFixSQL: true, + CheckStructOnly: false, + DataSources: map[string]*config.DataSource{ + "mysql1": { + Host: host, + Port: port, + User: "root", + }, + "tidb": { + Host: host, + Port: port, + User: "root", + }, + }, + Routes: nil, + Task: config.TaskConfig{ + Source: []string{"mysql1"}, + Routes: nil, + Target: "tidb", + CheckTables: []string{"schema2.tbl"}, + OutputDir: "./output", + SourceInstances: []*config.DataSource{ + { + Host: host, + Port: port, + User: "root", + Router: r, + RouteTargetSet: make(map[string]struct{}), + }, + }, + TargetInstance: &config.DataSource{ + Host: host, + Port: port, + User: "root", + }, + TargetCheckTables: nil, + FixDir: "output/fix-on-tidb0", + CheckpointDir: "output/checkpoint", + HashFile: "", + }, + ConfigFile: "config.toml", + PrintVersion: false, + } + cfg.Task.TargetCheckTables, err = filter.Parse([]string{"schema2.tbl", "schema_test.tbl"}) + require.NoError(t, err) + cfg.Task.SourceInstances[0].RouteTargetSet[dbutil.TableName("schema2", "tbl")] = struct{}{} + + // create table + conn, err := sql.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/?charset=utf8mb4", host, port)) + require.NoError(t, err) + + conn.Exec("CREATE DATABASE IF NOT EXISTS schema1") + conn.Exec("CREATE TABLE IF NOT EXISTS `schema1`.`tbl` (`a` int, `b` varchar(24), `c` float, `d` datetime, primary key(`a`, `b`))") + conn.Exec("CREATE DATABASE IF NOT EXISTS schema2") + conn.Exec("CREATE TABLE IF NOT EXISTS `schema2`.`tbl` (`a` int, `b` varchar(24), `c` float, `d` datetime, primary key(`a`, `b`))") + conn.Exec("CREATE DATABASE IF NOT EXISTS schema_test") + conn.Exec("CREATE TABLE IF NOT EXISTS `schema_test`.`tbl` (`a` int, `b` varchar(24), `c` float, `d` datetime, primary key(`a`, `b`))") + + _, _, err = NewSources(ctx, cfg) + require.NoError(t, err) + + require.Equal(t, 1, len(cfg.Task.SourceInstances)) + targetSchema, targetTable, err := 
cfg.Task.SourceInstances[0].Router.Route("schema1", "tbl") + require.NoError(t, err) + require.Equal(t, "schema2", targetSchema) + require.Equal(t, "tbl", targetTable) + targetSchema, targetTable, err = cfg.Task.SourceInstances[0].Router.Route("schema2", "tbl") + require.NoError(t, err) + require.Equal(t, ShieldDBName, targetSchema) + require.Equal(t, ShieldTableName, targetTable) + targetSchema, targetTable, err = cfg.Task.SourceInstances[0].Router.Route("schema_test", "tbl") + require.NoError(t, err) + require.Equal(t, "schema_test", targetSchema) + require.Equal(t, "tbl", targetTable) + _, tableRules := cfg.Task.SourceInstances[0].Router.AllRules() + require.Equal(t, 1, len(tableRules["schema1"])) + require.Equal(t, 1, len(tableRules["schema2"])) + require.Equal(t, 1, len(tableRules["schema_test"])) +} + +func TestInitTables(t *testing.T) { + ctx := context.Background() + cfg := config.NewConfig() + // Test case 1: test2.t2 will parse after filter. + require.NoError(t, cfg.Parse([]string{"--config", "../config/config.toml"})) + require.NoError(t, cfg.Init()) + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + cfg.Task.TargetInstance.Conn = conn + + rows := sqlmock.NewRows([]string{"Database"}).AddRow("mysql").AddRow("test2") + mock.ExpectQuery("SHOW DATABASES").WillReturnRows(rows) + rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("t1", "t1").AddRow("t2", "t2") + mock.ExpectQuery("SHOW FULL TABLES*").WillReturnRows(rows) + rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("t2", "CREATE TABLE `t2` (\n\t\t\t`id` int(11) DEFAULT NULL,\n\t\t \t`name` varchar(24) DEFAULT NULL\n\t\t\t) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin") + mock.ExpectQuery("SHOW CREATE TABLE *").WillReturnRows(rows) + rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("", "") + mock.ExpectQuery("SHOW VARIABLES LIKE*").WillReturnRows(rows) + + tablesToBeCheck, err := initTables(ctx, cfg) + require.NoError(t, err) + + require.Len(t, tablesToBeCheck, 1) + require.Equal(t, tablesToBeCheck[0].Schema, "test2") + require.Equal(t, tablesToBeCheck[0].Table, "t2") + // Range can be replaced during initTables + require.Equal(t, tablesToBeCheck[0].Range, "age > 10 AND age < 20") + + require.NoError(t, mock.ExpectationsWereMet()) + + // Test case 2: init failed due to conflict table config point to one table. 
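+ // Here two entries in `table-configs` of config_conflict.toml match the same
+ // target table, so initTables is expected to fail with the error asserted below.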
+ cfg = config.NewConfig() + require.NoError(t, cfg.Parse([]string{"--config", "../config/config_conflict.toml"})) + require.NoError(t, cfg.Init()) + cfg.Task.TargetInstance.Conn = conn + + rows = sqlmock.NewRows([]string{"Database"}).AddRow("mysql").AddRow("test2") + mock.ExpectQuery("SHOW DATABASES").WillReturnRows(rows) + rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("t1", "t1").AddRow("t2", "t2") + mock.ExpectQuery("SHOW FULL TABLES*").WillReturnRows(rows) + rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("t2", "CREATE TABLE `t2` (\n\t\t\t`id` int(11) DEFAULT NULL,\n\t\t \t`name` varchar(24) DEFAULT NULL\n\t\t\t) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin") + mock.ExpectQuery("SHOW CREATE TABLE *").WillReturnRows(rows) + rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("", "") + mock.ExpectQuery("SHOW VARIABLES LIKE*").WillReturnRows(rows) + + tablesToBeCheck, err = initTables(ctx, cfg) + require.Contains(t, err.Error(), "different config matched to same target table") + require.NoError(t, mock.ExpectationsWereMet()) +} + +func TestCheckTableMatched(t *testing.T) { + var tableDiffs []*common.TableDiff + tableDiffs = append(tableDiffs, &common.TableDiff{ + Schema: "test", + Table: "t1", + }) + tableDiffs = append(tableDiffs, &common.TableDiff{ + Schema: "test", + Table: "t2", + }) + + tmap := make(map[string]struct{}) + smap := make(map[string]struct{}) + + smap["`test`.`t1`"] = struct{}{} + smap["`test`.`t2`"] = struct{}{} + + tmap["`test`.`t1`"] = struct{}{} + tmap["`test`.`t2`"] = struct{}{} + + tables, err := checkTableMatched(tableDiffs, tmap, smap, false) + require.NoError(t, err) + + smap["`test`.`t3`"] = struct{}{} + tables, err = checkTableMatched(tableDiffs, tmap, smap, false) + require.Contains(t, err.Error(), "the target has no table to be compared. source-table is ``test`.`t3``") + + delete(smap, "`test`.`t2`") + tables, err = checkTableMatched(tableDiffs, tmap, smap, false) + require.Contains(t, err.Error(), "the source has no table to be compared. target-table is ``test`.`t2``") + + tables, err = checkTableMatched(tableDiffs, tmap, smap, true) + require.NoError(t, err) + require.Equal(t, 0, tables[0].TableLack) + require.Equal(t, 1, tables[1].TableLack) + require.Equal(t, -1, tables[2].TableLack) +} diff --git a/sync_diff_inspector/source/tidb.go b/sync_diff_inspector/source/tidb.go new file mode 100644 index 00000000000..0af78384cc1 --- /dev/null +++ b/sync_diff_inspector/source/tidb.go @@ -0,0 +1,285 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package source + +import ( + "context" + "database/sql" + "fmt" + "time" + + "github.com/coreos/go-semver/semver" + tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/filter" + "github.com/pingcap/tiflow/sync_diff_inspector/config" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/splitter" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +type TiDBTableAnalyzer struct { + dbConn *sql.DB + bucketSpliterPool *utils.WorkerPool + sourceTableMap map[string]*common.TableSource +} + +func (a *TiDBTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common.TableDiff, startRange *splitter.RangeInfo) (splitter.ChunkIterator, error) { + matchedSource := getMatchSource(a.sourceTableMap, table) + // Shallow Copy + originTable := *table + originTable.Schema = matchedSource.OriginSchema + originTable.Table = matchedSource.OriginTable + progressID := dbutil.TableName(table.Schema, table.Table) + // if we decide to use bucket to split chunks + // we always use bucksIter even we load from checkpoint is not bucketNode + // TODO check whether we can use bucket for this table to split chunks. + // NOTICE: If checkpoint use random splitter, it will also fail the next time build bucket splitter. + bucketIter, err := splitter.NewBucketIteratorWithCheckpoint(ctx, progressID, &originTable, a.dbConn, startRange, a.bucketSpliterPool) + if err == nil { + return bucketIter, nil + } + log.Info("failed to build bucket iterator, fall back to use random iterator", zap.Error(err)) + // fall back to random splitter + + // use random splitter if we cannot use bucket splitter, then we can simply choose target table to generate chunks. 
+ randIter, err := splitter.NewRandomIteratorWithCheckpoint(ctx, progressID, &originTable, a.dbConn, startRange) + if err != nil { + return nil, errors.Trace(err) + } + return randIter, nil + +} + +type TiDBRowsIterator struct { + rows *sql.Rows +} + +func (s *TiDBRowsIterator) Close() { + s.rows.Close() +} + +func (s *TiDBRowsIterator) Next() (map[string]*dbutil.ColumnData, error) { + if s.rows.Next() { + return dbutil.ScanRow(s.rows) + } + return nil, nil +} + +type TiDBSource struct { + tableDiffs []*common.TableDiff + sourceTableMap map[string]*common.TableSource + snapshot string + // bucketSpliterPool is the shared pool to produce chunks using bucket + bucketSpliterPool *utils.WorkerPool + dbConn *sql.DB + + version *semver.Version +} + +func (s *TiDBSource) GetTableAnalyzer() TableAnalyzer { + return &TiDBTableAnalyzer{ + s.dbConn, + s.bucketSpliterPool, + s.sourceTableMap, + } +} + +func getMatchSource(sourceTableMap map[string]*common.TableSource, table *common.TableDiff) *common.TableSource { + if len(sourceTableMap) == 0 { + // no sourceTableMap, return the origin table name + return &common.TableSource{ + OriginSchema: table.Schema, + OriginTable: table.Table, + } + } + uniqueID := utils.UniqueID(table.Schema, table.Table) + return sourceTableMap[uniqueID] +} + +func (s *TiDBSource) GetRangeIterator(ctx context.Context, r *splitter.RangeInfo, analyzer TableAnalyzer, splitThreadCount int) (RangeIterator, error) { + return NewChunksIterator(ctx, analyzer, s.tableDiffs, r, splitThreadCount) +} + +func (s *TiDBSource) Close() { + s.dbConn.Close() +} +func (s *TiDBSource) GetCountAndMd5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { + beginTime := time.Now() + table := s.tableDiffs[tableRange.GetTableIndex()] + chunk := tableRange.GetChunk() + + matchSource := getMatchSource(s.sourceTableMap, table) + count, checksum, err := utils.GetCountAndMd5Checksum(ctx, s.dbConn, matchSource.OriginSchema, matchSource.OriginTable, table.Info, chunk.Where, chunk.Args) + + cost := time.Since(beginTime) + return &ChecksumInfo{ + Checksum: checksum, + Count: count, + Err: err, + Cost: cost, + } +} + +func (s *TiDBSource) GetCountForLackTable(ctx context.Context, tableRange *splitter.RangeInfo) int64 { + table := s.tableDiffs[tableRange.GetTableIndex()] + matchSource := getMatchSource(s.sourceTableMap, table) + if matchSource != nil { + count, _ := dbutil.GetRowCount(ctx, s.dbConn, matchSource.OriginSchema, matchSource.OriginTable, "", nil) + return count + } + return 0 +} + +func (s *TiDBSource) GetTables() []*common.TableDiff { + return s.tableDiffs +} + +func (s *TiDBSource) GetSourceStructInfo(ctx context.Context, tableIndex int) ([]*model.TableInfo, error) { + var err error + tableInfos := make([]*model.TableInfo, 1) + tableDiff := s.GetTables()[tableIndex] + source := getMatchSource(s.sourceTableMap, tableDiff) + tableInfos[0], err = utils.GetTableInfoWithVersion(ctx, s.GetDB(), source.OriginSchema, source.OriginTable, s.version) + if err != nil { + return nil, errors.Trace(err) + } + tableInfos[0], _ = utils.ResetColumns(tableInfos[0], tableDiff.IgnoreColumns) + return tableInfos, nil +} + +func (s *TiDBSource) GenerateFixSQL(t DMLType, upstreamData, downstreamData map[string]*dbutil.ColumnData, tableIndex int) string { + if t == Insert { + return utils.GenerateReplaceDML(upstreamData, s.tableDiffs[tableIndex].Info, s.tableDiffs[tableIndex].Schema) + } + if t == Delete { + return utils.GenerateDeleteDML(downstreamData, s.tableDiffs[tableIndex].Info, 
s.tableDiffs[tableIndex].Schema) + } + if t == Replace { + return utils.GenerateReplaceDMLWithAnnotation(upstreamData, downstreamData, s.tableDiffs[tableIndex].Info, s.tableDiffs[tableIndex].Schema) + } + log.Fatal("Don't support this type", zap.Any("dml type", t)) + return "" +} + +func (s *TiDBSource) GetRowsIterator(ctx context.Context, tableRange *splitter.RangeInfo) (RowDataIterator, error) { + chunk := tableRange.GetChunk() + + table := s.tableDiffs[tableRange.GetTableIndex()] + matchedSource := getMatchSource(s.sourceTableMap, table) + rowsQuery, _ := utils.GetTableRowsQueryFormat(matchedSource.OriginSchema, matchedSource.OriginTable, table.Info, table.Collation) + query := fmt.Sprintf(rowsQuery, chunk.Where) + + log.Debug("select data", zap.String("sql", query), zap.Reflect("args", chunk.Args)) + rows, err := s.dbConn.QueryContext(ctx, query, chunk.Args...) + if err != nil { + return nil, errors.Trace(err) + } + return &TiDBRowsIterator{ + rows, + }, nil +} + +func (s *TiDBSource) GetDB() *sql.DB { + return s.dbConn +} + +func (s *TiDBSource) GetSnapshot() string { + return s.snapshot +} + +func NewTiDBSource( + ctx context.Context, + tableDiffs []*common.TableDiff, ds *config.DataSource, + bucketSpliterPool *utils.WorkerPool, + f tableFilter.Filter, skipNonExistingTable bool, +) (Source, error) { + sourceTableMap := make(map[string]*common.TableSource) + log.Info("find router for tidb source") + // we should get the real table name + // and real table row query from source. + targetUniqueTableMap := make(map[string]struct{}) + for _, tableDiff := range tableDiffs { + targetUniqueTableMap[utils.UniqueID(tableDiff.Schema, tableDiff.Table)] = struct{}{} + } + sourceTablesAfterRoute := make(map[string]struct{}) + + // instance -> db -> table + allTablesMap := make(map[string]map[string]interface{}) + sourceSchemas, err := dbutil.GetSchemas(ctx, ds.Conn) + + if err != nil { + return nil, errors.Annotatef(err, "get schemas from database") + } + + for _, schema := range sourceSchemas { + if filter.IsSystemSchema(schema) { + // ignore system schema + continue + } + allTables, err := dbutil.GetTables(ctx, ds.Conn, schema) + if err != nil { + return nil, errors.Annotatef(err, "get tables from %s", schema) + } + allTablesMap[schema] = utils.SliceToMap(allTables) + } + + for schema, allTables := range allTablesMap { + for table := range allTables { + targetSchema, targetTable := schema, table + if ds.Router != nil { + targetSchema, targetTable, err = ds.Router.Route(schema, table) + if err != nil { + return nil, errors.Errorf("get route result for %s.%s failed, error %v", schema, table, err) + } + } + + uniqueId := utils.UniqueID(targetSchema, targetTable) + isMatched := f.MatchTable(targetSchema, targetTable) + if isMatched { + // if match the filter, we should respect it and check target has this table later. + sourceTablesAfterRoute[uniqueId] = struct{}{} + } + if _, ok := targetUniqueTableMap[uniqueId]; ok || (isMatched && skipNonExistingTable) { + if _, ok := sourceTableMap[uniqueId]; ok { + log.Error("TiDB source don't support compare multiple source tables with one downstream table," + + " if this happening when diff on same instance is fine. 
otherwise we are not guarantee this diff result is right") + } + sourceTableMap[uniqueId] = &common.TableSource{ + OriginSchema: schema, + OriginTable: table, + } + } + } + } + + tableDiffs, err = checkTableMatched(tableDiffs, targetUniqueTableMap, sourceTablesAfterRoute, skipNonExistingTable) + if err != nil { + return nil, errors.Annotatef(err, "please make sure the filter is correct.") + } + ts := &TiDBSource{ + tableDiffs: tableDiffs, + sourceTableMap: sourceTableMap, + snapshot: ds.Snapshot, + dbConn: ds.Conn, + bucketSpliterPool: bucketSpliterPool, + version: utils.TryToGetVersion(ctx, ds.Conn), + } + return ts, nil +} diff --git a/sync_diff_inspector/splitter/bucket.go b/sync_diff_inspector/splitter/bucket.go new file mode 100644 index 00000000000..b64b3ae68bd --- /dev/null +++ b/sync_diff_inspector/splitter/bucket.go @@ -0,0 +1,365 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package splitter + +import ( + "context" + "database/sql" + "sync" + + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/progress" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +const DefaultChannelBuffer = 1024 + +type BucketIterator struct { + buckets []dbutil.Bucket + table *common.TableDiff + indexColumns []*model.ColumnInfo + + chunkPool *utils.WorkerPool + wg sync.WaitGroup // control for one bucket in shared chunkPool + + chunkSize int64 + chunks []*chunk.Range + nextChunk uint + + chunksCh chan []*chunk.Range + errCh chan error + cancel context.CancelFunc + indexID int64 + progressID string + + dbConn *sql.DB +} + +func NewBucketIterator(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB) (*BucketIterator, error) { + return NewBucketIteratorWithCheckpoint(ctx, progressID, table, dbConn, nil, utils.NewWorkerPool(1, "bucketIter")) +} + +func NewBucketIteratorWithCheckpoint( + ctx context.Context, + progressID string, + table *common.TableDiff, + dbConn *sql.DB, + startRange *RangeInfo, + bucketSpliterPool *utils.WorkerPool, +) (*BucketIterator, error) { + if !utils.IsRangeTrivial(table.Range) { + return nil, errors.Errorf( + "BucketIterator does not support user configured Range. Range: %s", + table.Range) + } + + bctx, cancel := context.WithCancel(ctx) + bs := &BucketIterator{ + table: table, + chunkPool: bucketSpliterPool, + chunkSize: table.ChunkSize, + chunksCh: make(chan []*chunk.Range, DefaultChannelBuffer), + errCh: make(chan error, 1), + cancel: cancel, + dbConn: dbConn, + + progressID: progressID, + } + + if err := bs.init(ctx, startRange); err != nil { + return nil, errors.Trace(err) + } + + // Let the progress bar begins to record the table. 
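+ // The total is 0 at this point; it is increased by progress.UpdateTotal as
+ // produceChunks emits batches of chunks below.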
+ progress.StartTable(bs.progressID, 0, false) + go bs.produceChunks(bctx, startRange) + + return bs, nil +} + +func (s *BucketIterator) GetIndexID() int64 { + return s.indexID +} + +func (s *BucketIterator) Next() (*chunk.Range, error) { + var ok bool + if uint(len(s.chunks)) <= s.nextChunk { + select { + case err := <-s.errCh: + return nil, errors.Trace(err) + case s.chunks, ok = <-s.chunksCh: + if !ok && s.chunks == nil { + log.Info("close chunks channel for table", + zap.String("schema", s.table.Schema), zap.String("table", s.table.Table)) + return nil, nil + } + } + s.nextChunk = 0 + failpoint.Inject("ignore-last-n-chunk-in-bucket", func(v failpoint.Value) { + log.Info("failpoint ignore-last-n-chunk-in-bucket injected (bucket splitter)", zap.Int("n", v.(int))) + if len(s.chunks) <= 1+v.(int) { + failpoint.Return(nil, nil) + } + s.chunks = s.chunks[:(len(s.chunks) - v.(int))] + }) + } + + c := s.chunks[s.nextChunk] + s.nextChunk = s.nextChunk + 1 + failpoint.Inject("print-chunk-info", func() { + lowerBounds := make([]string, len(c.Bounds)) + upperBounds := make([]string, len(c.Bounds)) + for i, bound := range c.Bounds { + lowerBounds[i] = bound.Lower + upperBounds[i] = bound.Upper + } + log.Info("failpoint print-chunk-info injected (bucket splitter)", zap.Strings("lowerBounds", lowerBounds), zap.Strings("upperBounds", upperBounds), zap.String("indexCode", c.Index.ToString())) + }) + return c, nil +} + +func (s *BucketIterator) init(ctx context.Context, startRange *RangeInfo) error { + fields, err := indexFieldsFromConfigString(s.table.Fields, s.table.Info) + if err != nil { + return err + } + + s.nextChunk = 0 + buckets, err := dbutil.GetBucketsInfo(ctx, s.dbConn, s.table.Schema, s.table.Table, s.table.Info) + if err != nil { + return errors.Trace(err) + } + + var indices []*model.IndexInfo + if fields.IsEmpty() { + indices, err = utils.GetBetterIndex(context.Background(), s.dbConn, s.table.Schema, s.table.Table, s.table.Info) + if err != nil { + return errors.Trace(err) + } + } else { + // There are user configured "index-fields", so we will try to match from all indices. + indices = dbutil.FindAllIndex(s.table.Info) + } + +NEXTINDEX: + for _, index := range indices { + if index == nil { + continue + } + if startRange != nil && startRange.IndexID != index.ID { + continue + } + + indexColumns := utils.GetColumnsFromIndex(index, s.table.Info) + + if len(indexColumns) < len(index.Columns) { + // some column in index is ignored. + continue + } + + if !fields.MatchesIndex(index) { + // We are enforcing user configured "index-fields" settings. + continue + } + + // skip the index that has expression column + for _, col := range indexColumns { + if col.Hidden { + continue NEXTINDEX + } + } + + bucket, ok := buckets[index.Name.O] + if !ok { + // We found an index matching the "index-fields", but no bucket is found + // for that index. Returning an error here will make the caller retry with + // the random splitter. + return errors.NotFoundf("index %s in buckets info", index.Name.O) + } + log.Debug("buckets for index", zap.String("index", index.Name.O), zap.Reflect("buckets", buckets)) + + s.buckets = bucket + s.indexColumns = indexColumns + s.indexID = index.ID + break + } + + if s.buckets == nil || s.indexColumns == nil { + return errors.NotFoundf("no index to split buckets") + } + + // Notice: `cnt` is only an estimated value + cnt := s.buckets[len(s.buckets)-1].Count + // We can use config file to fix chunkSize, + // otherwise chunkSize is 0. 
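+ // When no chunk size is configured, derive one from the estimated row count above.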
+ if s.chunkSize <= 0 { + s.chunkSize = utils.CalculateChunkSize(cnt) + } + + log.Info("get chunk size for table", zap.Int64("chunk size", s.chunkSize), + zap.String("db", s.table.Schema), zap.String("table", s.table.Table)) + return nil +} + +func (s *BucketIterator) Close() { + s.cancel() +} + +func (s *BucketIterator) splitChunkForBucket(ctx context.Context, firstBucketID, lastBucketID int, beginIndex int, bucketChunkCnt int, splitChunkCnt int, chunkRange *chunk.Range) { + s.wg.Add(1) + s.chunkPool.Apply(func() { + defer s.wg.Done() + chunks, err := splitRangeByRandom(ctx, s.dbConn, chunkRange, splitChunkCnt, s.table.Schema, s.table.Table, s.indexColumns, s.table.Range, s.table.Collation) + if err != nil { + select { + case <-ctx.Done(): + case s.errCh <- errors.Trace(err): + } + return + } + chunk.InitChunks(chunks, chunk.Bucket, firstBucketID, lastBucketID, beginIndex, s.table.Collation, s.table.Range, bucketChunkCnt) + progress.UpdateTotal(s.progressID, len(chunks), false) + s.chunksCh <- chunks + }) +} + +func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInfo) { + defer func() { + s.wg.Wait() + progress.UpdateTotal(s.progressID, 0, true) + close(s.chunksCh) + }() + var ( + lowerValues, upperValues []string + latestCount int64 + err error + ) + firstBucket := 0 + if startRange != nil { + c := startRange.GetChunk() + if c.IsLastChunkForTable() { + // the last checkpoint range is the last chunk so return + return + } + // init values for the next bucket + firstBucket = c.Index.BucketIndexRight + 1 + // Note: Since this chunk is not the last one, + // its bucketID is less than len(s.buckets) + if c.Index.BucketIndexRight >= len(s.buckets) { + select { + case <-ctx.Done(): + case s.errCh <- errors.New("Wrong Bucket: Bucket index of the checkpoint node is larger than buckets' size"): + } + return + } + latestCount = s.buckets[c.Index.BucketIndexRight].Count + nextUpperValues, err := dbutil.AnalyzeValuesFromBuckets(s.buckets[c.Index.BucketIndexRight].UpperBound, s.indexColumns) + if err != nil { + select { + case <-ctx.Done(): + case s.errCh <- errors.Trace(err): + } + return + } + lowerValues = nextUpperValues + + // build left chunks for this bucket + leftCnt := c.Index.ChunkCnt - c.Index.ChunkIndex - 1 + if leftCnt > 0 { + chunkRange := chunk.NewChunkRange() + + for i, column := range s.indexColumns { + chunkRange.Update(column.Name.O, "", nextUpperValues[i], false, true) + } + + for _, bound := range c.Bounds { + chunkRange.Update(bound.Column, bound.Upper, "", true, false) + } + + s.splitChunkForBucket(ctx, c.Index.BucketIndexLeft, c.Index.BucketIndexRight, c.Index.ChunkIndex+1, c.Index.ChunkCnt, leftCnt, chunkRange) + } + } + halfChunkSize := s.chunkSize >> 1 + // `firstBucket` is the first bucket of one chunk. + // It is equivalent to `BucketLeftIndex` of the chunk's ID. 
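+ // Walk the remaining buckets, merging consecutive buckets until the accumulated
+ // row count reaches chunkSize, then split that range into chunks in parallel.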
+ for i := firstBucket; i < len(s.buckets); i++ { + count := s.buckets[i].Count - latestCount + if count < s.chunkSize { + // merge more buckets into one chunk + continue + } + + upperValues, err = dbutil.AnalyzeValuesFromBuckets(s.buckets[i].UpperBound, s.indexColumns) + if err != nil { + select { + case <-ctx.Done(): + case s.errCh <- errors.Trace(err): + } + return + } + + chunkRange := chunk.NewChunkRange() + for j, column := range s.indexColumns { + var lowerValue, upperValue string + if len(lowerValues) > 0 { + lowerValue = lowerValues[j] + } + if len(upperValues) > 0 { + upperValue = upperValues[j] + } + chunkRange.Update(column.Name.O, lowerValue, upperValue, len(lowerValues) > 0, len(upperValues) > 0) + } + + // `splitRangeByRandom` will skip when chunkCnt <= 1 + // assume the number of the selected buckets is `x` + // if x >= 2 -> chunkCnt = 1 + // if x = 1 -> chunkCnt = (count + halfChunkSize) / chunkSize + // count >= chunkSize + if i == firstBucket { + // + chunkCnt := int((count + halfChunkSize) / s.chunkSize) + s.splitChunkForBucket(ctx, firstBucket, i, 0, chunkCnt, chunkCnt, chunkRange) + } else { + // use multi-buckets so chunkCnt = 1 + s.splitChunkForBucket(ctx, firstBucket, i, 0, 1, 1, chunkRange) + } + + latestCount = s.buckets[i].Count + lowerValues = upperValues + firstBucket = i + 1 + + failpoint.Inject("check-one-bucket", func() { + log.Info("failpoint check-one-bucket injected, stop producing new chunks.") + failpoint.Return() + }) + } + + // merge the rest keys into one chunk + chunkRange := chunk.NewChunkRange() + if len(lowerValues) > 0 { + for j, column := range s.indexColumns { + chunkRange.Update(column.Name.O, lowerValues[j], "", true, false) + } + } + // When the table is much less than chunkSize, + // it will return a chunk include the whole table. + s.splitChunkForBucket(ctx, firstBucket, len(s.buckets), 0, 1, 1, chunkRange) +} diff --git a/sync_diff_inspector/splitter/index_fields.go b/sync_diff_inspector/splitter/index_fields.go new file mode 100644 index 00000000000..1508d59075d --- /dev/null +++ b/sync_diff_inspector/splitter/index_fields.go @@ -0,0 +1,111 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package splitter + +import ( + "sort" + "strings" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +// indexFields wraps the column info for the user config "index-fields". 
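+// An empty option matches every index; otherwise MatchesIndex requires the
+// configured columns to match the index columns exactly (ignoring order).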
+type indexFields struct { + cols []*model.ColumnInfo + tableInfo *model.TableInfo + empty bool +} + +func indexFieldsFromConfigString(strFields string, tableInfo *model.TableInfo) (*indexFields, error) { + if len(strFields) == 0 { + // Empty option + return &indexFields{empty: true}, nil + } + + if tableInfo == nil { + log.Panic("parsing index fields with empty tableInfo", + zap.String("index-fields", strFields)) + } + + splitFieldArr := strings.Split(strFields, ",") + for i := range splitFieldArr { + splitFieldArr[i] = strings.TrimSpace(splitFieldArr[i]) + } + + fields, err := GetSplitFields(tableInfo, splitFieldArr) + if err != nil { + return nil, errors.Trace(err) + } + + // Sort the columns to help with comparison. + sortColsInPlace(fields) + + return &indexFields{ + cols: fields, + tableInfo: tableInfo, + }, nil +} + +func (f *indexFields) MatchesIndex(index *model.IndexInfo) bool { + if f.empty { + // Default config matches all. + return true + } + + // Sanity checks. + if index == nil { + log.Panic("matching with empty index") + } + if len(f.cols) == 0 { + log.Panic("unexpected cols with length 0") + } + + if len(index.Columns) != len(f.cols) { + // We need an exact match. + // Lengths not matching eliminates the possibility. + return false + } + + indexCols := utils.GetColumnsFromIndex(index, f.tableInfo) + // Sort for comparison + sortColsInPlace(indexCols) + + for i := 0; i < len(indexCols); i++ { + if f.cols[i].ID != indexCols[i].ID { + return false + } + } + + return true +} + +func (f *indexFields) Cols() []*model.ColumnInfo { + return f.cols +} + +// IsEmpty returns true if the struct represents an empty +// user-configured "index-fields" option. +func (f *indexFields) IsEmpty() bool { + return f.empty +} + +func sortColsInPlace(cols []*model.ColumnInfo) { + sort.SliceStable(cols, func(i, j int) bool { + return cols[i].ID < cols[j].ID + }) +} diff --git a/sync_diff_inspector/splitter/index_fields_test.go b/sync_diff_inspector/splitter/index_fields_test.go new file mode 100644 index 00000000000..6b6cc5768e7 --- /dev/null +++ b/sync_diff_inspector/splitter/index_fields_test.go @@ -0,0 +1,106 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package splitter + +import ( + "testing" + + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/stretchr/testify/require" +) + +func TestIndexFieldsSimple(t *testing.T) { + t.Parallel() + + createTableSQL1 := "CREATE TABLE `sbtest1` " + + "(`id` int(11) NOT NULL AUTO_INCREMENT, " + + " `k` int(11) NOT NULL DEFAULT '0', " + + "`c` char(120) NOT NULL DEFAULT '', " + + "PRIMARY KEY (`id`), KEY `k_1` (`k`))" + + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + + fields, err := indexFieldsFromConfigString("k", tableInfo) + require.NoError(t, err) + require.False(t, fields.IsEmpty()) + require.Len(t, fields.Cols(), 1) + + for _, index := range tableInfo.Indices { + switch index.Name.String() { + case "PRIMARY": + require.False(t, fields.MatchesIndex(index)) + case "k_1": + require.True(t, fields.MatchesIndex(index)) + default: + require.FailNow(t, "unreachable") + } + } +} + +func TestIndexFieldsComposite(t *testing.T) { + t.Parallel() + + createTableSQL1 := "CREATE TABLE `sbtest1` " + + "(`id` int(11) NOT NULL AUTO_INCREMENT, " + + " `k` int(11) NOT NULL DEFAULT '0', " + + "`c` char(120) NOT NULL DEFAULT '', " + + "PRIMARY KEY (`id`, `k`)," + + "KEY `k_1` (`k`)," + + "UNIQUE INDEX `c_1` (`c`))" + + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + + fields, err := indexFieldsFromConfigString("id, k", tableInfo) + require.NoError(t, err) + require.False(t, fields.IsEmpty()) + require.Len(t, fields.Cols(), 2) + + for _, index := range tableInfo.Indices { + switch index.Name.String() { + case "PRIMARY": + require.True(t, fields.MatchesIndex(index)) + case "k_1": + require.False(t, fields.MatchesIndex(index)) + case "c_1": + require.False(t, fields.MatchesIndex(index)) + default: + require.FailNow(t, "unreachable") + } + } +} + +func TestIndexFieldsEmpty(t *testing.T) { + t.Parallel() + + createTableSQL1 := "CREATE TABLE `sbtest1` " + + "(`id` int(11) NOT NULL AUTO_INCREMENT, " + + " `k` int(11) NOT NULL DEFAULT '0', " + + "`c` char(120) NOT NULL DEFAULT '', " + + "PRIMARY KEY (`id`), KEY `k_1` (`k`))" + + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + + fields, err := indexFieldsFromConfigString("", tableInfo) + require.NoError(t, err) + require.True(t, fields.IsEmpty()) + + for _, index := range tableInfo.Indices { + // Expected to match all. + require.True(t, fields.MatchesIndex(index)) + } +} diff --git a/sync_diff_inspector/splitter/limit.go b/sync_diff_inspector/splitter/limit.go new file mode 100644 index 00000000000..0075506a673 --- /dev/null +++ b/sync_diff_inspector/splitter/limit.go @@ -0,0 +1,259 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package splitter + +import ( + "context" + "database/sql" + "fmt" + "strings" + + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/progress" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +type LimitIterator struct { + table *common.TableDiff + tagChunk *chunk.Range + queryTmpl string + + indexID int64 + + chunksCh chan *chunk.Range + errCh chan error + cancel context.CancelFunc + dbConn *sql.DB + + progressID string + columnOffset map[string]int +} + +func NewLimitIterator(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB) (*LimitIterator, error) { + return NewLimitIteratorWithCheckpoint(ctx, progressID, table, dbConn, nil) +} + +func NewLimitIteratorWithCheckpoint(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB, startRange *RangeInfo) (*LimitIterator, error) { + indices, err := utils.GetBetterIndex(ctx, dbConn, table.Schema, table.Table, table.Info) + if err != nil { + return nil, errors.Trace(err) + } + var indexColumns []*model.ColumnInfo + var tagChunk *chunk.Range + columnOffset := make(map[string]int) + chunksCh := make(chan *chunk.Range, DefaultChannelBuffer) + errCh := make(chan error) + undone := startRange == nil + beginBucketID := 0 + var indexID int64 + for _, index := range indices { + if index == nil { + continue + } + if startRange != nil && startRange.IndexID != index.ID { + continue + } + log.Debug("Limit select index", zap.String("index", index.Name.O)) + + indexColumns = utils.GetColumnsFromIndex(index, table.Info) + + if len(indexColumns) < len(index.Columns) { + // some column in index is ignored. 
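+			// (for example, it is listed in ignore-columns), so this index cannot
+			// provide a complete set of bound columns; try the next candidate index.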
+ log.Debug("indexColumns empty, try next index") + indexColumns = nil + continue + } + + indexID = index.ID + for i, indexColumn := range indexColumns { + columnOffset[indexColumn.Name.O] = i + } + + if startRange != nil { + tagChunk = chunk.NewChunkRange() + bounds := startRange.ChunkRange.Bounds + if len(bounds) != len(indexColumns) { + log.Warn("checkpoint node columns are not equal to selected index columns, skip checkpoint.") + break + } + + for _, bound := range bounds { + undone = undone || bound.HasUpper + tagChunk.Update(bound.Column, bound.Upper, "", bound.HasUpper, false) + } + + beginBucketID = startRange.ChunkRange.Index.BucketIndexRight + 1 + + } else { + tagChunk = chunk.NewChunkRangeOffset(columnOffset) + } + + break + } + + if indexColumns == nil { + return nil, errors.NotFoundf("not found index") + } + + chunkSize := table.ChunkSize + if chunkSize <= 0 { + cnt, err := dbutil.GetRowCount(ctx, dbConn, table.Schema, table.Table, "", nil) + if err != nil { + return nil, errors.Trace(err) + } + if len(table.Info.Indices) != 0 { + chunkSize = utils.CalculateChunkSize(cnt) + } else { + // no index + // will use table scan + // so we use one chunk + chunkSize = cnt + } + } + log.Info("get chunk size for table", zap.Int64("chunk size", chunkSize), + zap.String("db", table.Schema), zap.String("table", table.Table)) + + lctx, cancel := context.WithCancel(ctx) + queryTmpl := generateLimitQueryTemplate(indexColumns, table, chunkSize) + + limitIterator := &LimitIterator{ + table, + tagChunk, + queryTmpl, + + indexID, + + chunksCh, + errCh, + + cancel, + dbConn, + + progressID, + columnOffset, + } + + progress.StartTable(progressID, 0, false) + if !undone { + // this table is finished. + close(chunksCh) + } else { + go limitIterator.produceChunks(lctx, beginBucketID) + } + + return limitIterator, nil +} + +func (lmt *LimitIterator) Close() { + lmt.cancel() +} + +func (lmt *LimitIterator) Next() (*chunk.Range, error) { + select { + case err := <-lmt.errCh: + return nil, errors.Trace(err) + case c, ok := <-lmt.chunksCh: + if !ok && c == nil { + return nil, nil + } + return c, nil + } +} + +func (lmt *LimitIterator) GetIndexID() int64 { + return lmt.indexID +} + +func (lmt *LimitIterator) produceChunks(ctx context.Context, bucketID int) { + for { + where, args := lmt.tagChunk.ToString(lmt.table.Collation) + query := fmt.Sprintf(lmt.queryTmpl, where) + dataMap, err := lmt.getLimitRow(ctx, query, args) + if err != nil { + select { + case <-ctx.Done(): + case lmt.errCh <- errors.Trace(err): + } + return + } + + chunkRange := lmt.tagChunk + lmt.tagChunk = nil + if dataMap == nil { + // there is no row in result set + chunk.InitChunk(chunkRange, chunk.Limit, bucketID, bucketID, lmt.table.Collation, lmt.table.Range) + bucketID++ + progress.UpdateTotal(lmt.progressID, 1, true) + select { + case <-ctx.Done(): + case lmt.chunksCh <- chunkRange: + } + close(lmt.chunksCh) + return + } + + newTagChunk := chunk.NewChunkRangeOffset(lmt.columnOffset) + for column, data := range dataMap { + newTagChunk.Update(column, string(data.Data), "", !data.IsNull, false) + chunkRange.Update(column, "", string(data.Data), false, !data.IsNull) + } + + chunk.InitChunk(chunkRange, chunk.Limit, bucketID, bucketID, lmt.table.Collation, lmt.table.Range) + bucketID++ + progress.UpdateTotal(lmt.progressID, 1, false) + select { + case <-ctx.Done(): + return + case lmt.chunksCh <- chunkRange: + } + lmt.tagChunk = newTagChunk + } +} + +func (lmt *LimitIterator) getLimitRow(ctx context.Context, query string, args []interface{}) 
(map[string]*dbutil.ColumnData, error) { + rows, err := lmt.dbConn.QueryContext(ctx, query, args...) + if err != nil { + return nil, err + } + defer rows.Close() + if !rows.Next() { + if err := rows.Err(); err != nil { + return nil, err + } + return nil, nil + } + dataMap, err := dbutil.ScanRow(rows) + if err != nil { + return nil, err + } + return dataMap, nil +} + +func generateLimitQueryTemplate(indexColumns []*model.ColumnInfo, table *common.TableDiff, chunkSize int64) string { + fields := make([]string, 0, len(indexColumns)) + for _, columnInfo := range indexColumns { + fields = append(fields, dbutil.ColumnName(columnInfo.Name.O)) + } + columns := strings.Join(fields, ", ") + + // TODO: the limit splitter has not been used yet. + // once it is used, need to add `collation` after `ORDER BY`. + return fmt.Sprintf("SELECT %s FROM %s WHERE %%s ORDER BY %s LIMIT %d,1", columns, dbutil.TableName(table.Schema, table.Table), columns, chunkSize) +} diff --git a/sync_diff_inspector/splitter/random.go b/sync_diff_inspector/splitter/random.go new file mode 100644 index 00000000000..470c33c6a24 --- /dev/null +++ b/sync_diff_inspector/splitter/random.go @@ -0,0 +1,248 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package splitter + +import ( + "context" + "database/sql" + "fmt" + "strings" + + "github.com/pingcap/errors" + "github.com/pingcap/failpoint" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/progress" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "go.uber.org/zap" +) + +type RandomIterator struct { + table *common.TableDiff + chunkSize int64 + chunks []*chunk.Range + nextChunk uint + + dbConn *sql.DB +} + +func NewRandomIterator(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB) (*RandomIterator, error) { + return NewRandomIteratorWithCheckpoint(ctx, progressID, table, dbConn, nil) +} + +func NewRandomIteratorWithCheckpoint(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB, startRange *RangeInfo) (*RandomIterator, error) { + // get the chunk count by data count and chunk size + var splitFieldArr []string + if len(table.Fields) != 0 { + splitFieldArr = strings.Split(table.Fields, ",") + } + + for i := range splitFieldArr { + splitFieldArr[i] = strings.TrimSpace(splitFieldArr[i]) + } + + fields, err := GetSplitFields(table.Info, splitFieldArr) + if err != nil { + return nil, errors.Trace(err) + } + + chunkRange := chunk.NewChunkRange() + beginIndex := 0 + bucketChunkCnt := 0 + chunkCnt := 0 + var chunkSize int64 = 0 + if startRange != nil { + c := startRange.GetChunk() + if c.IsLastChunkForTable() { + return &RandomIterator{ + table: table, + chunkSize: 0, + chunks: nil, + nextChunk: 0, + dbConn: dbConn, + }, nil + } + // The sequences in `chunk.Range.Bounds` should be equivalent. 
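+		// Resume after the checkpoint: the checkpoint chunk's upper bounds become
+		// the lower bounds of the remaining range, which is re-split by random below.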
+ for _, bound := range c.Bounds { + chunkRange.Update(bound.Column, bound.Upper, "", true, false) + } + + // Recover the chunkIndex. Let it be next to the checkpoint node. + beginIndex = c.Index.ChunkIndex + 1 + bucketChunkCnt = c.Index.ChunkCnt + // For chunk splitted by random splitter, the checkpoint chunk records the tableCnt. + chunkCnt = bucketChunkCnt - beginIndex + } else { + cnt, err := dbutil.GetRowCount(ctx, dbConn, table.Schema, table.Table, table.Range, nil) + if err != nil { + return nil, errors.Trace(err) + } + + chunkSize = table.ChunkSize + // We can use config file to fix chunkSize, + // otherwise chunkSize is 0. + if chunkSize <= 0 { + if len(table.Info.Indices) != 0 { + chunkSize = utils.CalculateChunkSize(cnt) + } else { + // no index + // will use table scan + // so we use one chunk + // plus 1 to avoid chunkSize is 0 + // while chunkCnt = (2cnt)/(cnt+1) <= 1 + chunkSize = cnt + 1 + } + } + log.Info("get chunk size for table", zap.Int64("chunk size", chunkSize), + zap.String("db", table.Schema), zap.String("table", table.Table)) + + // When cnt is 0, chunkCnt should be also 0. + // When cnt is in [1, chunkSize], chunkCnt should be 1. + chunkCnt = int((cnt + chunkSize - 1) / chunkSize) + log.Info("split range by random", zap.Int64("row count", cnt), zap.Int("split chunk num", chunkCnt)) + bucketChunkCnt = chunkCnt + } + + chunks, err := splitRangeByRandom(ctx, dbConn, chunkRange, chunkCnt, table.Schema, table.Table, fields, table.Range, table.Collation) + if err != nil { + return nil, errors.Trace(err) + } + chunk.InitChunks(chunks, chunk.Random, 0, 0, beginIndex, table.Collation, table.Range, bucketChunkCnt) + + failpoint.Inject("ignore-last-n-chunk-in-bucket", func(v failpoint.Value) { + log.Info("failpoint ignore-last-n-chunk-in-bucket injected (random splitter)", zap.Int("n", v.(int))) + if len(chunks) <= 1+v.(int) { + failpoint.Return(nil, nil) + } + chunks = chunks[:(len(chunks) - v.(int))] + }) + + progress.StartTable(progressID, len(chunks), true) + return &RandomIterator{ + table: table, + chunkSize: chunkSize, + chunks: chunks, + nextChunk: 0, + dbConn: dbConn, + }, nil + +} + +func (s *RandomIterator) Next() (*chunk.Range, error) { + if uint(len(s.chunks)) <= s.nextChunk { + return nil, nil + } + c := s.chunks[s.nextChunk] + s.nextChunk = s.nextChunk + 1 + failpoint.Inject("print-chunk-info", func() { + lowerBounds := make([]string, len(c.Bounds)) + upperBounds := make([]string, len(c.Bounds)) + for i, bound := range c.Bounds { + lowerBounds[i] = bound.Lower + upperBounds[i] = bound.Upper + } + log.Info("failpoint print-chunk-info injected (random splitter)", zap.Strings("lowerBounds", lowerBounds), zap.Strings("upperBounds", upperBounds), zap.String("indexCode", c.Index.ToString())) + }) + return c, nil +} + +func (s *RandomIterator) Close() { + +} + +// GetSplitFields returns fields to split chunks, order by pk, uk, index, columns. 
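+// The candidates are tried in order: the user-supplied split fields (when given),
+// then the columns of the first index whose columns are all visible, and finally the
+// first non-hidden column. A missing user-supplied column, or a table with no usable
+// column at all, results in an error.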
+func GetSplitFields(table *model.TableInfo, splitFields []string) ([]*model.ColumnInfo, error) { + colsMap := make(map[string]*model.ColumnInfo) + + splitCols := make([]*model.ColumnInfo, 0, 2) + for _, splitField := range splitFields { + col := dbutil.FindColumnByName(table.Columns, splitField) + if col == nil { + return nil, errors.NotFoundf("column %s in table %s", splitField, table.Name) + + } + splitCols = append(splitCols, col) + } + + if len(splitCols) != 0 { + return splitCols, nil + } + + for _, col := range table.Columns { + colsMap[col.Name.O] = col + } + indices := dbutil.FindAllIndex(table) + if len(indices) != 0 { + NEXTINDEX: + for _, idx := range indices { + cols := make([]*model.ColumnInfo, 0, len(table.Columns)) + for _, icol := range idx.Columns { + col := colsMap[icol.Name.O] + if col.Hidden { + continue NEXTINDEX + } + cols = append(cols, col) + } + return cols, nil + } + } + + for _, col := range table.Columns { + if !col.Hidden { + return []*model.ColumnInfo{col}, nil + } + } + return nil, errors.NotFoundf("not found column") +} + +// splitRangeByRandom splits a chunk to multiple chunks by random +// Notice: If the `count <= 1`, it will skip splitting and return `chunk` as a slice directly. +func splitRangeByRandom(ctx context.Context, db *sql.DB, chunk *chunk.Range, count int, schema string, table string, columns []*model.ColumnInfo, limits, collation string) (chunks []*chunk.Range, err error) { + if count <= 1 { + chunks = append(chunks, chunk) + return chunks, nil + } + + chunkLimits, args := chunk.ToString(collation) + limitRange := fmt.Sprintf("(%s) AND (%s)", chunkLimits, limits) + + randomValues, err := utils.GetRandomValues(ctx, db, schema, table, columns, count-1, limitRange, args, collation) + if err != nil { + return nil, errors.Trace(err) + } + log.Debug("get split values by random", zap.Stringer("chunk", chunk), zap.Int("random values num", len(randomValues))) + for i := 0; i <= len(randomValues); i++ { + newChunk := chunk.Copy() + + for j, column := range columns { + if i == 0 { + if len(randomValues) == 0 { + // randomValues is empty, so chunks will append chunk itself. + break + } + newChunk.Update(column.Name.O, "", randomValues[i][j], false, true) + } else if i == len(randomValues) { + newChunk.Update(column.Name.O, randomValues[i-1][j], "", true, false) + } else { + newChunk.Update(column.Name.O, randomValues[i-1][j], randomValues[i][j], true, true) + } + } + chunks = append(chunks, newChunk) + } + log.Debug("split range by random", zap.Stringer("origin chunk", chunk), zap.Int("split num", len(chunks))) + return chunks, nil +} diff --git a/sync_diff_inspector/splitter/splitter.go b/sync_diff_inspector/splitter/splitter.go new file mode 100644 index 00000000000..d2a43ed1ce2 --- /dev/null +++ b/sync_diff_inspector/splitter/splitter.go @@ -0,0 +1,87 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package splitter + +import ( + "fmt" + + "github.com/pingcap/tiflow/sync_diff_inspector/checkpoints" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" +) + +const ( + SplitThreshold = 1000 +) + +// ChunkIterator generate next chunk for only one table lazily. +type ChunkIterator interface { + // Next seeks the next chunk, return nil if seeks to end. + Next() (*chunk.Range, error) + Close() +} + +// RangeInfo represents the unit of a process chunk. +// It's the only entrance of checkpoint. +type RangeInfo struct { + ChunkRange *chunk.Range `json:"chunk-range"` + // for bucket checkpoint + IndexID int64 `json:"index-id"` + + ProgressID string `json:"progress-id"` +} + +// GetTableIndex return the index of table diffs. +// IMPORTANT!!! +// We need to keep the tables order during checkpoint. +// So we should have to save the config info to checkpoint file too +func (r *RangeInfo) GetTableIndex() int { return r.ChunkRange.Index.TableIndex } + +func (r *RangeInfo) GetBucketIndexLeft() int { return r.ChunkRange.Index.BucketIndexLeft } + +func (r *RangeInfo) GetBucketIndexRight() int { return r.ChunkRange.Index.BucketIndexRight } + +func (r *RangeInfo) GetChunkIndex() int { return r.ChunkRange.Index.ChunkIndex } + +func (r *RangeInfo) GetChunk() *chunk.Range { + return r.ChunkRange +} + +func (r *RangeInfo) Copy() *RangeInfo { + return &RangeInfo{ + ChunkRange: r.ChunkRange.Clone(), + IndexID: r.IndexID, + ProgressID: r.ProgressID, + } +} + +func (r *RangeInfo) Update(column, lower, upper string, updateLower, updateUpper bool, collation, limits string) { + r.ChunkRange.Update(column, lower, upper, updateLower, updateUpper) + conditions, args := r.ChunkRange.ToString(collation) + r.ChunkRange.Where = fmt.Sprintf("((%s) AND (%s))", conditions, limits) + r.ChunkRange.Args = args +} + +func (r *RangeInfo) ToNode() *checkpoints.Node { + return &checkpoints.Node{ + ChunkRange: r.ChunkRange, + IndexID: r.IndexID, + } +} + +func FromNode(n *checkpoints.Node) *RangeInfo { + return &RangeInfo{ + ChunkRange: n.ChunkRange, + IndexID: n.IndexID, + } +} diff --git a/sync_diff_inspector/splitter/splitter_test.go b/sync_diff_inspector/splitter/splitter_test.go new file mode 100644 index 00000000000..3f641d100ee --- /dev/null +++ b/sync_diff_inspector/splitter/splitter_test.go @@ -0,0 +1,936 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package splitter + +import ( + "context" + "database/sql/driver" + "fmt" + "sort" + "strconv" + "testing" + + sqlmock "github.com/DATA-DOG/go-sqlmock" + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/pingcap/tiflow/sync_diff_inspector/source/common" + "github.com/pingcap/tiflow/sync_diff_inspector/utils" + "github.com/stretchr/testify/require" +) + +type chunkResult struct { + chunkStr string + args []interface{} +} + +func TestSplitRangeByRandom(t *testing.T) { + db, mock, err := sqlmock.New() + require.NoError(t, err) + + testCases := []struct { + createTableSQL string + splitCount int + originChunk *chunk.Range + randomValues [][]string + expectResult []chunkResult + }{ + { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))", + 3, + chunk.NewChunkRange().CopyAndUpdate("a", "0", "10", true, true).CopyAndUpdate("b", "a", "z", true, true), + [][]string{ + {"5", "7"}, + {"g", "n"}, + }, + []chunkResult{ + { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"0", "0", "a", "5", "5", "g"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"5", "5", "g", "7", "7", "n"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"7", "7", "n", "10", "10", "z"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`, `a`))", + 3, + chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true).CopyAndUpdate("a", "0", "10", true, true), + [][]string{ + {"g", "n"}, + {"5", "7"}, + }, + []chunkResult{ + { + "((`b` > ?) OR (`b` = ? AND `a` > ?)) AND ((`b` < ?) OR (`b` = ? AND `a` <= ?))", + []interface{}{"a", "a", "0", "g", "g", "5"}, + }, { + "((`b` > ?) OR (`b` = ? AND `a` > ?)) AND ((`b` < ?) OR (`b` = ? AND `a` <= ?))", + []interface{}{"g", "g", "5", "n", "n", "7"}, + }, { + "((`b` > ?) OR (`b` = ? AND `a` > ?)) AND ((`b` < ?) OR (`b` = ? 
AND `a` <= ?))", + []interface{}{"n", "n", "7", "z", "z", "10"}, + }, + }, + }, + { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`))", + 3, + chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true), + [][]string{ + {"g", "n"}, + }, + []chunkResult{ + { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"a", "g"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"g", "n"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"n", "z"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`))", + 2, + chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true), + [][]string{ + {"g"}, + }, + []chunkResult{ + { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"a", "g"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"g", "z"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`))", + 3, + chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true), + [][]string{ + {}, + }, + []chunkResult{ + { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"a", "z"}, + }, + }, + }, + } + + for _, testCase := range testCases { + tableInfo, err := dbutil.GetTableInfoBySQL(testCase.createTableSQL, parser.New()) + require.NoError(t, err) + + splitCols, err := GetSplitFields(tableInfo, nil) + require.NoError(t, err) + createFakeResultForRandomSplit(mock, 0, testCase.randomValues) + chunks, err := splitRangeByRandom(context.Background(), db, testCase.originChunk, testCase.splitCount, "test", "test", splitCols, "", "") + require.NoError(t, err) + for j, chunk := range chunks { + chunkStr, args := chunk.ToString("") + require.Equal(t, chunkStr, testCase.expectResult[j].chunkStr) + require.Equal(t, args, testCase.expectResult[j].args) + } + } +} + +func TestRandomSpliter(t *testing.T) { + ctx := context.Background() + db, mock, err := sqlmock.New() + require.NoError(t, err) + + testCases := []struct { + createTableSQL string + count int + fields string + IgnoreColumns []string + randomValues [][]string + expectResult []chunkResult + }{ + { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))", + 10, + "", + nil, + [][]string{ + {"1", "2", "3", "4", "5"}, + {"a", "b", "c", "d", "e"}, + }, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"1", "1", "a"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"1", "1", "a", "2", "2", "b"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"2", "2", "b", "3", "3", "c"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"3", "3", "c", "4", "4", "d"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"4", "4", "d", "5", "5", "e"}, + }, { + "(`a` > ?) OR (`a` = ? 
AND `b` > ?)", + []interface{}{"5", "5", "e"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`))", + 10, + "", + nil, + [][]string{ + {"a", "b", "c", "d", "e"}, + }, + []chunkResult{ + { + "(`b` <= ?)", + []interface{}{"a"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"a", "b"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"b", "c"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"c", "d"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"d", "e"}, + }, { + "(`b` > ?)", + []interface{}{"e"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float)", + 10, + "b,c", + nil, + [][]string{ + {"a", "b", "c", "d", "e"}, + {"1.1", "2.2", "3.3", "4.4", "5.5"}, + }, + []chunkResult{ + { + "(`b` < ?) OR (`b` = ? AND `c` <= ?)", + []interface{}{"a", "a", "1.1"}, + }, { + "((`b` > ?) OR (`b` = ? AND `c` > ?)) AND ((`b` < ?) OR (`b` = ? AND `c` <= ?))", + []interface{}{"a", "a", "1.1", "b", "b", "2.2"}, + }, { + "((`b` > ?) OR (`b` = ? AND `c` > ?)) AND ((`b` < ?) OR (`b` = ? AND `c` <= ?))", + []interface{}{"b", "b", "2.2", "c", "c", "3.3"}, + }, { + "((`b` > ?) OR (`b` = ? AND `c` > ?)) AND ((`b` < ?) OR (`b` = ? AND `c` <= ?))", + []interface{}{"c", "c", "3.3", "d", "d", "4.4"}, + }, { + "((`b` > ?) OR (`b` = ? AND `c` > ?)) AND ((`b` < ?) OR (`b` = ? AND `c` <= ?))", + []interface{}{"d", "d", "4.4", "e", "e", "5.5"}, + }, { + "(`b` > ?) OR (`b` = ? AND `c` > ?)", + []interface{}{"e", "e", "5.5"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float)", + 10, + "", + []string{"a"}, + [][]string{ + {"a", "b", "c", "d", "e"}, + }, + []chunkResult{ + { + "(`b` <= ?)", + []interface{}{"a"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"a", "b"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"b", "c"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"c", "d"}, + }, { + "((`b` > ?)) AND ((`b` <= ?))", + []interface{}{"d", "e"}, + }, { + "(`b` > ?)", + []interface{}{"e"}, + }, + }, + }, { + "create table `test`.`test`(`a` int, `b` varchar(10), `c` float)", + 10, + "", + nil, + [][]string{ + {"1", "2", "3", "4", "5"}, + }, + []chunkResult{ + { + "(`a` <= ?)", + []interface{}{"1"}, + }, { + "((`a` > ?)) AND ((`a` <= ?))", + []interface{}{"1", "2"}, + }, { + "((`a` > ?)) AND ((`a` <= ?))", + []interface{}{"2", "3"}, + }, { + "((`a` > ?)) AND ((`a` <= ?))", + []interface{}{"3", "4"}, + }, { + "((`a` > ?)) AND ((`a` <= ?))", + []interface{}{"4", "5"}, + }, { + "(`a` > ?)", + []interface{}{"5"}, + }, + }, + }, + } + + for _, testCase := range testCases { + tableInfo, err := dbutil.GetTableInfoBySQL(testCase.createTableSQL, parser.New()) + require.NoError(t, err) + + info, needUnifiedTimeStamp := utils.ResetColumns(tableInfo, testCase.IgnoreColumns) + tableDiff := &common.TableDiff{ + Schema: "test", + Table: "test", + Info: info, + IgnoreColumns: testCase.IgnoreColumns, + NeedUnifiedTimeZone: needUnifiedTimeStamp, + Fields: testCase.fields, + ChunkSize: 5, + } + + createFakeResultForRandomSplit(mock, testCase.count, testCase.randomValues) + + iter, err := NewRandomIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + + j := 0 + for { + chunk, err := iter.Next() + require.NoError(t, err) + if chunk == nil { + break + } + chunkStr, args := chunk.ToString("") + require.Equal(t, chunkStr, testCase.expectResult[j].chunkStr) + require.Equal(t, args, testCase.expectResult[j].args) + j = j + 1 + } + } + + // Test 
Checkpoint + stopJ := 3 + tableInfo, err := dbutil.GetTableInfoBySQL(testCases[0].createTableSQL, parser.New()) + require.NoError(t, err) + + tableDiff := &common.TableDiff{ + Schema: "test", + Table: "test", + Info: tableInfo, + //IgnoreColumns: []string{"c"}, + //Fields: "a,b", + ChunkSize: 5, + } + + createFakeResultForRandomSplit(mock, testCases[0].count, testCases[0].randomValues) + + iter, err := NewRandomIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + + var chunk *chunk.Range + for j := 0; j < stopJ; j++ { + chunk, err = iter.Next() + require.NoError(t, err) + } + + bounds1 := chunk.Bounds + chunkID1 := chunk.Index + + rangeInfo := &RangeInfo{ + ChunkRange: chunk, + } + + createFakeResultForRandomSplit(mock, testCases[0].count, testCases[0].randomValues) + + iter, err = NewRandomIteratorWithCheckpoint(ctx, "", tableDiff, db, rangeInfo) + require.NoError(t, err) + + chunk, err = iter.Next() + require.NoError(t, err) + + for i, bound := range chunk.Bounds { + require.Equal(t, bounds1[i].Upper, bound.Lower) + } + + require.Equal(t, chunk.Index.ChunkCnt, chunkID1.ChunkCnt) + require.Equal(t, chunk.Index.ChunkIndex, chunkID1.ChunkIndex+1) + +} + +func createFakeResultForRandomSplit(mock sqlmock.Sqlmock, count int, randomValues [][]string) { + createFakeResultForCount(mock, count) + if randomValues == nil { + return + } + // generate fake result for get random value for column a + columns := []string{"a", "b", "c", "d", "e", "f"} + rowsNames := make([]string, 0, len(randomValues)) + for i := 0; i < len(randomValues); i++ { + rowsNames = append(rowsNames, columns[i]) + } + randomRows := sqlmock.NewRows(rowsNames) + for i := 0; i < len(randomValues[0]); i++ { + row := make([]driver.Value, 0, len(randomValues)) + for j := 0; j < len(randomValues); j++ { + row = append(row, randomValues[j][i]) + } + randomRows.AddRow(row...) + } + mock.ExpectQuery("ORDER BY rand_value").WillReturnRows(randomRows) + +} + +func TestBucketSpliter(t *testing.T) { + ctx := context.Background() + db, mock, err := sqlmock.New() + require.NoError(t, err) + + createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + testCases := []struct { + chunkSize int64 + aRandomValues []interface{} + bRandomValues []interface{} + expectResult []chunkResult + }{ + { + // chunk size less than the count of bucket 64, and the bucket's count 64 >= 32, so will split by random in every bucket + 32, + []interface{}{32, 32 * 3, 32 * 5, 32 * 7, 32 * 9}, + []interface{}{6, 6 * 3, 6 * 5, 6 * 7, 6 * 9}, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"32", "32", "6"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"32", "32", "6", "63", "63", "11"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"63", "63", "11", "96", "96", "18"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"96", "96", "18", "127", "127", "23"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"127", "127", "23", "160", "160", "30"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"160", "160", "30", "191", "191", "35"}, + }, { + "((`a` > ?) OR (`a` = ? 
AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"191", "191", "35", "224", "224", "42"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"224", "224", "42", "255", "255", "47"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"255", "255", "47", "288", "288", "54"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"288", "288", "54", "319", "319", "59"}, + }, { + "(`a` > ?) OR (`a` = ? AND `b` > ?)", + []interface{}{"319", "319", "59"}, + }, + }, + }, { + // chunk size less than the count of bucket 64, but 64 is less than 2*50, so will not split every bucket + 50, + nil, + nil, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"63", "63", "11"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"63", "63", "11", "127", "127", "23"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"127", "127", "23", "191", "191", "35"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"191", "191", "35", "255", "255", "47"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"255", "255", "47", "319", "319", "59"}, + }, { + "(`a` > ?) OR (`a` = ? AND `b` > ?)", + []interface{}{"319", "319", "59"}, + }, + }, + }, { + // chunk size is equal to the count of bucket 64, so every becket will generate a chunk + 64, + nil, + nil, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"63", "63", "11"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"63", "63", "11", "127", "127", "23"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"127", "127", "23", "191", "191", "35"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"191", "191", "35", "255", "255", "47"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"255", "255", "47", "319", "319", "59"}, + }, { + "(`a` > ?) OR (`a` = ? AND `b` > ?)", + []interface{}{"319", "319", "59"}, + }, + }, + }, { + // chunk size is greater than the count of bucket 64, will combine two bucket into chunk + 127, + nil, + nil, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"127", "127", "23"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"127", "127", "23", "255", "255", "47"}, + }, { + "(`a` > ?) OR (`a` = ? AND `b` > ?)", + []interface{}{"255", "255", "47"}, + }, + }, + }, { + // chunk size is equal to the double count of bucket 64, will combine two bucket into one chunk + 128, + nil, + nil, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"127", "127", "23"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"127", "127", "23", "255", "255", "47"}, + }, { + "(`a` > ?) OR (`a` = ? 
AND `b` > ?)", + []interface{}{"255", "255", "47"}, + }, + }, + }, { + // chunk size is greater than the double count of bucket 64, will combine three bucket into one chunk + 129, + nil, + nil, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"191", "191", "35"}, + }, { + "(`a` > ?) OR (`a` = ? AND `b` > ?)", + []interface{}{"191", "191", "35"}, + }, + }, + }, { + // chunk size is greater than the total count, only generate one chunk + 400, + nil, + nil, + []chunkResult{ + { + "TRUE", + nil, + }, + }, + }, + } + + tableDiff := &common.TableDiff{ + Schema: "test", + Table: "test", + Info: tableInfo, + } + + for i, testCase := range testCases { + fmt.Printf("%d", i) + createFakeResultForBucketSplit(mock, testCase.aRandomValues, testCase.bRandomValues) + tableDiff.ChunkSize = testCase.chunkSize + iter, err := NewBucketIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + + obtainChunks := make([]chunkResult, 0, len(testCase.expectResult)) + nextBeginBucket := 0 + for { + chunk, err := iter.Next() + require.NoError(t, err) + if chunk == nil { + break + } + chunkStr, _ := chunk.ToString("") + if nextBeginBucket == 0 { + require.Equal(t, chunk.Index.BucketIndexLeft, 0) + } else { + require.Equal(t, chunk.Index.BucketIndexLeft, nextBeginBucket) + } + if chunk.Index.ChunkIndex+1 == chunk.Index.ChunkCnt { + nextBeginBucket = chunk.Index.BucketIndexRight + 1 + } + obtainChunks = append(obtainChunks, chunkResult{chunkStr, chunk.Args}) + + } + sort.Slice(obtainChunks, func(i, j int) bool { + totalIndex := len(obtainChunks[i].args) + if totalIndex > len(obtainChunks[j].args) { + totalIndex = len(obtainChunks[j].args) + } + for index := 0; index < totalIndex; index++ { + a1, _ := strconv.Atoi(obtainChunks[i].args[index].(string)) + a2, _ := strconv.Atoi(obtainChunks[j].args[index].(string)) + if a1 < a2 { + return true + } else if a1 > a2 { + return false + } + } + if len(obtainChunks[i].args) == len(obtainChunks[j].args) { + // hack way for test case 6 + return len(obtainChunks[i].chunkStr) > len(obtainChunks[j].chunkStr) + } + return len(obtainChunks[i].args) < len(obtainChunks[j].args) + }) + // we expect chunk count is same after we generate chunk concurrently + require.Equal(t, len(obtainChunks), len(testCase.expectResult)) + for i, e := range testCase.expectResult { + require.Equal(t, obtainChunks[i].args, e.args) + require.Equal(t, obtainChunks[i].chunkStr, e.chunkStr) + } + } + + // Test Checkpoint + stopJ := 3 + createFakeResultForBucketSplit(mock, testCases[0].aRandomValues, testCases[0].bRandomValues) + tableDiff.ChunkSize = testCases[0].chunkSize + iter, err := NewBucketIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + j := 0 + var chunk *chunk.Range + for ; j < stopJ; j++ { + chunk, err = iter.Next() + require.NoError(t, err) + } + for { + c, err := iter.Next() + require.NoError(t, err) + if c == nil { + break + } + } + bounds1 := chunk.Bounds + + rangeInfo := &RangeInfo{ + ChunkRange: chunk, + IndexID: iter.GetIndexID(), + } + + // drop the origin db since we cannot ensure order of mock string after we concurrent produce chunks. 
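+	// Rebuild the mock expectations for the resumed iterator: bucket stats again, a
+	// row count, and only the random values that come after the checkpoint chunk.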
+ db, mock, err = sqlmock.New() + require.NoError(t, err) + createFakeResultForBucketSplit(mock, nil, nil) + createFakeResultForCount(mock, 64) + createFakeResultForRandom(mock, testCases[0].aRandomValues[stopJ:], testCases[0].bRandomValues[stopJ:]) + iter, err = NewBucketIteratorWithCheckpoint(ctx, "", tableDiff, db, rangeInfo, utils.NewWorkerPool(1, "bucketIter")) + require.NoError(t, err) + chunk, err = iter.Next() + require.NoError(t, err) + + for i, bound := range chunk.Bounds { + require.Equal(t, bounds1[i].Upper, bound.Lower) + } +} + +func createFakeResultForBucketSplit(mock sqlmock.Sqlmock, aRandomValues, bRandomValues []interface{}) { + /* + +---------+------------+-------------+----------+-----------+-------+---------+-------------+-------------+ + | Db_name | Table_name | Column_name | Is_index | Bucket_id | Count | Repeats | Lower_Bound | Upper_Bound | + +---------+------------+-------------+----------+-----------+-------+---------+-------------+-------------+ + | test | test | PRIMARY | 1 | 0 | 64 | 1 | (0, 0) | (63, 11) | + | test | test | PRIMARY | 1 | 1 | 128 | 1 | (64, 12) | (127, 23) | + | test | test | PRIMARY | 1 | 2 | 192 | 1 | (128, 24) | (191, 35) | + | test | test | PRIMARY | 1 | 3 | 256 | 1 | (192, 36) | (255, 47) | + | test | test | PRIMARY | 1 | 4 | 320 | 1 | (256, 48) | (319, 59) | + +---------+------------+-------------+----------+-----------+-------+---------+-------------+-------------+ + */ + + statsRows := sqlmock.NewRows([]string{"Db_name", "Table_name", "Column_name", "Is_index", "Bucket_id", "Count", "Repeats", "Lower_Bound", "Upper_Bound"}) + for i := 0; i < 5; i++ { + statsRows.AddRow("test", "test", "PRIMARY", 1, (i+1)*64, (i+1)*64, 1, fmt.Sprintf("(%d, %d)", i*64, i*12), fmt.Sprintf("(%d, %d)", (i+1)*64-1, (i+1)*12-1)) + } + mock.ExpectQuery("SHOW STATS_BUCKETS").WillReturnRows(statsRows) + + createFakeResultForRandom(mock, aRandomValues, bRandomValues) +} + +func createFakeResultForCount(mock sqlmock.Sqlmock, count int) { + if count > 0 { + // generate fake result for get the row count of this table + countRows := sqlmock.NewRows([]string{"cnt"}).AddRow(count) + mock.ExpectQuery("SELECT COUNT.*").WillReturnRows(countRows) + } +} + +func createFakeResultForRandom(mock sqlmock.Sqlmock, aRandomValues, bRandomValues []interface{}) { + for i := 0; i < len(aRandomValues); i++ { + aRandomRows := sqlmock.NewRows([]string{"a", "b"}) + aRandomRows.AddRow(aRandomValues[i], bRandomValues[i]) + mock.ExpectQuery("ORDER BY rand_value").WillReturnRows(aRandomRows) + } +} + +func TestLimitSpliter(t *testing.T) { + ctx := context.Background() + db, mock, err := sqlmock.New() + require.NoError(t, err) + + createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + testCases := []struct { + limitAValues []string + limitBValues []string + expectResult []chunkResult + }{ + { + []string{"1000", "2000", "3000", "4000"}, + []string{"a", "b", "c", "d"}, + []chunkResult{ + { + "(`a` < ?) OR (`a` = ? AND `b` <= ?)", + []interface{}{"1000", "1000", "a"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"1000", "1000", "a", "2000", "2000", "b"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) OR (`a` = ? AND `b` <= ?))", + []interface{}{"2000", "2000", "b", "3000", "3000", "c"}, + }, { + "((`a` > ?) OR (`a` = ? AND `b` > ?)) AND ((`a` < ?) 
OR (`a` = ? AND `b` <= ?))", + []interface{}{"3000", "3000", "c", "4000", "4000", "d"}, + }, { + "(`a` > ?) OR (`a` = ? AND `b` > ?)", + []interface{}{"4000", "4000", "d"}, + }, + }, + }, + } + + tableDiff := &common.TableDiff{ + Schema: "test", + Table: "test", + Info: tableInfo, + ChunkSize: 1000, + } + + for _, testCase := range testCases { + createFakeResultForLimitSplit(mock, testCase.limitAValues, testCase.limitBValues, true) + + iter, err := NewLimitIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + + j := 0 + for { + chunk, err := iter.Next() + require.NoError(t, err) + if chunk == nil { + break + } + chunkStr, args := chunk.ToString("") + require.Equal(t, chunkStr, testCase.expectResult[j].chunkStr) + require.Equal(t, args, testCase.expectResult[j].args) + j = j + 1 + } + } + + // Test Checkpoint + stopJ := 2 + createFakeResultForLimitSplit(mock, testCases[0].limitAValues[:stopJ], testCases[0].limitBValues[:stopJ], true) + iter, err := NewLimitIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + j := 0 + var chunk *chunk.Range + for ; j < stopJ; j++ { + chunk, err = iter.Next() + require.NoError(t, err) + } + bounds1 := chunk.Bounds + + rangeInfo := &RangeInfo{ + ChunkRange: chunk, + IndexID: iter.GetIndexID(), + } + + createFakeResultForLimitSplit(mock, testCases[0].limitAValues[stopJ:], testCases[0].limitBValues[stopJ:], true) + iter, err = NewLimitIteratorWithCheckpoint(ctx, "", tableDiff, db, rangeInfo) + require.NoError(t, err) + chunk, err = iter.Next() + require.NoError(t, err) + + for i, bound := range chunk.Bounds { + require.Equal(t, bounds1[i].Upper, bound.Lower) + } +} + +func createFakeResultForLimitSplit(mock sqlmock.Sqlmock, aValues []string, bValues []string, needEnd bool) { + for i, a := range aValues { + limitRows := sqlmock.NewRows([]string{"a", "b"}) + limitRows.AddRow(a, bValues[i]) + mock.ExpectQuery("SELECT `a`,.*").WillReturnRows(limitRows) + } + + if needEnd { + mock.ExpectQuery("SELECT `a`,.*").WillReturnRows(sqlmock.NewRows([]string{"a", "b"})) + } +} + +func TestRangeInfo(t *testing.T) { + rangeInfo := &RangeInfo{ + ChunkRange: chunk.NewChunkRange(), + IndexID: 2, + ProgressID: "324312", + } + rangeInfo.Update("a", "1", "2", true, true, "[23]", "[sdg]") + rangeInfo.ChunkRange.Index.TableIndex = 1 + chunkRange := rangeInfo.GetChunk() + require.Equal(t, chunkRange.Where, "((((`a` COLLATE '[23]' > ?)) AND ((`a` COLLATE '[23]' <= ?))) AND ([sdg]))") + require.Equal(t, chunkRange.Args, []interface{}{"1", "2"}) + + require.Equal(t, rangeInfo.GetTableIndex(), 1) + + rangeInfo2 := FromNode(rangeInfo.ToNode()) + + chunkRange = rangeInfo2.GetChunk() + require.Equal(t, chunkRange.Where, "((((`a` COLLATE '[23]' > ?)) AND ((`a` COLLATE '[23]' <= ?))) AND ([sdg]))") + require.Equal(t, chunkRange.Args, []interface{}{"1", "2"}) + + require.Equal(t, rangeInfo2.GetTableIndex(), 1) + +} + +func TestChunkSize(t *testing.T) { + ctx := context.Background() + db, mock, err := sqlmock.New() + require.NoError(t, err) + + createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + tableDiff := &common.TableDiff{ + Schema: "test", + Table: "test", + Info: tableInfo, + ChunkSize: 0, + } + + // test bucket splitter chunksize + statsRows := sqlmock.NewRows([]string{"Db_name", "Table_name", "Column_name", "Is_index", "Bucket_id", "Count", "Repeats", "Lower_Bound", "Upper_Bound"}) + // Notice, use wrong 
Bound to kill bucket producer + statsRows.AddRow("test", "test", "PRIMARY", 1, 0, 1000000000, 1, "(1, 2, wrong!)", "(2, 3, wrong!)") + mock.ExpectQuery("SHOW STATS_BUCKETS").WillReturnRows(statsRows) + + bucketIter, err := NewBucketIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + require.Equal(t, bucketIter.chunkSize, int64(100000)) + + createFakeResultForBucketSplit(mock, nil, nil) + bucketIter, err = NewBucketIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + require.Equal(t, bucketIter.chunkSize, int64(50000)) + + // test random splitter chunksize + // chunkNum is only 1, so don't need randomValues + createFakeResultForRandomSplit(mock, 1000, nil) + randomIter, err := NewRandomIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + require.Equal(t, randomIter.chunkSize, int64(50000)) + + createFakeResultForRandomSplit(mock, 1000000000, [][]string{ + {"1", "2", "3", "4", "5"}, + {"a", "b", "c", "d", "e"}, + }) + randomIter, err = NewRandomIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + require.Equal(t, randomIter.chunkSize, int64(100000)) + + createTableSQL = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime)" + tableInfo, err = dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + tableDiff_noindex := &common.TableDiff{ + Schema: "test", + Table: "test", + Info: tableInfo, + ChunkSize: 0, + } + // no index + createFakeResultForRandomSplit(mock, 1000, nil) + randomIter, err = NewRandomIterator(ctx, "", tableDiff_noindex, db) + require.NoError(t, err) + require.Equal(t, randomIter.chunkSize, int64(1001)) + + // test limit splitter chunksize + createFakeResultForCount(mock, 1000) + mock.ExpectQuery("SELECT `a`,.*limit 50000.*").WillReturnRows(sqlmock.NewRows([]string{"a", "b"})) + _, err = NewLimitIterator(ctx, "", tableDiff, db) + require.NoError(t, err) + +} diff --git a/sync_diff_inspector/utils/pd.go b/sync_diff_inspector/utils/pd.go new file mode 100644 index 00000000000..b9604f81129 --- /dev/null +++ b/sync_diff_inspector/utils/pd.go @@ -0,0 +1,288 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package utils + +import ( + "context" + "database/sql" + "fmt" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/coreos/go-semver/semver" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/util/dbutil" + pd "github.com/tikv/pd/client" + clientv3 "go.etcd.io/etcd/client/v3" + "go.uber.org/zap" +) + +const ( + tidbServerInformationPath = "/tidb/server/info" + defaultEtcdDialTimeOut = 3 * time.Second + + defaultGCSafePointTTL = 5 * 60 +) + +var ( + tidbVersionRegex = regexp.MustCompile(`-[v]?\d+\.\d+\.\d+([0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*)?`) + autoGCSafePointVersion = semver.New("4.0.0") +) + +func getPDDDLIDs(pCtx context.Context, cli *clientv3.Client) ([]string, error) { + ctx, cancel := context.WithTimeout(pCtx, 10*time.Second) + defer cancel() + + resp, err := cli.Get(ctx, tidbServerInformationPath, clientv3.WithPrefix()) + if err != nil { + return nil, errors.Trace(err) + } + pdDDLIds := make([]string, len(resp.Kvs)) + for i, kv := range resp.Kvs { + items := strings.Split(string(kv.Key), "/") + pdDDLIds[i] = items[len(items)-1] + } + return pdDDLIds, nil +} + +// getTiDBDDLIDs gets DDL IDs from TiDB +func getTiDBDDLIDs(ctx context.Context, db *sql.DB) ([]string, error) { + query := "SELECT * FROM information_schema.tidb_servers_info;" + rows, err := db.QueryContext(ctx, query) + if err != nil { + return []string{}, errors.Annotatef(err, "sql: %s", query) + } + return GetSpecifiedColumnValueAndClose(rows, "DDL_ID") +} + +func checkSameCluster(ctx context.Context, db *sql.DB, pdAddrs []string) (bool, error) { + cli, err := clientv3.New(clientv3.Config{ + Endpoints: pdAddrs, + DialTimeout: defaultEtcdDialTimeOut, + }) + if err != nil { + return false, errors.Trace(err) + } + tidbDDLIDs, err := getTiDBDDLIDs(ctx, db) + if err != nil { + return false, err + } + pdDDLIDs, err := getPDDDLIDs(ctx, cli) + if err != nil { + return false, err + } + sort.Strings(tidbDDLIDs) + sort.Strings(pdDDLIDs) + + return sameStringArray(tidbDDLIDs, pdDDLIDs), nil +} + +func sameStringArray(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + +// GetPDClientForGC is an initialization step. 
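+// It returns a PD client only when the target database is TiDB and the PD endpoints
+// reported by the cluster belong to the same cluster as the connected instance;
+// otherwise the returned client is nil. A hypothetical call site (ctx, db and
+// snapshot stand in for the caller's values):
+//
+//	pdCli, err := GetPDClientForGC(ctx, db)
+//	if err == nil && pdCli != nil {
+//		err = StartGCSavepointUpdateService(ctx, pdCli, db, snapshot)
+//	}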
+func GetPDClientForGC(ctx context.Context, db *sql.DB) (pd.Client, error) { + if ok, _ := dbutil.IsTiDB(ctx, db); ok { + pdAddrs, err := GetPDAddrs(ctx, db) + if err != nil { + return nil, err + } + if len(pdAddrs) > 0 { + if same, err := checkSameCluster(ctx, db, pdAddrs); err != nil { + log.Info("[automatically GC] check whether fetched pd addr and TiDB belong to one cluster failed", zap.Strings("pd address", pdAddrs), zap.Error(err)) + } else if same { + pdClient, err := pd.NewClientWithContext(ctx, pdAddrs, pd.SecurityOption{}) + if err != nil { + log.Info("[automatically GC] create pd client to control GC failed", zap.Strings("pd address", pdAddrs), zap.Error(err)) + return nil, err + } + return pdClient, nil + } + } + } + return nil, nil +} + +// GetPDAddrs gets PD address from TiDB +func GetPDAddrs(ctx context.Context, db *sql.DB) ([]string, error) { + query := "SELECT * FROM information_schema.cluster_info where type = 'pd';" + rows, err := db.QueryContext(ctx, query) + if err != nil { + return []string{}, errors.Annotatef(err, "sql: %s", query) + } + return GetSpecifiedColumnValueAndClose(rows, "STATUS_ADDRESS") +} + +// GetSpecifiedColumnValueAndClose get columns' values whose name is equal to columnName and close the given rows +func GetSpecifiedColumnValueAndClose(rows *sql.Rows, columnName string) ([]string, error) { + if rows == nil { + return []string{}, nil + } + defer rows.Close() + columnName = strings.ToUpper(columnName) + var strs []string + columns, _ := rows.Columns() + addr := make([]interface{}, len(columns)) + oneRow := make([]sql.NullString, len(columns)) + fieldIndex := -1 + for i, col := range columns { + if strings.ToUpper(col) == columnName { + fieldIndex = i + } + addr[i] = &oneRow[i] + } + if fieldIndex == -1 { + return strs, nil + } + for rows.Next() { + err := rows.Scan(addr...) + if err != nil { + return strs, errors.Trace(err) + } + if oneRow[fieldIndex].Valid { + strs = append(strs, oneRow[fieldIndex].String) + } + } + return strs, errors.Trace(rows.Err()) +} + +// parse versino string to semver.Version +func parseVersion(versionStr string) (*semver.Version, error) { + versionStr = tidbVersionRegex.FindString(versionStr)[1:] + versionStr = strings.TrimPrefix(versionStr, "v") + return semver.NewVersion(versionStr) +} + +// It's OK to failed to get db version +func TryToGetVersion(ctx context.Context, db *sql.DB) *semver.Version { + versionStr, err := dbutil.GetDBVersion(ctx, db) + if err != nil { + return nil + } + if !strings.Contains(strings.ToLower(versionStr), "tidb") { + return nil + } + version, err := parseVersion(versionStr) + if err != nil { + // It's OK when parse version failed + version = nil + } + return version +} + +// StartGCSavepointUpdateService keeps GC safePoint stop moving forward. 
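+// For TiDB versions newer than v4.0.0 it resolves the configured snapshot to a TSO and
+// starts a background goroutine that keeps re-registering a service-level GC safe point
+// (TTL defaultGCSafePointTTL) at that TSO, so the data being compared is not garbage
+// collected while the check runs; on older versions it only logs that the automatic GC
+// safe point is unsupported.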
+func StartGCSavepointUpdateService(ctx context.Context, pdCli pd.Client, db *sql.DB, snapshot string) error { + versionStr, err := selectVersion(db) + if err != nil { + log.Info("detect version of tidb failed") + return nil + } + tidbVersion, err := parseVersion(versionStr) + if err != nil { + log.Info("parse version of tidb failed") + return nil + } + // get latest snapshot + snapshotTS, err := parseSnapshotToTSO(db, snapshot) + if tidbVersion.Compare(*autoGCSafePointVersion) > 0 { + log.Info("tidb support auto gc safepoint", zap.Stringer("version", tidbVersion)) + if err != nil { + return err + } + go updateServiceSafePoint(ctx, pdCli, snapshotTS) + } else { + log.Info("tidb doesn't support auto gc safepoint", zap.Stringer("version", tidbVersion)) + } + return nil +} + +func updateServiceSafePoint(ctx context.Context, pdClient pd.Client, snapshotTS uint64) { + updateInterval := time.Duration(defaultGCSafePointTTL/2) * time.Second + tick := time.NewTicker(updateInterval) + DiffServiceSafePointID := fmt.Sprintf("Sync_diff_%d", time.Now().UnixNano()) + log.Info("generate dumpling gc safePoint id", zap.String("id", DiffServiceSafePointID)) + for { + log.Debug("update PD safePoint limit with ttl", + zap.Uint64("safePoint", snapshotTS), + zap.Duration("updateInterval", updateInterval)) + for retryCnt := 0; retryCnt <= 10; retryCnt++ { + _, err := pdClient.UpdateServiceGCSafePoint(ctx, DiffServiceSafePointID, defaultGCSafePointTTL, snapshotTS) + if err == nil { + break + } + log.Debug("update PD safePoint failed", zap.Error(err), zap.Int("retryTime", retryCnt)) + select { + case <-ctx.Done(): + return + case <-time.After(time.Second): + } + } + select { + case <-ctx.Done(): + return + case <-tick.C: + } + } +} + +func parseSnapshotToTSO(pool *sql.DB, snapshot string) (uint64, error) { + snapshotTS, err := strconv.ParseUint(snapshot, 10, 64) + if err == nil { + return snapshotTS, nil + } + var tso sql.NullInt64 + query := "SELECT unix_timestamp(?)" + row := pool.QueryRow(query, snapshot) + err = row.Scan(&tso) + if err != nil { + return 0, errors.Annotatef(err, "sql: %s", strings.ReplaceAll(query, "?", fmt.Sprintf(`"%s"`, snapshot))) + } + if !tso.Valid { + return 0, errors.Errorf("snapshot %s format not supported. please use tso or '2006-01-02 15:04:05' format time", snapshot) + } + return uint64(tso.Int64*1000) << 18, nil +} + +func GetSnapshot(ctx context.Context, db *sql.DB) ([]string, error) { + query := "SHOW MASTER STATUS;" + rows, err := db.QueryContext(ctx, query) + if err != nil { + return []string{}, errors.Annotatef(err, "sql: %s", query) + } + return GetSpecifiedColumnValueAndClose(rows, "Position") +} + +func selectVersion(db *sql.DB) (string, error) { + var versionInfo string + const query = "SELECT version()" + row := db.QueryRow(query) + err := row.Scan(&versionInfo) + if err != nil { + return "", errors.Annotatef(err, "sql: %s", query) + } + return versionInfo, nil +} diff --git a/sync_diff_inspector/utils/table.go b/sync_diff_inspector/utils/table.go new file mode 100644 index 00000000000..1c99dcb5d74 --- /dev/null +++ b/sync_diff_inspector/utils/table.go @@ -0,0 +1,187 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "context" + "fmt" + "regexp" + "strings" + "time" + + "github.com/coreos/go-semver/semver" + "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/ddl" + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/sessionctx" + "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/collate" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" + "github.com/pingcap/tidb/pkg/util/mock" +) + +const ( + AnnotationClusteredReplaceString = "${1} /*T![clustered_index] CLUSTERED */${2}\n" + AnnotationNonClusteredReplaceString = "${1} /*T![clustered_index] NONCLUSTERED */${2}\n" +) + +func init() { + collate.SetNewCollationEnabledForTest(false) +} + +// addClusteredAnnotation add the `/*T![clustered_index] NONCLUSTERED */` for primary key of create table info +// In the older version, the create table info hasn't `/*T![clustered_index] NONCLUSTERED */`, +// which lead the issue https://github.com/pingcap/tidb-tools/issues/678 +// +// Before Get Create Table Info: +// mysql> SHOW CREATE TABLE `test`.`itest`; +// +// +-------+--------------------------------------------------------------------+ +// | Table | Create Table | +// +-------+--------------------------------------------------------------------+ +// | itest | CREATE TABLE `itest` ( +// `id` int(11) DEFAULT NULL, +// `name` varchar(24) DEFAULT NULL, +// PRIMARY KEY (`id`) +// ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin | +// +-------+--------------------------------------------------------------------+ +// +// After Add the annotation: +// +// +-------+--------------------------------------------------------------------+ +// | Table | Create Table | +// +-------+--------------------------------------------------------------------+ +// | itest | CREATE TABLE `itest` ( +// `id` int(11) DEFAULT NULL, +// `name` varchar(24) DEFAULT NULL, +// PRIMARY KEY (`id`) /*T![clustered_index] CLUSTERED */ +// ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin | +// +-------+--------------------------------------------------------------------+ +func addClusteredAnnotationForPrimaryKey(raw string, replace string) (string, error) { + reg, regErr := regexp.Compile(`(PRIMARY\sKEY.*\))(\s*,?)\s*\n`) + if reg == nil || regErr != nil { + return raw, errors.Annotate(regErr, "failed to compile regex for add clustered annotation, err: %s") + } + return reg.ReplaceAllString(raw, replace), nil +} + +func getTableInfoBySQL(ctx sessionctx.Context, createTableSQL string, parser2 *parser.Parser) (table *model.TableInfo, err error) { + stmt, err := parser2.ParseOneStmt(createTableSQL, "", "") + if err != nil { + return nil, errors.Trace(err) + } + + s, ok := stmt.(*ast.CreateTableStmt) + if ok { + table, err := ddl.BuildTableInfoWithStmt(ctx, s, mysql.DefaultCharset, "", nil) + if err != nil { + return nil, errors.Trace(err) + } + + // put primary key in indices + if table.PKIsHandle { + pkIndex := &model.IndexInfo{ + Name: model.NewCIStr("PRIMARY"), + Primary: true, + State: 
model.StatePublic, + Unique: true, + Tp: model.IndexTypeBtree, + Columns: []*model.IndexColumn{ + { + Name: table.GetPkName(), + Length: types.UnspecifiedLength, + }, + }, + } + + table.Indices = append(table.Indices, pkIndex) + } + + return table, nil + } + + return nil, errors.Errorf("get table info from sql %s failed!", createTableSQL) +} + +func isPKISHandle( + ctx context.Context, + db dbutil.QueryExecutor, + schemaName, tableName string, +) bool { + query := fmt.Sprintf("SELECT _tidb_rowid FROM %s LIMIT 0;", dbutil.TableName(schemaName, tableName)) + rows, err := db.QueryContext(ctx, query) + if err != nil && strings.Contains(err.Error(), "Unknown column") { + return true + } + if rows != nil { + rows.Close() + } + return false +} + +func GetTableInfoWithVersion( + ctx context.Context, + db dbutil.QueryExecutor, + schemaName, tableName string, + version *semver.Version, +) (*model.TableInfo, error) { + createTableSQL, err := dbutil.GetCreateTableSQL(ctx, db, schemaName, tableName) + if err != nil { + return nil, errors.Trace(err) + } + + if version != nil && version.Major <= 4 { + var replaceString string + if isPKISHandle(ctx, db, schemaName, tableName) { + replaceString = AnnotationClusteredReplaceString + } else { + replaceString = AnnotationNonClusteredReplaceString + } + createTableSQL, err = addClusteredAnnotationForPrimaryKey(createTableSQL, replaceString) + if err != nil { + return nil, errors.Trace(err) + } + } + parser2, err := dbutil.GetParserForDB(ctx, db) + if err != nil { + return nil, errors.Trace(err) + } + sctx := mock.NewContext() + // unify the timezone to UTC +0:00 + sctx.GetSessionVars().TimeZone = time.UTC + sctx.GetSessionVars().SQLMode = mysql.DelSQLMode(sctx.GetSessionVars().SQLMode, mysql.ModeStrictTransTables) + sctx.GetSessionVars().SQLMode = mysql.DelSQLMode(sctx.GetSessionVars().SQLMode, mysql.ModeStrictAllTables) + return getTableInfoBySQL(sctx, createTableSQL, parser2) +} + +// GetTableInfo returns table information. +func GetTableInfo( + ctx context.Context, db dbutil.QueryExecutor, + schemaName, tableName string, +) (*model.TableInfo, error) { + createTableSQL, err := dbutil.GetCreateTableSQL(ctx, db, schemaName, tableName) + if err != nil { + return nil, errors.Trace(err) + } + + parser2, err := dbutil.GetParserForDB(ctx, db) + if err != nil { + return nil, errors.Trace(err) + } + return dbutiltest.GetTableInfoBySQL(createTableSQL, parser2) +} diff --git a/sync_diff_inspector/utils/utils.go b/sync_diff_inspector/utils/utils.go new file mode 100644 index 00000000000..d80bc669123 --- /dev/null +++ b/sync_diff_inspector/utils/utils.go @@ -0,0 +1,1059 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package utils + +import ( + "context" + "database/sql" + "encoding/json" + "fmt" + "math" + "reflect" + "sort" + "strconv" + "strings" + "sync" + + "github.com/olekukonko/tablewriter" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/parser/mysql" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "go.uber.org/zap" +) + +// SecretString is a wrapper for sensitive strings like password, +// which yields redacted string when being marshaled. +type SecretString string + +func (s SecretString) MarshalJSON() ([]byte, error) { + return []byte(`"******"`), nil +} + +func (s SecretString) String() string { + return "******" +} + +// Plain unwraps the secret string. +func (s SecretString) Plain() string { + return string(s) +} + +// IsBlobType returns true if tp is Blob type +func IsBlobType(tp byte) bool { + switch tp { + case mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeBlob, mysql.TypeLongBlob: + return true + } + + return false +} + +// WorkerPool contains a pool of workers. +// The number of workers in the channel represents how many goruntines +// can be created to execute the task. +// After the task is done, worker will be sent back to the channel. +type WorkerPool struct { + limit uint + workers chan *Worker + name string + wg sync.WaitGroup +} + +// Worker identified by ID. +type Worker struct { + ID uint64 +} + +type taskFunc func() + +// NewWorkerPool returns a WorkerPool with `limit` workers in the channel. +func NewWorkerPool(limit uint, name string) *WorkerPool { + workers := make(chan *Worker, limit) + for i := uint(0); i < limit; i++ { + workers <- &Worker{ID: uint64(i + 1)} + } + return &WorkerPool{ + limit: limit, + workers: workers, + name: name, + } +} + +// Apply wait for an idle worker to run `taskFunc`. +// Notice: function `Apply` and `WaitFinished` cannot be called in parallel +func (pool *WorkerPool) Apply(fn taskFunc) { + worker := pool.apply() + pool.wg.Add(1) + go func() { + defer pool.wg.Done() + defer pool.recycle(worker) + fn() + }() +} + +// apply waits for an idle worker from the channel and return it +func (pool *WorkerPool) apply() *Worker { + var worker *Worker + select { + case worker = <-pool.workers: + default: + log.Debug("wait for workers", zap.String("pool", pool.name)) + worker = <-pool.workers + } + return worker +} + +// recycle sends an idle worker back to the channel +func (pool *WorkerPool) recycle(worker *Worker) { + if worker == nil { + panic("invalid restore worker") + } + pool.workers <- worker +} + +// HasWorker checks if the pool has unallocated workers. +func (pool *WorkerPool) HasWorker() bool { + return len(pool.workers) > 0 +} + +// WaitFinished waits till the pool finishs all the tasks. +func (pool *WorkerPool) WaitFinished() { + pool.wg.Wait() +} + +// GetColumnsFromIndex returns `ColumnInfo`s of the specified index. +func GetColumnsFromIndex(index *model.IndexInfo, tableInfo *model.TableInfo) []*model.ColumnInfo { + indexColumns := make([]*model.ColumnInfo, 0, len(index.Columns)) + for _, indexColumn := range index.Columns { + indexColumns = append(indexColumns, tableInfo.Columns[indexColumn.Offset]) + } + + return indexColumns +} + +// GetTableRowsQueryFormat returns a rowsQuerySQL template for the specific table. +// +// e.g. SELECT /*!40001 SQL_NO_CACHE */ `a`, `b` FROM `schema`.`table` WHERE %s ORDER BY `a`. 
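+//
+// A minimal usage sketch (the range condition here is hypothetical; callers
+// substitute a chunk's generated range for the %s placeholder):
+//
+//	query, orderKeyCols := GetTableRowsQueryFormat("test", "t", tableInfo, "")
+//	rowsSQL := fmt.Sprintf(query, "`a` > 1 AND `a` <= 100")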
+func GetTableRowsQueryFormat(schema, table string, tableInfo *model.TableInfo, collation string) (string, []*model.ColumnInfo) { + orderKeys, orderKeyCols := dbutil.SelectUniqueOrderKey(tableInfo) + + columnNames := make([]string, 0, len(tableInfo.Columns)) + for _, col := range tableInfo.Columns { + if col.Hidden { + continue + } + + name := dbutil.ColumnName(col.Name.O) + // When col value is 0, the result is NULL. + // But we can use ISNULL to distinguish between null and 0. + if col.FieldType.GetType() == mysql.TypeFloat { + name = fmt.Sprintf("round(%s, 5-floor(log10(abs(%s)))) as %s", name, name, name) + } else if col.FieldType.GetType() == mysql.TypeDouble { + name = fmt.Sprintf("round(%s, 14-floor(log10(abs(%s)))) as %s", name, name, name) + } + columnNames = append(columnNames, name) + } + columns := strings.Join(columnNames, ", ") + if collation != "" { + collation = fmt.Sprintf(" COLLATE '%s'", collation) + } + + for i, key := range orderKeys { + orderKeys[i] = dbutil.ColumnName(key) + } + + query := fmt.Sprintf("SELECT /*!40001 SQL_NO_CACHE */ %s FROM %s WHERE %%s ORDER BY %s%s", + columns, dbutil.TableName(schema, table), strings.Join(orderKeys, ","), collation) + + return query, orderKeyCols +} + +// GenerateReplaceDML returns the insert SQL for the specific row values. +func GenerateReplaceDML(data map[string]*dbutil.ColumnData, table *model.TableInfo, schema string) string { + colNames := make([]string, 0, len(table.Columns)) + values := make([]string, 0, len(table.Columns)) + for _, col := range table.Columns { + if col.IsGenerated() { + continue + } + + colNames = append(colNames, dbutil.ColumnName(col.Name.O)) + if data[col.Name.O].IsNull { + values = append(values, "NULL") + continue + } + + if NeedQuotes(col.FieldType.GetType()) { + if IsBlobType(col.FieldType.GetType()) || IsBinaryColumn(col) { + values = append(values, fmt.Sprintf("x'%x'", data[col.Name.O].Data)) + } else { + values = append(values, fmt.Sprintf("'%s'", strings.Replace(string(data[col.Name.O].Data), "'", "\\'", -1))) + } + } else { + values = append(values, string(data[col.Name.O].Data)) + } + } + + return fmt.Sprintf("REPLACE INTO %s(%s) VALUES (%s);", dbutil.TableName(schema, table.Name.O), strings.Join(colNames, ","), strings.Join(values, ",")) +} + +// GerateReplaceDMLWithAnnotation returns the replace SQL for the specific 2 rows. +// And add Annotations to show the different columns. 
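+// The annotation is rendered with tablewriter as a block comment listing only
+// the columns that differ, with one row for the source values and one for the
+// target values, followed by a REPLACE statement built from the source row.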
+func GenerateReplaceDMLWithAnnotation(source, target map[string]*dbutil.ColumnData, table *model.TableInfo, schema string) string { + sqlColNames := make([]string, 0, len(table.Columns)) + sqlValues := make([]string, 0, len(table.Columns)) + colNames := append(make([]string, 0, len(table.Columns)+1), "diff columns") + values1 := append(make([]string, 0, len(table.Columns)+1), "source data") + values2 := append(make([]string, 0, len(table.Columns)+1), "target data") + tableString := &strings.Builder{} + diffTable := tablewriter.NewWriter(tableString) + for _, col := range table.Columns { + if col.IsGenerated() { + continue + } + + var data1, data2 *dbutil.ColumnData + var value1 string + data1 = source[col.Name.O] + data2 = target[col.Name.O] + + if data1.IsNull { + value1 = "NULL" + } else { + if NeedQuotes(col.FieldType.GetType()) { + if IsBlobType(col.FieldType.GetType()) || IsBinaryColumn(col) { + value1 = fmt.Sprintf("x'%x'", data1.Data) + } else { + value1 = fmt.Sprintf("'%s'", strings.Replace(string(data1.Data), "'", "\\'", -1)) + } + } else { + value1 = string(data1.Data) + } + } + colName := dbutil.ColumnName(col.Name.O) + sqlColNames = append(sqlColNames, colName) + sqlValues = append(sqlValues, value1) + + // Only show different columns in annotations. + if (string(data1.Data) == string(data2.Data)) && (data1.IsNull == data2.IsNull) { + continue + } + + colNames = append(colNames, colName) + values1 = append(values1, value1) + + if data2.IsNull { + values2 = append(values2, "NULL") + } else { + if NeedQuotes(col.FieldType.GetType()) { + if IsBlobType(col.FieldType.GetType()) || IsBinaryColumn(col) { + values2 = append(values2, fmt.Sprintf("x'%x'", data1.Data)) + } else { + values2 = append(values2, fmt.Sprintf("'%s'", strings.Replace(string(data2.Data), "'", "\\'", -1))) + } + } else { + values2 = append(values2, string(data2.Data)) + } + } + + } + + diffTable.SetRowLine(true) + diffTable.SetHeader(colNames) + diffTable.Append(values1) + diffTable.Append(values2) + diffTable.SetCenterSeparator("╋") + diffTable.SetColumnSeparator("╏") + diffTable.SetRowSeparator("╍") + diffTable.SetAlignment(tablewriter.ALIGN_LEFT) + diffTable.SetBorder(false) + diffTable.Render() + + return fmt.Sprintf("/*\n%s*/\nREPLACE INTO %s(%s) VALUES (%s);", tableString.String(), dbutil.TableName(schema, table.Name.O), strings.Join(sqlColNames, ","), strings.Join(sqlValues, ",")) +} + +// GerateReplaceDMLWithAnnotation returns the delete SQL for the specific row. +func GenerateDeleteDML(data map[string]*dbutil.ColumnData, table *model.TableInfo, schema string) string { + kvs := make([]string, 0, len(table.Columns)) + for _, col := range table.Columns { + if col.IsGenerated() { + continue + } + + if data[col.Name.O].IsNull { + kvs = append(kvs, fmt.Sprintf("%s is NULL", dbutil.ColumnName(col.Name.O))) + continue + } + + if NeedQuotes(col.FieldType.GetType()) { + if IsBlobType(col.FieldType.GetType()) || IsBinaryColumn(col) { + kvs = append(kvs, fmt.Sprintf("%s = x'%x'", dbutil.ColumnName(col.Name.O), data[col.Name.O].Data)) + } else { + kvs = append(kvs, fmt.Sprintf("%s = '%s'", dbutil.ColumnName(col.Name.O), strings.Replace(string(data[col.Name.O].Data), "'", "\\'", -1))) + } + } else { + kvs = append(kvs, fmt.Sprintf("%s = %s", dbutil.ColumnName(col.Name.O), string(data[col.Name.O].Data))) + } + } + return fmt.Sprintf("DELETE FROM %s WHERE %s LIMIT 1;", dbutil.TableName(schema, table.Name.O), strings.Join(kvs, " AND ")) + +} + +// isCompatible checks whether 2 column types are compatible. +// e.g. 
char and vachar. +func isCompatible(tp1, tp2 byte) bool { + if tp1 == tp2 { + return true + } + + log.Warn("column type different, check compatibility.") + var t1, t2 int + switch tp1 { + case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeLonglong, mysql.TypeInt24: + t1 = 1 + case mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob: + t1 = 2 + case mysql.TypeVarString, mysql.TypeString, mysql.TypeVarchar: + t1 = 3 + default: + return false + } + + switch tp2 { + case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeLonglong, mysql.TypeInt24: + t2 = 1 + case mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob: + t2 = 2 + case mysql.TypeVarString, mysql.TypeString, mysql.TypeVarchar: + t2 = 3 + default: + return false + } + + return t1 == t2 +} + +func sameProperties(c1, c2 *model.ColumnInfo) bool { + switch c1.GetType() { + case mysql.TypeVarString, mysql.TypeString, mysql.TypeVarchar: + if c1.FieldType.GetCharset() != c2.FieldType.GetCharset() { + log.Warn("Ignoring character set differences", + zap.String("column name", c1.Name.O), + zap.String("charset source", c1.FieldType.GetCharset()), + zap.String("charset target", c2.FieldType.GetCharset()), + ) + } + if c1.FieldType.GetCollate() != c2.FieldType.GetCollate() { + log.Warn("Ignoring collation differences", + zap.String("column name", c1.Name.O), + zap.String("collation source", c1.FieldType.GetCollate()), + zap.String("collation target", c2.FieldType.GetCollate()), + ) + } + return c1.FieldType.GetFlen() == c2.FieldType.GetFlen() + default: + return true + } +} + +// CompareStruct compare tables' columns and indices from upstream and downstream. +// There are 2 return values: +// +// isEqual : result of comparing tables' columns and indices +// isPanic : the differences of tables' struct can not be ignored. Need to skip data comparing. +func CompareStruct(upstreamTableInfos []*model.TableInfo, downstreamTableInfo *model.TableInfo) (isEqual bool, isPanic bool) { + // compare columns + for _, upstreamTableInfo := range upstreamTableInfos { + if len(upstreamTableInfo.Columns) != len(downstreamTableInfo.Columns) { + // the numbers of each columns are different, don't compare data + log.Error("column num not equal", + zap.String("upstream table", upstreamTableInfo.Name.O), + zap.Int("column num", len(upstreamTableInfo.Columns)), + zap.String("downstream table", downstreamTableInfo.Name.O), + zap.Int("column num", len(downstreamTableInfo.Columns)), + ) + return false, true + } + + for i, column := range upstreamTableInfo.Columns { + if column.Name.O != downstreamTableInfo.Columns[i].Name.O { + // names are different, panic! + log.Error("column name not equal", + zap.String("upstream table", upstreamTableInfo.Name.O), + zap.String("column name", column.Name.O), + zap.String("downstream table", downstreamTableInfo.Name.O), + zap.String("column name", downstreamTableInfo.Columns[i].Name.O), + ) + return false, true + } + + if !isCompatible(column.GetType(), downstreamTableInfo.Columns[i].GetType()) { + // column types are different, panic! 
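+				// (for example an integer column against a string column), so the
+				// data comparison for this table is skipped entirely.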
+ log.Error("column type not compatible", + zap.String("upstream table", upstreamTableInfo.Name.O), + zap.String("column name", column.Name.O), + zap.Uint8("column type", column.GetType()), + zap.String("downstream table", downstreamTableInfo.Name.O), + zap.String("column name", downstreamTableInfo.Columns[i].Name.O), + zap.Uint8("column type", downstreamTableInfo.Columns[i].GetType()), + ) + return false, true + } + + if !sameProperties(column, downstreamTableInfo.Columns[i]) { + // column properties are different, panic! + log.Error("column properties not compatible", + zap.String("upstream table", upstreamTableInfo.Name.O), + zap.String("column name", column.Name.O), + zap.Uint8("column type", column.GetType()), + zap.String("downstream table", downstreamTableInfo.Name.O), + zap.String("column name", downstreamTableInfo.Columns[i].Name.O), + zap.Uint8("column type", downstreamTableInfo.Columns[i].GetType()), + ) + return false, true + } + } + } + + // compare indices + deleteIndicesSet := make(map[string]struct{}) + unilateralIndicesSet := make(map[string]struct{}) + downstreamIndicesMap := make(map[string]*struct { + index *model.IndexInfo + cnt int + }) + for _, index := range downstreamTableInfo.Indices { + downstreamIndicesMap[index.Name.O] = &struct { + index *model.IndexInfo + cnt int + }{index, 0} + } + for _, upstreamTableInfo := range upstreamTableInfos { + + NextIndex: + for _, upstreamIndex := range upstreamTableInfo.Indices { + if _, ok := deleteIndicesSet[upstreamIndex.Name.O]; ok { + continue NextIndex + } + + indexU, ok := downstreamIndicesMap[upstreamIndex.Name.O] + if ok { + if len(indexU.index.Columns) != len(upstreamIndex.Columns) { + // different index, should be removed + deleteIndicesSet[upstreamIndex.Name.O] = struct{}{} + continue NextIndex + } + + for i, indexColumn := range upstreamIndex.Columns { + if indexColumn.Offset != indexU.index.Columns[i].Offset || indexColumn.Name.O != indexU.index.Columns[i].Name.O { + // different index, should be removed + deleteIndicesSet[upstreamIndex.Name.O] = struct{}{} + continue NextIndex + } + } + indexU.cnt = indexU.cnt + 1 + } else { + unilateralIndicesSet[upstreamIndex.Name.O] = struct{}{} + } + } + } + + existBilateralIndex := false + for _, indexU := range downstreamIndicesMap { + if _, ok := deleteIndicesSet[indexU.index.Name.O]; ok { + continue + } + if indexU.cnt < len(upstreamTableInfos) { + // Some upstreamInfos don't have this index. + unilateralIndicesSet[indexU.index.Name.O] = struct{}{} + } else { + // there is an index the whole tables have, + // so unilateral indices can be deleted. + existBilateralIndex = true + } + } + + // delete indices + // If there exist bilateral index, unilateral indices can be deleted. 
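+	// Otherwise every index is kept and only a warning is logged.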
+ if existBilateralIndex { + for indexName := range unilateralIndicesSet { + deleteIndicesSet[indexName] = struct{}{} + } + } else { + log.Warn("no index exists in both upstream and downstream", zap.String("table", downstreamTableInfo.Name.O)) + } + if len(deleteIndicesSet) > 0 { + newDownstreamIndices := make([]*model.IndexInfo, 0, len(downstreamTableInfo.Indices)) + for _, index := range downstreamTableInfo.Indices { + if _, ok := deleteIndicesSet[index.Name.O]; !ok { + newDownstreamIndices = append(newDownstreamIndices, index) + } else { + log.Debug("delete downstream index", zap.String("name", downstreamTableInfo.Name.O), zap.String("index", index.Name.O)) + } + } + downstreamTableInfo.Indices = newDownstreamIndices + + for _, upstreamTableInfo := range upstreamTableInfos { + newUpstreamIndices := make([]*model.IndexInfo, 0, len(upstreamTableInfo.Indices)) + for _, index := range upstreamTableInfo.Indices { + if _, ok := deleteIndicesSet[index.Name.O]; !ok { + newUpstreamIndices = append(newUpstreamIndices, index) + } else { + log.Debug("delete upstream index", zap.String("name", upstreamTableInfo.Name.O), zap.String("index", index.Name.O)) + } + } + upstreamTableInfo.Indices = newUpstreamIndices + } + + } + + return len(deleteIndicesSet) == 0, false +} + +// NeedQuotes determines whether an escape character is required for `'`. +func NeedQuotes(tp byte) bool { + return !(dbutil.IsNumberType(tp) || dbutil.IsFloatType(tp)) +} + +// CompareData compare two row datas. +// equal = true: map1 = map2 +// equal = false: +// 1. cmp = 0: map1 and map2 have the same orderkeycolumns, but other columns are in difference. +// 2. cmp = -1: map1 < map2 (by comparing the orderkeycolumns) +// 3. cmp = 1: map1 > map2 +func CompareData(map1, map2 map[string]*dbutil.ColumnData, orderKeyCols, columns []*model.ColumnInfo) (equal bool, cmp int32, err error) { + var ( + data1, data2 *dbutil.ColumnData + str1, str2 string + key string + ok bool + ) + + equal = true + + defer func() { + if equal || err != nil { + return + } + + if cmp == 0 { + log.Warn("find different row", zap.String("column", key), zap.String("row1", rowToString(map1)), zap.String("row2", rowToString(map2))) + } else if cmp > 0 { + log.Warn("target had superfluous data", zap.String("row", rowToString(map2))) + } else { + log.Warn("target lack data", zap.String("row", rowToString(map1))) + } + }() + + for _, column := range columns { + if data1, ok = map1[column.Name.O]; !ok { + return false, 0, errors.Errorf("upstream don't have key %s", column.Name.O) + } + if data2, ok = map2[column.Name.O]; !ok { + return false, 0, errors.Errorf("downstream don't have key %s", column.Name.O) + } + str1 = string(data1.Data) + str2 = string(data2.Data) + if column.FieldType.GetType() == mysql.TypeFloat || column.FieldType.GetType() == mysql.TypeDouble { + if data1.IsNull && data2.IsNull { + continue + } else if !data1.IsNull && !data2.IsNull { + num1, err1 := strconv.ParseFloat(str1, 64) + num2, err2 := strconv.ParseFloat(str2, 64) + if err1 != nil || err2 != nil { + err = errors.Errorf("convert %s, %s to float failed, err1: %v, err2: %v", str1, str2, err1, err2) + return + } + if math.Abs(num1-num2) <= 1e-6 { + continue + } + } + } else if column.FieldType.GetType() == mysql.TypeJSON { + if (str1 == str2) || (data1.IsNull && data2.IsNull) { + continue + } + if !data1.IsNull && !data2.IsNull { + var v1, v2 any + err := json.Unmarshal(data1.Data, &v1) + if err != nil { + return false, 0, errors.Errorf("unmarshal json %s failed, error %v", str1, err) + } + err 
= json.Unmarshal(data2.Data, &v2) + if err != nil { + return false, 0, errors.Errorf("unmarshal json %s failed, error %v", str2, err) + } + if reflect.DeepEqual(v1, v2) { + continue + } + } + } else { + if (str1 == str2) && (data1.IsNull == data2.IsNull) { + continue + } + } + + equal = false + key = column.Name.O + break + + } + if equal { + return + } + + // Not Equal. Compare orderkeycolumns. + for _, col := range orderKeyCols { + if data1, ok = map1[col.Name.O]; !ok { + err = errors.Errorf("don't have key %s", col.Name.O) + return + } + if data2, ok = map2[col.Name.O]; !ok { + err = errors.Errorf("don't have key %s", col.Name.O) + return + } + + if NeedQuotes(col.FieldType.GetType()) { + strData1 := string(data1.Data) + strData2 := string(data2.Data) + + if len(strData1) == len(strData2) && strData1 == strData2 { + continue + } + + if strData1 < strData2 { + cmp = -1 + } else { + cmp = 1 + } + break + } else if data1.IsNull || data2.IsNull { + if data1.IsNull && data2.IsNull { + continue + } + + if data1.IsNull { + cmp = -1 + } else { + cmp = 1 + } + break + } else { + num1, err1 := strconv.ParseFloat(string(data1.Data), 64) + num2, err2 := strconv.ParseFloat(string(data2.Data), 64) + if err1 != nil || err2 != nil { + err = errors.Errorf("convert %s, %s to float failed, err1: %v, err2: %v", string(data1.Data), string(data2.Data), err1, err2) + return + } + + if num1 == num2 { + continue + } + + if num1 < num2 { + cmp = -1 + } else { + cmp = 1 + } + break + } + } + + return +} + +// rowtoString covert rowData to String +func rowToString(row map[string]*dbutil.ColumnData) string { + var s strings.Builder + s.WriteString("{ ") + for key, val := range row { + if val.IsNull { + s.WriteString(fmt.Sprintf("%s: IsNull, ", key)) + } else { + s.WriteString(fmt.Sprintf("%s: %s, ", key, val.Data)) + } + } + s.WriteString(" }") + + return s.String() +} + +// MinLenInSlices returns the smallest length among slices. +func MinLenInSlices(slices [][]string) int { + min := 0 + for i, slice := range slices { + if i == 0 || len(slice) < min { + min = len(slice) + } + } + + return min +} + +// SliceToMap converts Slice to Set +func SliceToMap(slice []string) map[string]interface{} { + sMap := make(map[string]interface{}) + for _, str := range slice { + sMap[str] = struct{}{} + } + return sMap +} + +// GetApproximateMidBySize return the `count`th row in rows that meet the `limitRange`. +func GetApproximateMidBySize(ctx context.Context, db *sql.DB, schema, table string, indexColumns []*model.ColumnInfo, limitRange string, args []interface{}, count int64) (map[string]string, error) { + /* + example + mysql> select i_id, i_im_id, i_name from item where i_id > 0 order by i_id, i_im_id, i_name collate limit 5000,1; + +------+---------+-----------------+ + | i_id | i_im_id | i_name | + +------+---------+-----------------+ + | 5001 | 3494 | S66WiWB3t1FUG02 | + +------+---------+-----------------+ + 1 row in set (0.09 sec) + */ + columnNames := make([]string, 0, len(indexColumns)) + for _, col := range indexColumns { + columnNames = append(columnNames, dbutil.ColumnName(col.Name.O)) + } + + // Note: add collation after order by will largely reduce the speed. + query := fmt.Sprintf("SELECT %s FROM %s WHERE %s ORDER BY %s LIMIT 1 OFFSET %d", + strings.Join(columnNames, ", "), + dbutil.TableName(schema, table), + limitRange, + strings.Join(columnNames, ", "), + count/2) + log.Debug("get mid by size", zap.String("sql", query), zap.Reflect("args", args)) + rows, err := db.QueryContext(ctx, query, args...) 
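+	// An empty result set here is not an error: it simply means the range has
+	// fewer rows than the requested offset, and (nil, nil) is returned below.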
+ if err != nil { + return nil, errors.Trace(err) + } + defer rows.Close() + columns := make([]interface{}, len(indexColumns)) + for i := range columns { + columns[i] = new(string) + } + if !rows.Next() { + if err := rows.Err(); err != nil { + return nil, errors.Trace(err) + } + log.Error("there is no row in result set") + return nil, nil + } + err = rows.Scan(columns...) + if err != nil { + return nil, errors.Trace(err) + } + columnValues := make(map[string]string) + for i, column := range columns { + columnValues[indexColumns[i].Name.O] = *column.(*string) + } + return columnValues, nil +} + +// GetTableSize loads the TableSize from `information_schema`.`tables`. +func GetTableSize(ctx context.Context, db *sql.DB, schemaName, tableName string) (int64, error) { + query := "select sum(data_length) as data from `information_schema`.`tables` where table_schema=? and table_name=? GROUP BY data_length;" + var dataSize sql.NullInt64 + err := db.QueryRowContext(ctx, query, schemaName, tableName).Scan(&dataSize) + if err != nil { + return int64(0), errors.Trace(err) + } + return dataSize.Int64, nil +} + +// GetCountAndMd5Checksum returns checksum code and count of some data by given condition +func GetCountAndMd5Checksum(ctx context.Context, db *sql.DB, schemaName, tableName string, tbInfo *model.TableInfo, limitRange string, args []interface{}) (int64, uint64, error) { + /* + calculate MD5 checksum and count example: + mysql> SELECT COUNT(*) as CNT, BIT_XOR(CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `name`, CONCAT(ISNULL(`id`), ISNULL(`name`)))), 1, 16), 16, 10) AS UNSIGNED) ^ CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `name`, CONCAT(ISNULL(`id`), ISNULL(`name`)))), 17, 16), 16, 10) AS UNSIGNED)) as CHECKSUM FROM `a`.`t`; + +--------+---------------------- + | CNT | CHECKSUM | + +--------+---------------------- + | 100000 | 3462532621352132810 | + +--------+---------------------- + 1 row in set (0.46 sec) + */ + columnNames := make([]string, 0, len(tbInfo.Columns)) + columnIsNull := make([]string, 0, len(tbInfo.Columns)) + log.Debug("table columns", zap.Any("columns", tbInfo.Columns)) + for _, col := range tbInfo.Columns { + if col.Hidden { + continue + } + name := dbutil.ColumnName(col.Name.O) + // When col value is 0, the result is NULL. + // But we can use ISNULL to distinguish between null and 0. 
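+		// Float and double values are rounded to a fixed number of significant
+		// digits so both sides feed the same textual value into the checksum.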
+ if col.FieldType.GetType() == mysql.TypeFloat { + name = fmt.Sprintf("round(%s, 5-floor(log10(abs(%s))))", name, name) + } else if col.FieldType.GetType() == mysql.TypeDouble { + name = fmt.Sprintf("round(%s, 14-floor(log10(abs(%s))))", name, name) + } + columnNames = append(columnNames, name) + columnIsNull = append(columnIsNull, fmt.Sprintf("ISNULL(%s)", name)) + } + + query := fmt.Sprintf("SELECT COUNT(*) as CNT, BIT_XOR(CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', %s, CONCAT(%s))), 1, 16), 16, 10) AS UNSIGNED) ^ CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', %s, CONCAT(%s))), 17, 16), 16, 10) AS UNSIGNED)) as CHECKSUM FROM %s WHERE %s;", + strings.Join(columnNames, ", "), strings.Join(columnIsNull, ", "), strings.Join(columnNames, ", "), strings.Join(columnIsNull, ", "), dbutil.TableName(schemaName, tableName), limitRange) + log.Debug("count and checksum", zap.String("sql", query), zap.Reflect("args", args)) + + var count sql.NullInt64 + var checksum uint64 + err := db.QueryRowContext(ctx, query, args...).Scan(&count, &checksum) + if err != nil { + log.Warn("execute checksum query fail", zap.String("query", query), zap.Reflect("args", args), zap.Error(err)) + return -1, 0, errors.Trace(err) + } + if !count.Valid { + // if don't have any data, the checksum will be `NULL` + log.Warn("get empty count", zap.String("sql", query), zap.Reflect("args", args)) + return 0, 0, nil + } + return count.Int64, checksum, nil +} + +// GetRandomValues returns some random values. Different from /pkg/dbutil.GetRandomValues, it returns multi-columns at the same time. +func GetRandomValues(ctx context.Context, db *sql.DB, schema, table string, columns []*model.ColumnInfo, num int, limitRange string, limitArgs []interface{}, collation string) ([][]string, error) { + /* + example: there is one index consists of `id`, `a`, `b`. + mysql> SELECT `id`, `a`, `b` FROM (SELECT `id`, `a`, `b`, rand() rand_value FROM `test`.`test` WHERE `id` COLLATE "latin1_bin" > 0 AND `id` COLLATE "latin1_bin" < 100 ORDER BY rand_value LIMIT 5) rand_tmp ORDER BY `id` COLLATE "latin1_bin"; + +------+------+------+ + | id | a | b | + +------+------+------+ + | 1 | 2 | 3 | + | 2 | 3 | 4 | + | 3 | 4 | 5 | + +------+------+------+ + */ + + if limitRange == "" { + limitRange = "TRUE" + } + + if collation != "" { + collation = fmt.Sprintf(" COLLATE '%s'", collation) + } + + columnNames := make([]string, 0, len(columns)) + for _, col := range columns { + columnNames = append(columnNames, dbutil.ColumnName(col.Name.O)) + } + + query := fmt.Sprintf("SELECT %[1]s FROM (SELECT %[1]s, rand() rand_value FROM %[2]s WHERE %[3]s ORDER BY rand_value LIMIT %[4]d)rand_tmp ORDER BY %[1]s%[5]s", + strings.Join(columnNames, ", "), dbutil.TableName(schema, table), limitRange, num, collation) + log.Debug("get random values", zap.String("sql", query), zap.Reflect("args", limitArgs)) + + rows, err := db.QueryContext(ctx, query, limitArgs...) + if err != nil { + return nil, errors.Trace(err) + } + defer rows.Close() + + randomValues := make([][]string, 0, num) +NEXTROW: + for rows.Next() { + colVals := make([][]byte, len(columns)) + colValsI := make([]interface{}, len(colVals)) + for i := range colValsI { + colValsI[i] = &colVals[i] + } + err = rows.Scan(colValsI...) 
+ if err != nil { + return nil, errors.Trace(err) + } + + randomValue := make([]string, len(columns)) + + for i, col := range colVals { + if col == nil { + continue NEXTROW + } + randomValue[i] = string(col) + } + randomValues = append(randomValues, randomValue) + } + + return randomValues, errors.Trace(rows.Err()) +} + +// ResetColumns removes index from `tableInfo.Indices`, whose columns appear in `columns`. +// And removes column from `tableInfo.Columns`, which appears in `columns`. +// And initializes the offset of the column of each index to new `tableInfo.Columns`. +// +// Return the new tableInfo and the flag whether the columns have timestamp type. +func ResetColumns(tableInfo *model.TableInfo, columns []string) (*model.TableInfo, bool) { + // Although columns is empty, need to initialize indices' offset mapping to column. + + hasTimeStampType := false + // Remove all index from `tableInfo.Indices`, whose columns are involved of any column in `columns`. + removeColMap := SliceToMap(columns) + for i := 0; i < len(tableInfo.Indices); i++ { + index := tableInfo.Indices[i] + for j := 0; j < len(index.Columns); j++ { + col := index.Columns[j] + if _, ok := removeColMap[col.Name.O]; ok { + tableInfo.Indices = append(tableInfo.Indices[:i], tableInfo.Indices[i+1:]...) + i-- + break + } + } + } + + // Remove column from `tableInfo.Columns`, which appears in `columns`. + for j := 0; j < len(tableInfo.Columns); j++ { + col := tableInfo.Columns[j] + if _, ok := removeColMap[col.Name.O]; ok { + tableInfo.Columns = append(tableInfo.Columns[:j], tableInfo.Columns[j+1:]...) + j-- + } + } + + // calculate column offset + colMap := make(map[string]int, len(tableInfo.Columns)) + for i, col := range tableInfo.Columns { + col.Offset = i + colMap[col.Name.O] = i + hasTimeStampType = hasTimeStampType || (col.FieldType.GetType() == mysql.TypeTimestamp) + } + + // Initialize the offset of the column of each index to new `tableInfo.Columns`. + for _, index := range tableInfo.Indices { + for _, col := range index.Columns { + offset, ok := colMap[col.Name.O] + if !ok { + // this should never happened + log.Fatal("column not exists", zap.String("column", col.Name.O)) + } + col.Offset = offset + } + } + + return tableInfo, hasTimeStampType +} + +// UniqueID returns `schema`.`table` +func UniqueID(schema string, table string) string { + // QuoteSchema quotes a full table name + return fmt.Sprintf("`%s`.`%s`", EscapeName(schema), EscapeName(table)) +} + +// EscapeName replaces all "`" in name with "“" +func EscapeName(name string) string { + return strings.Replace(name, "`", "``", -1) +} + +// GetBetterIndex returns the index more dinstict. +// If the index is primary key or unique, it can be return directly. +// Otherwise select the index which has higher value of `COUNT(DISTINCT a)/COUNT(*)`. 
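+// The selectivity used for ranking is estimated from the first column of each
+// index only, via GetSelectivity.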
+func GetBetterIndex(ctx context.Context, db *sql.DB, schema, table string, tableInfo *model.TableInfo) ([]*model.IndexInfo, error) { + // SELECT COUNT(DISTINCT city)/COUNT(*) FROM `schema`.`table`; + indices := dbutil.FindAllIndex(tableInfo) + for _, index := range indices { + if index.Primary || index.Unique { + return []*model.IndexInfo{index}, nil + } + } + sels := make([]float64, len(indices)) + for _, index := range indices { + column := GetColumnsFromIndex(index, tableInfo)[0] + selectivity, err := GetSelectivity(ctx, db, schema, table, column.Name.O, tableInfo) + if err != nil { + return indices, errors.Trace(err) + } + log.Debug("index selectivity", zap.String("table", dbutil.TableName(schema, table)), zap.Float64("selectivity", selectivity)) + sels = append(sels, selectivity) + } + sort.Slice(indices, func(i, j int) bool { + return sels[i] > sels[j] + }) + return indices, nil +} + +// GetSelectivity returns the value of `COUNT(DISTINCT col)/COUNT(1)` SQL. +func GetSelectivity(ctx context.Context, db *sql.DB, schemaName, tableName, columnName string, tbInfo *model.TableInfo) (float64, error) { + query := fmt.Sprintf("SELECT COUNT(DISTINCT %s)/COUNT(1) as SEL FROM %s;", dbutil.ColumnName(columnName), dbutil.TableName(schemaName, tableName)) + var selectivity sql.NullFloat64 + args := []interface{}{} + err := db.QueryRowContext(ctx, query, args...).Scan(&selectivity) + if err != nil { + log.Warn("execute get selectivity query fail", zap.String("query", query)) + return 0.0, errors.Trace(err) + } + if !selectivity.Valid { + // if don't have any data, the checksum will be `NULL` + log.Warn("get empty count or checksum", zap.String("sql", query)) + return 0.0, nil + } + return selectivity.Float64, nil +} + +// CalculateChunkSize returns chunkSize according to table rows count. +func CalculateChunkSize(rowCount int64) int64 { + // we assume chunkSize is 50000 for any cluster. + chunkSize := int64(50000) + if rowCount > int64(chunkSize)*10000 { + // we assume we only need 10k chunks for any table. + chunkSize = rowCount / 10000 + } + return chunkSize +} + +// AnalyzeTable do 'ANALYZE TABLE `table`' SQL. +func AnalyzeTable(ctx context.Context, db *sql.DB, tableName string) error { + _, err := db.ExecContext(ctx, "ANALYZE TABLE "+tableName) + return err +} + +// GetSQLFileName returns filename of fix-SQL identified by chunk's `Index`. +func GetSQLFileName(index *chunk.ChunkID) string { + return fmt.Sprintf("%d:%d-%d:%d", index.TableIndex, index.BucketIndexLeft, index.BucketIndexRight, index.ChunkIndex) +} + +// GetChunkIDFromSQLFileName convert the filename to chunk's `Index`. +func GetChunkIDFromSQLFileName(fileIDStr string) (int, int, int, int, error) { + ids := strings.Split(fileIDStr, ":") + tableIndex, err := strconv.Atoi(ids[0]) + if err != nil { + return 0, 0, 0, 0, errors.Trace(err) + } + bucketIndex := strings.Split(ids[1], "-") + bucketIndexLeft, err := strconv.Atoi(bucketIndex[0]) + if err != nil { + return 0, 0, 0, 0, errors.Trace(err) + } + bucketIndexRight, err := strconv.Atoi(bucketIndex[1]) + if err != nil { + return 0, 0, 0, 0, errors.Trace(err) + } + chunkIndex, err := strconv.Atoi(ids[2]) + if err != nil { + return 0, 0, 0, 0, errors.Trace(err) + } + return tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, nil +} + +// IsRangeTrivial checks if a user configured Range is empty or `TRUE`. 
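+// The check is case-insensitive, so both "TRUE" and "true" count as trivial.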
+func IsRangeTrivial(rangeCond string) bool { + if rangeCond == "" { + return true + } + return strings.ToLower(rangeCond) == "true" +} + +func IsBinaryColumn(col *model.ColumnInfo) bool { + // varbinary or binary + return (col.GetType() == mysql.TypeVarchar || col.GetType() == mysql.TypeString) && mysql.HasBinaryFlag(col.GetFlag()) +} diff --git a/sync_diff_inspector/utils/utils_test.go b/sync_diff_inspector/utils/utils_test.go new file mode 100644 index 00000000000..969add2c0f6 --- /dev/null +++ b/sync_diff_inspector/utils/utils_test.go @@ -0,0 +1,688 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "context" + "database/sql/driver" + "fmt" + "testing" + "time" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" + "github.com/stretchr/testify/require" +) + +type tableCaseType struct { + schema string + table string + createTableSQL string + rowColumns []string + rows [][]driver.Value + indices []string + sels []float64 + selected string +} + +func TestWorkerPool(t *testing.T) { + pool := NewWorkerPool(2, "test") + infoCh := make(chan uint64) + doneCh := make(chan struct{}) + var v uint64 = 0 + pool.Apply(func() { + infoCh <- 2 + }) + pool.Apply(func() { + new_v := <-infoCh + v = new_v + doneCh <- struct{}{} + }) + <-doneCh + require.Equal(t, v, uint64(2)) + require.True(t, pool.HasWorker()) + pool.WaitFinished() +} + +func TestStringsToInterface(t *testing.T) { + res := []interface{}{"1", "2", "3"} + require.Equal(t, res[0], "1") + require.Equal(t, res[1], "2") + require.Equal(t, res[2], "3") + + require.Equal(t, MinLenInSlices([][]string{{"123", "324", "r32"}, {"32", "23"}}), 2) + + expectSlice := []string{"2", "3", "4"} + sliceMap := SliceToMap(expectSlice) + for _, expect := range expectSlice { + _, ok := sliceMap[expect] + require.True(t, ok) + } + require.Equal(t, len(sliceMap), len(expectSlice)) + + require.Equal(t, UniqueID("123", "456"), "`123`.`456`") + +} + +func TestBasicTableUtilOperation(t *testing.T) { + createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + query, orderKeyCols := GetTableRowsQueryFormat("test", "test", tableInfo, "123") + require.Equal(t, query, "SELECT /*!40001 SQL_NO_CACHE */ `a`, `b`, round(`c`, 5-floor(log10(abs(`c`)))) as `c`, `d` FROM `test`.`test` WHERE %s ORDER BY `a`,`b` COLLATE '123'") + expectName := []string{"a", "b"} + for i, col := range orderKeyCols { + require.Equal(t, col.Name.O, expectName[i]) + } + + data1 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte("a"), IsNull: false}, + "c": {Data: []byte("1.22"), IsNull: false}, + "d": {Data: []byte("sdf"), IsNull: false}, + } + data2 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": 
{Data: []byte("b"), IsNull: false}, + "c": {Data: []byte("2.22"), IsNull: false}, + "d": {Data: []byte("sdf"), IsNull: false}, + } + data3 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("2"), IsNull: false}, + "b": {Data: []byte("a"), IsNull: false}, + "c": {Data: []byte("0.22"), IsNull: false}, + "d": {Data: []byte("asdf"), IsNull: false}, + } + data4 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte("a"), IsNull: true}, + "c": {Data: []byte("0.221"), IsNull: false}, + "d": {Data: []byte("asdf"), IsNull: false}, + } + data5 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("2"), IsNull: false}, + "b": {Data: []byte("a"), IsNull: true}, + "c": {Data: []byte("0.222"), IsNull: false}, + "d": {Data: []byte("asdf"), IsNull: false}, + } + data6 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: true}, + "b": {Data: []byte("a"), IsNull: false}, + "c": {Data: []byte("0.2221"), IsNull: false}, + "d": {Data: []byte("asdf"), IsNull: false}, + } + data7 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: true}, + "b": {Data: []byte("a"), IsNull: false}, + "c": {Data: []byte("0.2221"), IsNull: false}, + "d": {Data: []byte("asdf"), IsNull: false}, + } + data8 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte("a"), IsNull: false}, + "c": {Data: []byte(""), IsNull: true}, + "d": {Data: []byte("sdf"), IsNull: false}, + } + data9 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte("a"), IsNull: false}, + "c": {Data: []byte("0"), IsNull: false}, + "d": {Data: []byte("sdf"), IsNull: false}, + } + + columns := tableInfo.Columns + + require.Equal(t, GenerateReplaceDML(data1, tableInfo, "schema"), "REPLACE INTO `schema`.`test`(`a`,`b`,`c`,`d`) VALUES (1,'a',1.22,'sdf');") + require.Equal(t, GenerateDeleteDML(data8, tableInfo, "schema"), "DELETE FROM `schema`.`test` WHERE `a` = 1 AND `b` = 'a' AND `c` is NULL AND `d` = 'sdf' LIMIT 1;") + require.Equal(t, GenerateDeleteDML(data9, tableInfo, "schema"), "DELETE FROM `schema`.`test` WHERE `a` = 1 AND `b` = 'a' AND `c` = 0 AND `d` = 'sdf' LIMIT 1;") + require.Equal(t, GenerateReplaceDMLWithAnnotation(data1, data2, tableInfo, "schema"), + "/*\n"+ + " DIFF COLUMNS ╏ `B` ╏ `C` \n"+ + "╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╍╍\n"+ + " source data ╏ 'a' ╏ 1.22 \n"+ + "╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╍╍\n"+ + " target data ╏ 'b' ╏ 2.22 \n"+ + "╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╋╍╍╍╍╍╋╍╍╍╍╍╍╍\n"+ + "*/\n"+ + "REPLACE INTO `schema`.`test`(`a`,`b`,`c`,`d`) VALUES (1,'a',1.22,'sdf');") + require.Equal(t, GenerateDeleteDML(data1, tableInfo, "schema"), "DELETE FROM `schema`.`test` WHERE `a` = 1 AND `b` = 'a' AND `c` = 1.22 AND `d` = 'sdf' LIMIT 1;") + + // same + equal, cmp, err := CompareData(data1, data1, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.True(t, equal) + + // orderkey same but other column different + equal, cmp, err = CompareData(data1, data3, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(-1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data3, data1, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(1)) + require.False(t, equal) + + // orderKey different + equal, cmp, err = CompareData(data1, data2, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(-1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data2, data1, orderKeyCols, columns) + 
require.NoError(t, err) + require.Equal(t, cmp, int32(1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data4, data1, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.False(t, equal) + + equal, cmp, err = CompareData(data1, data4, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.False(t, equal) + + equal, cmp, err = CompareData(data5, data4, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data4, data5, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(-1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data4, data6, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data6, data4, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(-1)) + require.False(t, equal) + + equal, cmp, err = CompareData(data6, data7, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.True(t, equal) + + equal, cmp, err = CompareData(data1, data8, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.False(t, equal) + + equal, cmp, err = CompareData(data8, data1, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.False(t, equal) + + equal, cmp, err = CompareData(data8, data9, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.False(t, equal) + + // Test ignore columns + createTableSQL = "create table `test`.`test`(`a` int, `c` float, `b` varchar(10), `d` datetime, `e` timestamp, primary key(`a`, `b`), key(`c`, `d`))" + tableInfo, err = dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + require.Equal(t, len(tableInfo.Indices), 2) + require.Equal(t, len(tableInfo.Columns), 5) + require.Equal(t, tableInfo.Indices[0].Columns[1].Name.O, "b") + require.Equal(t, tableInfo.Indices[0].Columns[1].Offset, 2) + info, hasTimeStampType := ResetColumns(tableInfo, []string{"c"}) + require.True(t, hasTimeStampType) + require.Equal(t, len(info.Indices), 1) + require.Equal(t, len(info.Columns), 4) + require.Equal(t, tableInfo.Indices[0].Columns[1].Name.O, "b") + require.Equal(t, tableInfo.Indices[0].Columns[1].Offset, 1) +} + +func TestGetCountAndMd5Checksum(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + createTableSQL := "create table `test`.`test`(`a` int, `c` float, `b` varchar(10), `d` datetime, primary key(`a`, `b`), key(`c`, `d`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + mock.ExpectQuery("SELECT COUNT.*FROM `test_schema`\\.`test_table` WHERE \\[23 45\\].*").WithArgs("123", "234").WillReturnRows(sqlmock.NewRows([]string{"CNT", "CHECKSUM"}).AddRow(123, 456)) + + count, checksum, err := GetCountAndMd5Checksum(ctx, conn, "test_schema", "test_table", tableInfo, "[23 45]", []interface{}{"123", "234"}) + require.NoError(t, err) + require.Equal(t, count, int64(123)) + require.Equal(t, checksum, uint64(0x1c8)) +} + +func TestGetApproximateMid(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + conn, mock, err := 
sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), primary key(`a`, `b`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + rows := sqlmock.NewRows([]string{"a", "b"}).AddRow("5", "10") + mock.ExpectQuery("SELECT `a`, `b` FROM `test`.`test_utils` WHERE 2222 ORDER BY `a`, `b` LIMIT 1 OFFSET 10").WithArgs("aaaa").WillReturnRows(rows) + + data, err := GetApproximateMidBySize(ctx, conn, "test", "test_utils", tableInfo.Columns, "2222", []interface{}{"aaaa"}, 20) + require.NoError(t, err) + require.Equal(t, data["a"], "5") + require.Equal(t, data["b"], "10") + + // no data + rows = sqlmock.NewRows([]string{"a", "b"}) + mock.ExpectQuery("SELECT `a`, `b` FROM `test`\\.`test_utils` WHERE 2222.* LIMIT 1 OFFSET 10*").WithArgs("aaaa").WillReturnRows(rows) + + data, err = GetApproximateMidBySize(ctx, conn, "test", "test_utils", tableInfo.Columns, "2222", []interface{}{"aaaa"}, 20) + require.NoError(t, err) + require.Nil(t, data) +} + +func TestGenerateSQLs(t *testing.T) { + createTableSQL := "CREATE TABLE `diff_test`.`atest` (`id` int(24), `name` varchar(24), `birthday` datetime, `update_time` time, `money` decimal(20,2), `id_gen` int(11) GENERATED ALWAYS AS ((`id` + 1)) VIRTUAL, primary key(`id`, `name`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + rowsData := map[string]*dbutil.ColumnData{ + "id": {Data: []byte("1"), IsNull: false}, + "name": {Data: []byte("xxx"), IsNull: false}, + "birthday": {Data: []byte("2018-01-01 00:00:00"), IsNull: false}, + "update_time": {Data: []byte("10:10:10"), IsNull: false}, + "money": {Data: []byte("11.1111"), IsNull: false}, + "id_gen": {Data: []byte("2"), IsNull: false}, // generated column should not be contained in fix sql + } + + replaceSQL := GenerateReplaceDML(rowsData, tableInfo, "diff_test") + deleteSQL := GenerateDeleteDML(rowsData, tableInfo, "diff_test") + require.Equal(t, replaceSQL, "REPLACE INTO `diff_test`.`atest`(`id`,`name`,`birthday`,`update_time`,`money`) VALUES (1,'xxx','2018-01-01 00:00:00','10:10:10',11.1111);") + require.Equal(t, deleteSQL, "DELETE FROM `diff_test`.`atest` WHERE `id` = 1 AND `name` = 'xxx' AND `birthday` = '2018-01-01 00:00:00' AND `update_time` = '10:10:10' AND `money` = 11.1111 LIMIT 1;") + + // test the unique key + createTableSQL2 := "CREATE TABLE `diff_test`.`atest` (`id` int(24), `name` varchar(24), `birthday` datetime, `update_time` time, `money` decimal(20,2), unique key(`id`, `name`))" + tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + replaceSQL = GenerateReplaceDML(rowsData, tableInfo2, "diff_test") + deleteSQL = GenerateDeleteDML(rowsData, tableInfo2, "diff_test") + require.Equal(t, replaceSQL, "REPLACE INTO `diff_test`.`atest`(`id`,`name`,`birthday`,`update_time`,`money`) VALUES (1,'xxx','2018-01-01 00:00:00','10:10:10',11.1111);") + require.Equal(t, deleteSQL, "DELETE FROM `diff_test`.`atest` WHERE `id` = 1 AND `name` = 'xxx' AND `birthday` = '2018-01-01 00:00:00' AND `update_time` = '10:10:10' AND `money` = 11.1111 LIMIT 1;") + + // test value is nil + rowsData["name"] = &dbutil.ColumnData{Data: []byte(""), IsNull: true} + replaceSQL = GenerateReplaceDML(rowsData, tableInfo, "diff_test") + deleteSQL = GenerateDeleteDML(rowsData, tableInfo, "diff_test") + require.Equal(t, replaceSQL, "REPLACE INTO `diff_test`.`atest`(`id`,`name`,`birthday`,`update_time`,`money`) 
VALUES (1,NULL,'2018-01-01 00:00:00','10:10:10',11.1111);") + require.Equal(t, deleteSQL, "DELETE FROM `diff_test`.`atest` WHERE `id` = 1 AND `name` is NULL AND `birthday` = '2018-01-01 00:00:00' AND `update_time` = '10:10:10' AND `money` = 11.1111 LIMIT 1;") + + rowsData["id"] = &dbutil.ColumnData{Data: []byte(""), IsNull: true} + replaceSQL = GenerateReplaceDML(rowsData, tableInfo, "diff_test") + deleteSQL = GenerateDeleteDML(rowsData, tableInfo, "diff_test") + require.Equal(t, replaceSQL, "REPLACE INTO `diff_test`.`atest`(`id`,`name`,`birthday`,`update_time`,`money`) VALUES (NULL,NULL,'2018-01-01 00:00:00','10:10:10',11.1111);") + require.Equal(t, deleteSQL, "DELETE FROM `diff_test`.`atest` WHERE `id` is NULL AND `name` is NULL AND `birthday` = '2018-01-01 00:00:00' AND `update_time` = '10:10:10' AND `money` = 11.1111 LIMIT 1;") + + // test value with "'" + rowsData["name"] = &dbutil.ColumnData{Data: []byte("a'a"), IsNull: false} + replaceSQL = GenerateReplaceDML(rowsData, tableInfo, "diff_test") + deleteSQL = GenerateDeleteDML(rowsData, tableInfo, "diff_test") + require.Equal(t, replaceSQL, "REPLACE INTO `diff_test`.`atest`(`id`,`name`,`birthday`,`update_time`,`money`) VALUES (NULL,'a\\'a','2018-01-01 00:00:00','10:10:10',11.1111);") + require.Equal(t, deleteSQL, "DELETE FROM `diff_test`.`atest` WHERE `id` is NULL AND `name` = 'a\\'a' AND `birthday` = '2018-01-01 00:00:00' AND `update_time` = '10:10:10' AND `money` = 11.1111 LIMIT 1;") +} + +func TestResetColumns(t *testing.T) { + createTableSQL1 := "CREATE TABLE `test`.`atest` (`a` int, `b` int, `c` int, `d` int, primary key(`a`))" + tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + require.NoError(t, err) + tbInfo, hasTimeStampType := ResetColumns(tableInfo1, []string{"a"}) + require.Equal(t, len(tbInfo.Columns), 3) + require.Equal(t, len(tbInfo.Indices), 0) + require.Equal(t, tbInfo.Columns[2].Offset, 2) + require.False(t, hasTimeStampType) + + createTableSQL2 := "CREATE TABLE `test`.`atest` (`a` int, `b` int, `c` int, `d` int, primary key(`a`), index idx(`b`, `c`))" + tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + tbInfo, _ = ResetColumns(tableInfo2, []string{"a", "b"}) + require.Equal(t, len(tbInfo.Columns), 2) + require.Equal(t, len(tbInfo.Indices), 0) + + createTableSQL3 := "CREATE TABLE `test`.`atest` (`a` int, `b` int, `c` int, `d` int, primary key(`a`), index idx(`b`, `c`))" + tableInfo3, err := dbutil.GetTableInfoBySQL(createTableSQL3, parser.New()) + require.NoError(t, err) + tbInfo, _ = ResetColumns(tableInfo3, []string{"b", "c"}) + require.Equal(t, len(tbInfo.Columns), 2) + require.Equal(t, len(tbInfo.Indices), 1) +} + +func TestGetTableSize(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + dataRows := sqlmock.NewRows([]string{"a", "b"}) + rowNums := 1000 + for k := 0; k < rowNums; k++ { + str := fmt.Sprintf("%d", k) + dataRows.AddRow(str, str) + } + sizeRows := sqlmock.NewRows([]string{"data"}) + sizeRows.AddRow("8000") + mock.ExpectQuery("data").WillReturnRows(sizeRows) + size, err := GetTableSize(ctx, conn, "test", "test") + require.NoError(t, err) + require.Equal(t, size, int64(8000)) +} + +func TestGetBetterIndex(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer 
conn.Close() + tableCases := []*tableCaseType{ + { + schema: "single_index", + table: "test1", + createTableSQL: "CREATE TABLE `single_index`.`test1` (`a` int, `b` char, primary key(`a`), index(`b`))", + rowColumns: []string{"a", "b"}, + rows: [][]driver.Value{ + {"1", "a"}, + {"2", "a"}, + {"3", "b"}, + {"4", "b"}, + {"5", "c"}, + {"6", "c"}, + {"7", "d"}, + {"8", "d"}, + {"9", "e"}, + {"A", "e"}, + {"B", "f"}, + {"C", "f"}, + }, + indices: []string{"PRIMARY", "b"}, + sels: []float64{1.0, 0.5}, + selected: "PRIMARY", + }, { + schema: "single_index", + table: "test1", + createTableSQL: "CREATE TABLE `single_index`.`test1` (`a` int, `b` char, index(a), index(b))", + rowColumns: []string{"a", "b"}, + rows: [][]driver.Value{ + {"1", "a"}, + {"2", "a"}, + {"3", "b"}, + {"4", "b"}, + {"5", "c"}, + {"6", "c"}, + {"7", "d"}, + {"8", "d"}, + {"9", "e"}, + {"A", "e"}, + {"B", "f"}, + {"C", "f"}, + }, + indices: []string{"a", "b"}, + sels: []float64{1.0, 0.5}, + selected: "a", + }, + } + tableCase := tableCases[0] + tableInfo, err := dbutil.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) + require.NoError(t, err) + indices := dbutil.FindAllIndex(tableInfo) + for i, index := range indices { + require.Equal(t, index.Name.O, tableCase.indices[i]) + } + for i, col := range tableCase.rowColumns { + retRows := sqlmock.NewRows([]string{"SEL"}) + retRows.AddRow(tableCase.sels[i]) + mock.ExpectQuery("SELECT").WillReturnRows(retRows) + sel, err := GetSelectivity(ctx, conn, tableCase.schema, tableCase.table, col, tableInfo) + require.NoError(t, err) + require.Equal(t, sel, tableCase.sels[i]) + } + indices, err = GetBetterIndex(ctx, conn, "single_index", "test1", tableInfo) + require.NoError(t, err) + require.Equal(t, indices[0].Name.O, tableCase.selected) + + tableCase = tableCases[1] + tableInfo, err = dbutil.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) + require.NoError(t, err) + indices = dbutil.FindAllIndex(tableInfo) + for i, index := range indices { + require.Equal(t, index.Name.O, tableCase.indices[i]) + } + for i, col := range tableCase.rowColumns { + retRows := sqlmock.NewRows([]string{"SEL"}) + retRows.AddRow(tableCase.sels[i]) + mock.ExpectQuery("SELECT").WillReturnRows(retRows) + sel, err := GetSelectivity(ctx, conn, tableCase.schema, tableCase.table, col, tableInfo) + require.NoError(t, err) + require.Equal(t, sel, tableCase.sels[i]) + } + mock.ExpectQuery("SELECT COUNT\\(DISTINCT `a.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("2")) + mock.ExpectQuery("SELECT COUNT\\(DISTINCT `b.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("5")) + indices, err = GetBetterIndex(ctx, conn, "single_index", "test1", tableInfo) + require.NoError(t, err) + require.Equal(t, indices[0].Name.O, tableCase.selected) + +} + +func TestCalculateChunkSize(t *testing.T) { + require.Equal(t, CalculateChunkSize(1000), int64(50000)) + require.Equal(t, CalculateChunkSize(1000000000), int64(100000)) +} + +func TestGetSQLFileName(t *testing.T) { + index := &chunk.ChunkID{ + TableIndex: 1, + BucketIndexLeft: 2, + BucketIndexRight: 3, + ChunkIndex: 4, + ChunkCnt: 10, + } + require.Equal(t, GetSQLFileName(index), "1:2-3:4") +} + +func TestGetChunkIDFromSQLFileName(t *testing.T) { + tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, err := GetChunkIDFromSQLFileName("11:12-13:14") + require.NoError(t, err) + require.Equal(t, tableIndex, 11) + require.Equal(t, bucketIndexLeft, 12) + require.Equal(t, bucketIndexRight, 13) + require.Equal(t, chunkIndex, 14) +} + +func 
TestCompareStruct(t *testing.T) { + createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + var isEqual bool + var isPanic bool + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo}, tableInfo) + require.True(t, isEqual) + require.False(t, isPanic) + + // column length different + createTableSQL2 := "create table `test`(`a` int, `b` varchar(10), `c` float, primary key(`a`, `b`), index(`c`))" + tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.False(t, isEqual) + require.True(t, isPanic) + + // column name differernt + createTableSQL2 = "create table `test`(`aa` int, `b` varchar(10), `c` float, `d` datetime, primary key(`aa`, `b`), index(`c`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.False(t, isEqual) + require.True(t, isPanic) + + // column type compatible + createTableSQL2 = "create table `test`(`a` int, `b` char(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.True(t, isEqual) + require.False(t, isPanic) + + createTableSQL2 = "create table `test`(`a` int(11), `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.True(t, isEqual) + require.False(t, isPanic) + + // column type not compatible + createTableSQL2 = "create table `test`(`a` int, `b` varchar(10), `c` int, `d` datetime, primary key(`a`, `b`), index(`c`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.False(t, isEqual) + require.True(t, isPanic) + + // column properties not compatible + createTableSQL2 = "create table `test`(`a` int, `b` varchar(11), `c` int, `d` datetime, primary key(`a`, `b`), index(`c`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.False(t, isEqual) + require.True(t, isPanic) + + // index check + + // index different + createTableSQL2 = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.False(t, isEqual) + require.False(t, isPanic) + require.Equal(t, len(tableInfo.Indices), 1) + require.Equal(t, tableInfo.Indices[0].Name.O, "PRIMARY") + + // index column different + createTableSQL = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" + 
tableInfo, err = dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + createTableSQL2 = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `c`), index(`c`))" + tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + require.NoError(t, err) + + isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) + require.False(t, isEqual) + require.False(t, isPanic) + require.Equal(t, len(tableInfo.Indices), 1) + require.Equal(t, tableInfo.Indices[0].Name.O, "c") + +} + +func TestGenerateSQLBlob(t *testing.T) { + rowsData := map[string]*dbutil.ColumnData{ + "id": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte("foo"), IsNull: false}, + } + + cases := []struct { + createTableSql string + }{ + {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` tinyblob)"}, + {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` blob)"}, + {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` mediumblob)"}, + {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` longblob)"}, + } + + for _, c := range cases { + tableInfo, err := dbutil.GetTableInfoBySQL(c.createTableSql, parser.New()) + require.NoError(t, err) + + replaceSQL := GenerateReplaceDML(rowsData, tableInfo, "diff_test") + deleteSQL := GenerateDeleteDML(rowsData, tableInfo, "diff_test") + require.Equal(t, replaceSQL, "REPLACE INTO `diff_test`.`atest`(`id`,`b`) VALUES (1,x'666f6f');") + require.Equal(t, deleteSQL, "DELETE FROM `diff_test`.`atest` WHERE `id` = 1 AND `b` = x'666f6f' LIMIT 1;") + } +} + +func TestCompareBlob(t *testing.T) { + createTableSQL := "create table `test`.`test`(`a` int primary key, `b` blob)" + tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + require.NoError(t, err) + + _, orderKeyCols := GetTableRowsQueryFormat("test", "test", tableInfo, "123") + + data1 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte{0xff, 0xfe}, IsNull: false}, + } + data2 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte{0xfe, 0xff}, IsNull: false}, + } + data3 := map[string]*dbutil.ColumnData{ + "a": {Data: []byte("1"), IsNull: false}, + "b": {Data: []byte("foobar"), IsNull: false}, + } + + columns := tableInfo.Columns + + cases := []struct { + data1 map[string]*dbutil.ColumnData + dataOthers []map[string]*dbutil.ColumnData + }{ + {data1, []map[string]*dbutil.ColumnData{data2, data3}}, + {data2, []map[string]*dbutil.ColumnData{data1, data3}}, + {data3, []map[string]*dbutil.ColumnData{data1, data2}}, + } + + for _, c := range cases { + equal, cmp, err := CompareData(c.data1, c.data1, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.True(t, equal) + + for _, data := range c.dataOthers { + equal, cmp, err = CompareData(c.data1, data, orderKeyCols, columns) + require.NoError(t, err) + require.Equal(t, cmp, int32(0)) + require.False(t, equal) + } + } +} From 4437f90d455420adc6bc2be95597cf0774c18f30 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 13:58:20 +0800 Subject: [PATCH 02/22] Update --- sync_diff_inspector/checkpoints/checkpoints.go | 8 +++----- sync_diff_inspector/config/config.go | 2 +- sync_diff_inspector/diff/diff.go | 2 +- sync_diff_inspector/source/common/rows.go | 2 +- sync_diff_inspector/source/common/table_diff.go | 2 +- 
sync_diff_inspector/source/mysql_shard.go | 5 ++--- sync_diff_inspector/source/source.go | 2 +- sync_diff_inspector/source/source_test.go | 8 ++++---- sync_diff_inspector/source/tidb.go | 5 ++--- sync_diff_inspector/splitter/bucket.go | 2 +- sync_diff_inspector/splitter/index_fields.go | 2 +- sync_diff_inspector/splitter/limit.go | 2 +- sync_diff_inspector/splitter/random.go | 2 +- sync_diff_inspector/utils/table.go | 14 ++++++++------ sync_diff_inspector/utils/utils.go | 2 +- 15 files changed, 29 insertions(+), 31 deletions(-) diff --git a/sync_diff_inspector/checkpoints/checkpoints.go b/sync_diff_inspector/checkpoints/checkpoints.go index 010fdd2a9c2..8863cad1c51 100644 --- a/sync_diff_inspector/checkpoints/checkpoints.go +++ b/sync_diff_inspector/checkpoints/checkpoints.go @@ -20,14 +20,12 @@ import ( "os" "sync" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/config" "github.com/pingcap/tiflow/sync_diff_inspector/report" - - "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/siddontang/go/ioutil2" - - "github.com/pingcap/errors" - "github.com/pingcap/log" "go.uber.org/zap" ) diff --git a/sync_diff_inspector/config/config.go b/sync_diff_inspector/config/config.go index 3ab749bc890..789176ed37d 100644 --- a/sync_diff_inspector/config/config.go +++ b/sync_diff_inspector/config/config.go @@ -32,7 +32,7 @@ import ( "github.com/google/uuid" "github.com/pingcap/errors" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" tidbutil "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/dbutil" filter "github.com/pingcap/tidb/pkg/util/table-filter" diff --git a/sync_diff_inspector/diff/diff.go b/sync_diff_inspector/diff/diff.go index 6ebc69f65cd..239e7a84f81 100644 --- a/sync_diff_inspector/diff/diff.go +++ b/sync_diff_inspector/diff/diff.go @@ -30,7 +30,7 @@ import ( "github.com/pingcap/failpoint" "github.com/pingcap/log" tidbconfig "github.com/pingcap/tidb/pkg/config" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tiflow/sync_diff_inspector/checkpoints" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" diff --git a/sync_diff_inspector/source/common/rows.go b/sync_diff_inspector/source/common/rows.go index a97204881f2..27470c15042 100644 --- a/sync_diff_inspector/source/common/rows.go +++ b/sync_diff_inspector/source/common/rows.go @@ -17,7 +17,7 @@ import ( "strconv" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tiflow/sync_diff_inspector/utils" "go.uber.org/zap" diff --git a/sync_diff_inspector/source/common/table_diff.go b/sync_diff_inspector/source/common/table_diff.go index 1d8befb7a11..2960f0ba7cb 100644 --- a/sync_diff_inspector/source/common/table_diff.go +++ b/sync_diff_inspector/source/common/table_diff.go @@ -16,7 +16,7 @@ package common import ( "database/sql" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" ) // TableShardSource represents the origin schema and table and DB connection before router. 
diff --git a/sync_diff_inspector/source/mysql_shard.go b/sync_diff_inspector/source/mysql_shard.go index 2a43f48081f..ff0edd9e8c9 100644 --- a/sync_diff_inspector/source/mysql_shard.go +++ b/sync_diff_inspector/source/mysql_shard.go @@ -20,13 +20,12 @@ import ( "fmt" "time" - tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" - "github.com/pingcap/errors" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tidb/pkg/util/filter" + tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" "github.com/pingcap/tiflow/sync_diff_inspector/config" "github.com/pingcap/tiflow/sync_diff_inspector/source/common" "github.com/pingcap/tiflow/sync_diff_inspector/splitter" diff --git a/sync_diff_inspector/source/source.go b/sync_diff_inspector/source/source.go index 5e615488886..a3f7500a019 100644 --- a/sync_diff_inspector/source/source.go +++ b/sync_diff_inspector/source/source.go @@ -23,7 +23,7 @@ import ( "github.com/go-sql-driver/mysql" "github.com/pingcap/errors" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tidb/pkg/util/filter" tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" diff --git a/sync_diff_inspector/source/source_test.go b/sync_diff_inspector/source/source_test.go index bb3ea1b02ab..58bbe853eca 100644 --- a/sync_diff_inspector/source/source_test.go +++ b/sync_diff_inspector/source/source_test.go @@ -25,8 +25,10 @@ import ( "time" "github.com/DATA-DOG/go-sqlmock" + _ "github.com/go-sql-driver/mysql" "github.com/pingcap/tidb/pkg/parser" "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" filter "github.com/pingcap/tidb/pkg/util/table-filter" router "github.com/pingcap/tidb/pkg/util/table-router" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" @@ -35,8 +37,6 @@ import ( "github.com/pingcap/tiflow/sync_diff_inspector/splitter" "github.com/pingcap/tiflow/sync_diff_inspector/utils" "github.com/stretchr/testify/require" - - _ "github.com/go-sql-driver/mysql" ) type tableCaseType struct { @@ -296,7 +296,7 @@ func TestFallbackToRandomIfRangeIsSet(t *testing.T) { "`c` char(120) NOT NULL DEFAULT '', " + "PRIMARY KEY (`id`), KEY `k_1` (`k`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) table1 := &common.TableDiff{ @@ -631,7 +631,7 @@ func TestTiDBRouter(t *testing.T) { func prepareTiDBTables(t *testing.T, tableCases []*tableCaseType) []*common.TableDiff { tableDiffs := make([]*common.TableDiff, 0, len(tableCases)) for n, tableCase := range tableCases { - tableInfo, err := dbutil.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) require.NoError(t, err) tableDiffs = append(tableDiffs, &common.TableDiff{ Schema: "source_test", diff --git a/sync_diff_inspector/source/tidb.go b/sync_diff_inspector/source/tidb.go index 0af78384cc1..a021275ee6e 100644 --- a/sync_diff_inspector/source/tidb.go +++ b/sync_diff_inspector/source/tidb.go @@ -20,13 +20,12 @@ import ( "time" "github.com/coreos/go-semver/semver" - tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" - "github.com/pingcap/errors" "github.com/pingcap/log" - 
"github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tidb/pkg/util/filter" + tableFilter "github.com/pingcap/tidb/pkg/util/table-filter" "github.com/pingcap/tiflow/sync_diff_inspector/config" "github.com/pingcap/tiflow/sync_diff_inspector/source/common" "github.com/pingcap/tiflow/sync_diff_inspector/splitter" diff --git a/sync_diff_inspector/splitter/bucket.go b/sync_diff_inspector/splitter/bucket.go index b64b3ae68bd..46a53f99d28 100644 --- a/sync_diff_inspector/splitter/bucket.go +++ b/sync_diff_inspector/splitter/bucket.go @@ -21,7 +21,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/progress" diff --git a/sync_diff_inspector/splitter/index_fields.go b/sync_diff_inspector/splitter/index_fields.go index 1508d59075d..9c35164aa9d 100644 --- a/sync_diff_inspector/splitter/index_fields.go +++ b/sync_diff_inspector/splitter/index_fields.go @@ -19,7 +19,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tiflow/sync_diff_inspector/utils" "go.uber.org/zap" ) diff --git a/sync_diff_inspector/splitter/limit.go b/sync_diff_inspector/splitter/limit.go index 0075506a673..3138d53181f 100644 --- a/sync_diff_inspector/splitter/limit.go +++ b/sync_diff_inspector/splitter/limit.go @@ -21,7 +21,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/progress" diff --git a/sync_diff_inspector/splitter/random.go b/sync_diff_inspector/splitter/random.go index 470c33c6a24..839073a48b7 100644 --- a/sync_diff_inspector/splitter/random.go +++ b/sync_diff_inspector/splitter/random.go @@ -22,7 +22,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/progress" diff --git a/sync_diff_inspector/utils/table.go b/sync_diff_inspector/utils/table.go index 1c99dcb5d74..10f73e588d3 100644 --- a/sync_diff_inspector/utils/table.go +++ b/sync_diff_inspector/utils/table.go @@ -23,11 +23,12 @@ import ( "github.com/coreos/go-semver/semver" "github.com/pingcap/errors" "github.com/pingcap/tidb/pkg/ddl" + "github.com/pingcap/tidb/pkg/meta/metabuild" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/parser" "github.com/pingcap/tidb/pkg/parser/ast" - "github.com/pingcap/tidb/pkg/parser/model" + pmodel "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/parser/mysql" - "github.com/pingcap/tidb/pkg/sessionctx" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/collate" "github.com/pingcap/tidb/pkg/util/dbutil" @@ -80,7 +81,7 @@ func addClusteredAnnotationForPrimaryKey(raw string, replace string) (string, er return reg.ReplaceAllString(raw, replace), nil } -func 
getTableInfoBySQL(ctx sessionctx.Context, createTableSQL string, parser2 *parser.Parser) (table *model.TableInfo, err error) { +func getTableInfoBySQL(ctx *metabuild.Context, createTableSQL string, parser2 *parser.Parser) (table *model.TableInfo, err error) { stmt, err := parser2.ParseOneStmt(createTableSQL, "", "") if err != nil { return nil, errors.Trace(err) @@ -96,11 +97,11 @@ func getTableInfoBySQL(ctx sessionctx.Context, createTableSQL string, parser2 *p // put primary key in indices if table.PKIsHandle { pkIndex := &model.IndexInfo{ - Name: model.NewCIStr("PRIMARY"), + Name: pmodel.NewCIStr("PRIMARY"), Primary: true, State: model.StatePublic, Unique: true, - Tp: model.IndexTypeBtree, + Tp: pmodel.IndexTypeBtree, Columns: []*model.IndexColumn{ { Name: table.GetPkName(), @@ -166,7 +167,8 @@ func GetTableInfoWithVersion( sctx.GetSessionVars().TimeZone = time.UTC sctx.GetSessionVars().SQLMode = mysql.DelSQLMode(sctx.GetSessionVars().SQLMode, mysql.ModeStrictTransTables) sctx.GetSessionVars().SQLMode = mysql.DelSQLMode(sctx.GetSessionVars().SQLMode, mysql.ModeStrictAllTables) - return getTableInfoBySQL(sctx, createTableSQL, parser2) + metaBuildCtx := ddl.NewMetaBuildContextWithSctx(sctx) + return getTableInfoBySQL(metaBuildCtx, createTableSQL, parser2) } // GetTableInfo returns table information. diff --git a/sync_diff_inspector/utils/utils.go b/sync_diff_inspector/utils/utils.go index d80bc669123..a22fa3ac971 100644 --- a/sync_diff_inspector/utils/utils.go +++ b/sync_diff_inspector/utils/utils.go @@ -28,7 +28,7 @@ import ( "github.com/olekukonko/tablewriter" "github.com/pingcap/errors" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/util/dbutil" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" From 6463065280ebcfb77e3b36e5e9d660b359fc21c3 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 14:11:59 +0800 Subject: [PATCH 03/22] Update build --- Makefile | 2 +- scripts/download-integration-test-binaries.sh | 4 +--- tests/integration_tests/README.md | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index f4e3d4d7615..82b2501c711 100644 --- a/Makefile +++ b/Makefile @@ -228,7 +228,7 @@ check_third_party_binary: @which bin/minio @which bin/bin/schema-registry-start -integration_test_build: check_failpoint_ctl storage_consumer kafka_consumer pulsar_consumer oauth2_server +integration_test_build: check_failpoint_ctl storage_consumer kafka_consumer pulsar_consumer oauth2_server sync_diff_inspector $(FAILPOINT_ENABLE) $(GOTEST) -ldflags '$(LDFLAGS)' -c -cover -covermode=atomic \ -coverpkg=github.com/pingcap/tiflow/... 
\ diff --git a/scripts/download-integration-test-binaries.sh b/scripts/download-integration-test-binaries.sh index 765d848aede..ec0d8849438 100755 --- a/scripts/download-integration-test-binaries.sh +++ b/scripts/download-integration-test-binaries.sh @@ -91,7 +91,7 @@ download_community_binaries() { mv ${THIRD_BIN_DIR}/tiflash ${THIRD_BIN_DIR}/_tiflash mv ${THIRD_BIN_DIR}/_tiflash/* ${THIRD_BIN_DIR} && rm -rf ${THIRD_BIN_DIR}/_tiflash tar -xz -C ${THIRD_BIN_DIR} pd-ctl -f ${TMP_DIR}/$tidb_file_name/ctl-${dist}.tar.gz - tar -xz -C ${THIRD_BIN_DIR} $toolkit_file_name/etcdctl $toolkit_file_name/sync_diff_inspector -f ${TMP_DIR}/$toolkit_tar_name + tar -xz -C ${THIRD_BIN_DIR} $toolkit_file_name/etcdctl -f ${TMP_DIR}/$toolkit_tar_name mv ${THIRD_BIN_DIR}/$toolkit_file_name/* ${THIRD_BIN_DIR} && rm -rf ${THIRD_BIN_DIR}/$toolkit_file_name # Download additional tools @@ -147,7 +147,6 @@ download_binaries() { local minio_download_url="${FILE_SERVER_URL}/download/minio.tar.gz" local go_ycsb_download_url="${FILE_SERVER_URL}/download/builds/pingcap/go-ycsb/test-br/go-ycsb" local etcd_download_url="${FILE_SERVER_URL}/download/builds/pingcap/cdc/etcd-v3.4.7-linux-amd64.tar.gz" - local sync_diff_inspector_url="${FILE_SERVER_URL}/download/builds/pingcap/cdc/sync_diff_inspector_hash-a129f096_linux-amd64.tar.gz" local jq_download_url="${FILE_SERVER_URL}/download/builds/pingcap/test/jq-1.6/jq-linux64" local schema_registry_url="${FILE_SERVER_URL}/download/builds/pingcap/cdc/schema-registry.tar.gz" @@ -158,7 +157,6 @@ download_binaries() { download_and_extract "$tiflash_download_url" "tiflash.tar.gz" download_and_extract "$minio_download_url" "minio.tar.gz" download_and_extract "$etcd_download_url" "etcd.tar.gz" "etcd-v3.4.7-linux-amd64/etcdctl" - download_and_extract "$sync_diff_inspector_url" "sync_diff_inspector.tar.gz" download_and_extract "$schema_registry_url" "schema-registry.tar.gz" download_file "$go_ycsb_download_url" "go-ycsb" "${THIRD_BIN_DIR}/go-ycsb" diff --git a/tests/integration_tests/README.md b/tests/integration_tests/README.md index 483f697338f..b2c3d471f85 100644 --- a/tests/integration_tests/README.md +++ b/tests/integration_tests/README.md @@ -14,7 +14,6 @@ If you need to specify a version, os or arch, you can use, for example: `make pr * `pd-ctl` # version >= 6.0.0-rc.1 * `tiflash` # tiflash binary * `libc++.so, libc++abi.so, libgmssl.so, libtiflash_proxy.so` # some necessary so files related to tiflash - * `sync_diff_inspector` * [go-ycsb](https://github.com/pingcap/go-ycsb) * [etcdctl](https://github.com/etcd-io/etcd/tree/master/etcdctl) * [jq](https://stedolan.github.io/jq/) From fa78f45de60e05c3dbf6361d61b841995290ff07 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 14:26:37 +0800 Subject: [PATCH 04/22] Fix format --- sync_diff_inspector/chunk/chunk_test.go | 2 - sync_diff_inspector/config/config_test.go | 1 - sync_diff_inspector/config/dm.go | 2 +- sync_diff_inspector/progress/progress.go | 1 - sync_diff_inspector/source/source_test.go | 14 ++--- sync_diff_inspector/source/tidb.go | 3 +- sync_diff_inspector/splitter/random.go | 3 -- sync_diff_inspector/splitter/splitter_test.go | 33 ++++++------ sync_diff_inspector/utils/utils.go | 2 - sync_diff_inspector/utils/utils_test.go | 52 +++++++++---------- 10 files changed, 50 insertions(+), 63 deletions(-) diff --git a/sync_diff_inspector/chunk/chunk_test.go b/sync_diff_inspector/chunk/chunk_test.go index b5d62dd9449..694bea6f949 100644 --- a/sync_diff_inspector/chunk/chunk_test.go +++ 
b/sync_diff_inspector/chunk/chunk_test.go @@ -419,7 +419,6 @@ func TestChunkToString(t *testing.T) { } require.Equal(t, chunk.String(), `{"index":null,"type":0,"bounds":[{"column":"a","lower":"1","upper":"1","has-lower":true,"has-upper":true},{"column":"b","lower":"3","upper":"3","has-lower":true,"has-upper":true},{"column":"c","lower":"6","upper":"6","has-lower":true,"has-upper":true}],"is-first":false,"is-last":false,"where":"","args":null}`) require.Equal(t, chunk.ToMeta(), "range in sequence: (1,3,6) < (a,b,c) <= (1,3,6)") - } func TestChunkInit(t *testing.T) { @@ -599,7 +598,6 @@ func TestChunkID(t *testing.T) { chunkIDtmp.FromString(str) require.Equal(t, chunkIDLarge.Compare(chunkIDtmp), 0) } - } func TestChunkIndex(t *testing.T) { diff --git a/sync_diff_inspector/config/config_test.go b/sync_diff_inspector/config/config_test.go index 7c12c260bb2..32d2a9f40ce 100644 --- a/sync_diff_inspector/config/config_test.go +++ b/sync_diff_inspector/config/config_test.go @@ -58,7 +58,6 @@ func TestParseConfig(t *testing.T) { require.True(t, cfg.TableConfigs["config1"].Valid()) require.NoError(t, os.RemoveAll(cfg.Task.OutputDir)) - } func TestError(t *testing.T) { diff --git a/sync_diff_inspector/config/dm.go b/sync_diff_inspector/config/dm.go index 68564910e4b..4591b74f79e 100644 --- a/sync_diff_inspector/config/dm.go +++ b/sync_diff_inspector/config/dm.go @@ -50,7 +50,7 @@ func getDMTaskCfgURL(dmAddr, task string) string { func getDMTaskCfg(dmAddr, task string) ([]*SubTaskConfig, error) { tr := &http.Transport{ // TODO: support tls - //TLSClientConfig: tlsCfg, + // TLSClientConfig: tlsCfg, } client := &http.Client{Transport: tr} req, err := http.NewRequest("GET", getDMTaskCfgURL(dmAddr, task), nil) diff --git a/sync_diff_inspector/progress/progress.go b/sync_diff_inspector/progress/progress.go index b559ac85937..3fed728a8d1 100644 --- a/sync_diff_inspector/progress/progress.go +++ b/sync_diff_inspector/progress/progress.go @@ -217,7 +217,6 @@ func (tpp *TableProgressPrinter) PrintSummary() { } fmt.Fprintf(tpp.output, "%s%s\n", cleanStr, fixStr) - } func (tpp *TableProgressPrinter) Error(err error) { diff --git a/sync_diff_inspector/source/source_test.go b/sync_diff_inspector/source/source_test.go index 58bbe853eca..692452af4b7 100644 --- a/sync_diff_inspector/source/source_test.go +++ b/sync_diff_inspector/source/source_test.go @@ -59,8 +59,10 @@ type MockChunkIterator struct { index *chunk.ChunkID } -const CHUNKS = 5 -const BUCKETS = 1 +const ( + CHUNKS = 5 + BUCKETS = 1 +) func (m *MockChunkIterator) Next() (*chunk.Range, error) { if m.index.ChunkIndex == m.index.ChunkCnt-1 { @@ -79,11 +81,9 @@ func (m *MockChunkIterator) Next() (*chunk.Range, error) { } func (m *MockChunkIterator) Close() { - } -type MockAnalyzer struct { -} +type MockAnalyzer struct{} func (m *MockAnalyzer) AnalyzeSplitter(ctx context.Context, tableDiff *common.TableDiff, rangeInfo *splitter.RangeInfo) (splitter.ChunkIterator, error) { i := &chunk.ChunkID{ @@ -430,7 +430,7 @@ func TestMysqlShardSources(t *testing.T) { break } for j, value := range tableCase.rows[i] { - //c.Log(j) + // c.Log(j) require.Equal(t, columns[tableCase.rowColumns[j]].IsNull, false) require.Equal(t, columns[tableCase.rowColumns[j]].Data, []byte(value.(string))) } @@ -763,7 +763,7 @@ func TestRouterRules(t *testing.T) { } portStr, isExist := os.LookupEnv("MYSQL_PORT") if portStr == "" || !isExist { - //return + // return } port, err := strconv.Atoi(portStr) require.NoError(t, err) diff --git a/sync_diff_inspector/source/tidb.go 
b/sync_diff_inspector/source/tidb.go index a021275ee6e..92d80c5d34b 100644 --- a/sync_diff_inspector/source/tidb.go +++ b/sync_diff_inspector/source/tidb.go @@ -63,7 +63,6 @@ func (a *TiDBTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common.T return nil, errors.Trace(err) } return randIter, nil - } type TiDBRowsIterator struct { @@ -119,6 +118,7 @@ func (s *TiDBSource) GetRangeIterator(ctx context.Context, r *splitter.RangeInfo func (s *TiDBSource) Close() { s.dbConn.Close() } + func (s *TiDBSource) GetCountAndMd5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { beginTime := time.Now() table := s.tableDiffs[tableRange.GetTableIndex()] @@ -222,7 +222,6 @@ func NewTiDBSource( // instance -> db -> table allTablesMap := make(map[string]map[string]interface{}) sourceSchemas, err := dbutil.GetSchemas(ctx, ds.Conn) - if err != nil { return nil, errors.Annotatef(err, "get schemas from database") } diff --git a/sync_diff_inspector/splitter/random.go b/sync_diff_inspector/splitter/random.go index 839073a48b7..98771d0eb28 100644 --- a/sync_diff_inspector/splitter/random.go +++ b/sync_diff_inspector/splitter/random.go @@ -139,7 +139,6 @@ func NewRandomIteratorWithCheckpoint(ctx context.Context, progressID string, tab nextChunk: 0, dbConn: dbConn, }, nil - } func (s *RandomIterator) Next() (*chunk.Range, error) { @@ -161,7 +160,6 @@ func (s *RandomIterator) Next() (*chunk.Range, error) { } func (s *RandomIterator) Close() { - } // GetSplitFields returns fields to split chunks, order by pk, uk, index, columns. @@ -173,7 +171,6 @@ func GetSplitFields(table *model.TableInfo, splitFields []string) ([]*model.Colu col := dbutil.FindColumnByName(table.Columns, splitField) if col == nil { return nil, errors.NotFoundf("column %s in table %s", splitField, table.Name) - } splitCols = append(splitCols, col) } diff --git a/sync_diff_inspector/splitter/splitter_test.go b/sync_diff_inspector/splitter/splitter_test.go index 3f641d100ee..9b16fc5c0ea 100644 --- a/sync_diff_inspector/splitter/splitter_test.go +++ b/sync_diff_inspector/splitter/splitter_test.go @@ -23,7 +23,7 @@ import ( sqlmock "github.com/DATA-DOG/go-sqlmock" "github.com/pingcap/tidb/pkg/parser" - "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/source/common" "github.com/pingcap/tiflow/sync_diff_inspector/utils" @@ -66,7 +66,8 @@ func TestSplitRangeByRandom(t *testing.T) { []interface{}{"7", "7", "n", "10", "10", "z"}, }, }, - }, { + }, + { "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`, `a`))", 3, chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true).CopyAndUpdate("a", "0", "10", true, true), @@ -106,7 +107,8 @@ func TestSplitRangeByRandom(t *testing.T) { []interface{}{"n", "z"}, }, }, - }, { + }, + { "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`))", 2, chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true), @@ -122,7 +124,8 @@ func TestSplitRangeByRandom(t *testing.T) { []interface{}{"g", "z"}, }, }, - }, { + }, + { "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`b`))", 3, chunk.NewChunkRange().CopyAndUpdate("b", "a", "z", true, true), @@ -139,7 +142,7 @@ func TestSplitRangeByRandom(t *testing.T) { } for _, testCase := range testCases { - tableInfo, err := dbutil.GetTableInfoBySQL(testCase.createTableSQL, 
parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(testCase.createTableSQL, parser.New()) require.NoError(t, err) splitCols, err := GetSplitFields(tableInfo, nil) @@ -319,7 +322,7 @@ func TestRandomSpliter(t *testing.T) { } for _, testCase := range testCases { - tableInfo, err := dbutil.GetTableInfoBySQL(testCase.createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(testCase.createTableSQL, parser.New()) require.NoError(t, err) info, needUnifiedTimeStamp := utils.ResetColumns(tableInfo, testCase.IgnoreColumns) @@ -354,15 +357,15 @@ func TestRandomSpliter(t *testing.T) { // Test Checkpoint stopJ := 3 - tableInfo, err := dbutil.GetTableInfoBySQL(testCases[0].createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(testCases[0].createTableSQL, parser.New()) require.NoError(t, err) tableDiff := &common.TableDiff{ Schema: "test", Table: "test", Info: tableInfo, - //IgnoreColumns: []string{"c"}, - //Fields: "a,b", + // IgnoreColumns: []string{"c"}, + // Fields: "a,b", ChunkSize: 5, } @@ -398,7 +401,6 @@ func TestRandomSpliter(t *testing.T) { require.Equal(t, chunk.Index.ChunkCnt, chunkID1.ChunkCnt) require.Equal(t, chunk.Index.ChunkIndex, chunkID1.ChunkIndex+1) - } func createFakeResultForRandomSplit(mock sqlmock.Sqlmock, count int, randomValues [][]string) { @@ -421,7 +423,6 @@ func createFakeResultForRandomSplit(mock sqlmock.Sqlmock, count int, randomValue randomRows.AddRow(row...) } mock.ExpectQuery("ORDER BY rand_value").WillReturnRows(randomRows) - } func TestBucketSpliter(t *testing.T) { @@ -430,7 +431,7 @@ func TestBucketSpliter(t *testing.T) { require.NoError(t, err) createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) testCases := []struct { @@ -741,7 +742,7 @@ func TestLimitSpliter(t *testing.T) { require.NoError(t, err) createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) testCases := []struct { @@ -862,7 +863,6 @@ func TestRangeInfo(t *testing.T) { require.Equal(t, chunkRange.Args, []interface{}{"1", "2"}) require.Equal(t, rangeInfo2.GetTableIndex(), 1) - } func TestChunkSize(t *testing.T) { @@ -871,7 +871,7 @@ func TestChunkSize(t *testing.T) { require.NoError(t, err) createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) tableDiff := &common.TableDiff{ @@ -912,7 +912,7 @@ func TestChunkSize(t *testing.T) { require.Equal(t, randomIter.chunkSize, int64(100000)) createTableSQL = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime)" - tableInfo, err = dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err = dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) tableDiff_noindex := &common.TableDiff{ @@ -932,5 +932,4 @@ func TestChunkSize(t *testing.T) { mock.ExpectQuery("SELECT `a`,.*limit 
50000.*").WillReturnRows(sqlmock.NewRows([]string{"a", "b"})) _, err = NewLimitIterator(ctx, "", tableDiff, db) require.NoError(t, err) - } diff --git a/sync_diff_inspector/utils/utils.go b/sync_diff_inspector/utils/utils.go index a22fa3ac971..5e4351c1c7e 100644 --- a/sync_diff_inspector/utils/utils.go +++ b/sync_diff_inspector/utils/utils.go @@ -310,7 +310,6 @@ func GenerateDeleteDML(data map[string]*dbutil.ColumnData, table *model.TableInf } } return fmt.Sprintf("DELETE FROM %s WHERE %s LIMIT 1;", dbutil.TableName(schema, table.Name.O), strings.Join(kvs, " AND ")) - } // isCompatible checks whether 2 column types are compatible. @@ -443,7 +442,6 @@ func CompareStruct(upstreamTableInfos []*model.TableInfo, downstreamTableInfo *m }{index, 0} } for _, upstreamTableInfo := range upstreamTableInfos { - NextIndex: for _, upstreamIndex := range upstreamTableInfo.Indices { if _, ok := deleteIndicesSet[upstreamIndex.Name.O]; ok { diff --git a/sync_diff_inspector/utils/utils_test.go b/sync_diff_inspector/utils/utils_test.go index 969add2c0f6..fde005ebb3c 100644 --- a/sync_diff_inspector/utils/utils_test.go +++ b/sync_diff_inspector/utils/utils_test.go @@ -21,9 +21,10 @@ import ( "time" "github.com/DATA-DOG/go-sqlmock" + "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/parser" - "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/stretchr/testify/require" ) @@ -75,12 +76,11 @@ func TestStringsToInterface(t *testing.T) { require.Equal(t, len(sliceMap), len(expectSlice)) require.Equal(t, UniqueID("123", "456"), "`123`.`456`") - } func TestBasicTableUtilOperation(t *testing.T) { createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) query, orderKeyCols := GetTableRowsQueryFormat("test", "test", tableInfo, "123") @@ -242,7 +242,7 @@ func TestBasicTableUtilOperation(t *testing.T) { // Test ignore columns createTableSQL = "create table `test`.`test`(`a` int, `c` float, `b` varchar(10), `d` datetime, `e` timestamp, primary key(`a`, `b`), key(`c`, `d`))" - tableInfo, err = dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err = dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) require.Equal(t, len(tableInfo.Indices), 2) @@ -266,7 +266,7 @@ func TestGetCountAndMd5Checksum(t *testing.T) { defer conn.Close() createTableSQL := "create table `test`.`test`(`a` int, `c` float, `b` varchar(10), `d` datetime, primary key(`a`, `b`), key(`c`, `d`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) mock.ExpectQuery("SELECT COUNT.*FROM `test_schema`\\.`test_table` WHERE \\[23 45\\].*").WithArgs("123", "234").WillReturnRows(sqlmock.NewRows([]string{"CNT", "CHECKSUM"}).AddRow(123, 456)) @@ -286,7 +286,7 @@ func TestGetApproximateMid(t *testing.T) { defer conn.Close() createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, 
err) rows := sqlmock.NewRows([]string{"a", "b"}).AddRow("5", "10") @@ -308,7 +308,7 @@ func TestGetApproximateMid(t *testing.T) { func TestGenerateSQLs(t *testing.T) { createTableSQL := "CREATE TABLE `diff_test`.`atest` (`id` int(24), `name` varchar(24), `birthday` datetime, `update_time` time, `money` decimal(20,2), `id_gen` int(11) GENERATED ALWAYS AS ((`id` + 1)) VIRTUAL, primary key(`id`, `name`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) rowsData := map[string]*dbutil.ColumnData{ @@ -327,7 +327,7 @@ func TestGenerateSQLs(t *testing.T) { // test the unique key createTableSQL2 := "CREATE TABLE `diff_test`.`atest` (`id` int(24), `name` varchar(24), `birthday` datetime, `update_time` time, `money` decimal(20,2), unique key(`id`, `name`))" - tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err := dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) replaceSQL = GenerateReplaceDML(rowsData, tableInfo2, "diff_test") deleteSQL = GenerateDeleteDML(rowsData, tableInfo2, "diff_test") @@ -357,7 +357,7 @@ func TestGenerateSQLs(t *testing.T) { func TestResetColumns(t *testing.T) { createTableSQL1 := "CREATE TABLE `test`.`atest` (`a` int, `b` int, `c` int, `d` int, primary key(`a`))" - tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo1, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) tbInfo, hasTimeStampType := ResetColumns(tableInfo1, []string{"a"}) require.Equal(t, len(tbInfo.Columns), 3) @@ -366,14 +366,14 @@ func TestResetColumns(t *testing.T) { require.False(t, hasTimeStampType) createTableSQL2 := "CREATE TABLE `test`.`atest` (`a` int, `b` int, `c` int, `d` int, primary key(`a`), index idx(`b`, `c`))" - tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err := dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) tbInfo, _ = ResetColumns(tableInfo2, []string{"a", "b"}) require.Equal(t, len(tbInfo.Columns), 2) require.Equal(t, len(tbInfo.Indices), 0) createTableSQL3 := "CREATE TABLE `test`.`atest` (`a` int, `b` int, `c` int, `d` int, primary key(`a`), index idx(`b`, `c`))" - tableInfo3, err := dbutil.GetTableInfoBySQL(createTableSQL3, parser.New()) + tableInfo3, err := dbutiltest.GetTableInfoBySQL(createTableSQL3, parser.New()) require.NoError(t, err) tbInfo, _ = ResetColumns(tableInfo3, []string{"b", "c"}) require.Equal(t, len(tbInfo.Columns), 2) @@ -455,7 +455,7 @@ func TestGetBetterIndex(t *testing.T) { }, } tableCase := tableCases[0] - tableInfo, err := dbutil.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) require.NoError(t, err) indices := dbutil.FindAllIndex(tableInfo) for i, index := range indices { @@ -474,7 +474,7 @@ func TestGetBetterIndex(t *testing.T) { require.Equal(t, indices[0].Name.O, tableCase.selected) tableCase = tableCases[1] - tableInfo, err = dbutil.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) + tableInfo, err = dbutiltest.GetTableInfoBySQL(tableCase.createTableSQL, parser.New()) require.NoError(t, err) indices = dbutil.FindAllIndex(tableInfo) for i, index := range indices { @@ -493,7 +493,6 @@ func TestGetBetterIndex(t *testing.T) { indices, err = GetBetterIndex(ctx, conn, "single_index", "test1", tableInfo) 
require.NoError(t, err) require.Equal(t, indices[0].Name.O, tableCase.selected) - } func TestCalculateChunkSize(t *testing.T) { @@ -523,7 +522,7 @@ func TestGetChunkIDFromSQLFileName(t *testing.T) { func TestCompareStruct(t *testing.T) { createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) var isEqual bool @@ -534,7 +533,7 @@ func TestCompareStruct(t *testing.T) { // column length different createTableSQL2 := "create table `test`(`a` int, `b` varchar(10), `c` float, primary key(`a`, `b`), index(`c`))" - tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err := dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -543,7 +542,7 @@ func TestCompareStruct(t *testing.T) { // column name differernt createTableSQL2 = "create table `test`(`aa` int, `b` varchar(10), `c` float, `d` datetime, primary key(`aa`, `b`), index(`c`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -552,7 +551,7 @@ func TestCompareStruct(t *testing.T) { // column type compatible createTableSQL2 = "create table `test`(`a` int, `b` char(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -560,7 +559,7 @@ func TestCompareStruct(t *testing.T) { require.False(t, isPanic) createTableSQL2 = "create table `test`(`a` int(11), `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -569,7 +568,7 @@ func TestCompareStruct(t *testing.T) { // column type not compatible createTableSQL2 = "create table `test`(`a` int, `b` varchar(10), `c` int, `d` datetime, primary key(`a`, `b`), index(`c`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -578,7 +577,7 @@ func TestCompareStruct(t *testing.T) { // column properties not compatible createTableSQL2 = "create table `test`(`a` int, `b` varchar(11), `c` int, `d` datetime, primary key(`a`, `b`), index(`c`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -589,7 +588,7 @@ func TestCompareStruct(t *testing.T) { // index different createTableSQL2 = "create table 
`test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -600,11 +599,11 @@ func TestCompareStruct(t *testing.T) { // index column different createTableSQL = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`), index(`c`))" - tableInfo, err = dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err = dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) createTableSQL2 = "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `c`), index(`c`))" - tableInfo2, err = dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err = dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) isEqual, isPanic = CompareStruct([]*model.TableInfo{tableInfo, tableInfo2}, tableInfo) @@ -612,7 +611,6 @@ func TestCompareStruct(t *testing.T) { require.False(t, isPanic) require.Equal(t, len(tableInfo.Indices), 1) require.Equal(t, tableInfo.Indices[0].Name.O, "c") - } func TestGenerateSQLBlob(t *testing.T) { @@ -631,7 +629,7 @@ func TestGenerateSQLBlob(t *testing.T) { } for _, c := range cases { - tableInfo, err := dbutil.GetTableInfoBySQL(c.createTableSql, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(c.createTableSql, parser.New()) require.NoError(t, err) replaceSQL := GenerateReplaceDML(rowsData, tableInfo, "diff_test") @@ -643,7 +641,7 @@ func TestGenerateSQLBlob(t *testing.T) { func TestCompareBlob(t *testing.T) { createTableSQL := "create table `test`.`test`(`a` int primary key, `b` blob)" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) _, orderKeyCols := GetTableRowsQueryFormat("test", "test", tableInfo, "123") From 19d3529a10a531b453e27bdaa3a7c2848846fee7 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 17:03:45 +0800 Subject: [PATCH 05/22] Fix lint --- go.mod | 2 +- .../checkpoints/checkpoints.go | 40 +-- .../checkpoints/checkpoints_test.go | 6 +- sync_diff_inspector/chunk/chunk.go | 161 +++++++----- sync_diff_inspector/chunk/chunk_test.go | 12 +- sync_diff_inspector/config/config.go | 26 +- sync_diff_inspector/config/config_test.go | 2 +- sync_diff_inspector/config/dm.go | 13 +- sync_diff_inspector/config/template.go | 3 +- sync_diff_inspector/diff/diff.go | 57 ++-- sync_diff_inspector/main.go | 6 +- sync_diff_inspector/progress/progress.go | 243 +++++++++--------- sync_diff_inspector/progress/progress_test.go | 4 +- sync_diff_inspector/report/report.go | 19 +- sync_diff_inspector/report/report_test.go | 118 ++++----- sync_diff_inspector/source/chunks_iter.go | 7 +- .../source/common/common_test.go | 3 +- sync_diff_inspector/source/common/rows.go | 14 +- .../source/common/table_diff.go | 8 +- sync_diff_inspector/source/mysql_shard.go | 49 +++- sync_diff_inspector/source/source.go | 33 ++- sync_diff_inspector/source/source_test.go | 44 ++-- sync_diff_inspector/source/tidb.go | 37 ++- sync_diff_inspector/splitter/bucket.go | 7 + .../splitter/index_fields_test.go | 8 +- sync_diff_inspector/splitter/limit.go | 15 +- sync_diff_inspector/splitter/random.go | 13 +- 
sync_diff_inspector/splitter/splitter.go | 11 +- sync_diff_inspector/splitter/splitter_test.go | 4 +- sync_diff_inspector/utils/pd.go | 2 + sync_diff_inspector/utils/table.go | 21 +- sync_diff_inspector/utils/utils.go | 55 ++-- sync_diff_inspector/utils/utils_test.go | 26 +- 33 files changed, 618 insertions(+), 451 deletions(-) diff --git a/go.mod b/go.mod index 8ce5bf41de4..d0aec8d663a 100644 --- a/go.mod +++ b/go.mod @@ -346,7 +346,7 @@ require ( github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c // indirect github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0 // indirect github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546 // indirect - github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726 // indirect + github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726 github.com/siddontang/go-log v0.0.0-20180807004314-8d05993dda07 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect diff --git a/sync_diff_inspector/checkpoints/checkpoints.go b/sync_diff_inspector/checkpoints/checkpoints.go index 8863cad1c51..82b4def15ec 100644 --- a/sync_diff_inspector/checkpoints/checkpoints.go +++ b/sync_diff_inspector/checkpoints/checkpoints.go @@ -30,22 +30,23 @@ import ( ) const ( - // SuccessState - // for chunk: means this chunk's data is equal + // SuccessState means + // for chunk: this chunk's data is equal // for table: means this all chunk in this table is equal(except ignore chunk) SuccessState = "success" - // FailedState - // for chunk: means this chunk's data is not equal - // for table: means some chunks' data is not equal or some chunk check failed in this table + // FailedState means + // for chunk: this chunk's data is not equal + // for table: some chunks' data is not equal or some chunk check failed in this table FailedState = "failed" - // IgnoreState + // IgnoreState means // for chunk: this chunk is ignored. if it is Empty chunk, will ignore some chunk // for table: don't have this state IgnoreState = "ignore" ) +// Node is the struct for node type Node struct { State string `json:"state"` // indicate the state ("success" or "failed") of the chunk @@ -53,16 +54,22 @@ type Node struct { IndexID int64 `json:"index-id"` } -func (n *Node) GetID() *chunk.ChunkID { return n.ChunkRange.Index } +// GetID returns id from the node +func (n *Node) GetID() *chunk.CID { return n.ChunkRange.Index } +// GetState returns the state from the node func (n *Node) GetState() string { return n.State } +// GetTableIndex returns table index func (n *Node) GetTableIndex() int { return n.ChunkRange.Index.TableIndex } +// GetBucketIndexLeft returns BucketIndexLeft func (n *Node) GetBucketIndexLeft() int { return n.ChunkRange.Index.BucketIndexLeft } +// GetBucketIndexRight returns BucketIndexRight func (n *Node) GetBucketIndexRight() int { return n.ChunkRange.Index.BucketIndexRight } +// GetChunkIndex returns ChunkIndex func (n *Node) GetChunkIndex() int { return n.ChunkRange.Index.ChunkIndex } // IsAdjacent represents whether the next node is adjacent node. 
@@ -121,7 +128,7 @@ type Checkpoint struct { hp *nodeHeap } -// SaveState contains the information of the latest checked chunk and state of `report` +// SavedState contains the information of the latest checked chunk and state of `report` // When sync-diff start from the checkpoint, it will load this information and continue running type SavedState struct { Chunk *Node `json:"chunk-info"` @@ -133,37 +140,39 @@ func (cp *Checkpoint) InitCurrentSavedID(n *Node) { cp.hp.CurrentSavedNode = n } +// GetCurrentSavedID returns the saved id with lock func (cp *Checkpoint) GetCurrentSavedID() *Node { cp.hp.mu.Lock() defer cp.hp.mu.Unlock() return cp.hp.CurrentSavedNode } +// Insert inserts a new node func (cp *Checkpoint) Insert(node *Node) { cp.hp.mu.Lock() heap.Push(cp.hp, node) cp.hp.mu.Unlock() } -// Len - get the length of the heap +// Len gets the length of the heap func (hp *nodeHeap) Len() int { return len(hp.Nodes) } -// Less - determine which is more priority than another +// Less determines which is more priority than another func (hp *nodeHeap) Less(i, j int) bool { return hp.Nodes[i].IsLess(hp.Nodes[j]) } -// Swap - implementation of swap for the heap interface +// Swap implementation of swap for the heap interface func (hp *nodeHeap) Swap(i, j int) { hp.Nodes[i], hp.Nodes[j] = hp.Nodes[j], hp.Nodes[i] } -// Push - implementation of push for the heap interface +// Push implementation of push for the heap interface func (hp *nodeHeap) Push(x interface{}) { hp.Nodes = append(hp.Nodes, x.(*Node)) } -// Pop - implementation of pop for heap interface +// Pop implementation of pop for heap interface func (hp *nodeHeap) Pop() (item interface{}) { if len(hp.Nodes) == 0 { return @@ -173,13 +182,14 @@ func (hp *nodeHeap) Pop() (item interface{}) { return } +// Init initialize the Checkpoint func (cp *Checkpoint) Init() { hp := &nodeHeap{ mu: &sync.Mutex{}, Nodes: make([]*Node, 0), CurrentSavedNode: &Node{ ChunkRange: &chunk.Range{ - Index: chunk.GetInitChunkID(), + Index: chunk.GetInitCID(), IsFirst: true, IsLast: true, }, @@ -202,7 +212,7 @@ func (cp *Checkpoint) GetChunkSnapshot() (cur *Node) { } // SaveChunk saves the chunk to file. 
-func (cp *Checkpoint) SaveChunk(ctx context.Context, fileName string, cur *Node, reportInfo *report.Report) (*chunk.ChunkID, error) { +func (cp *Checkpoint) SaveChunk(ctx context.Context, fileName string, cur *Node, reportInfo *report.Report) (*chunk.CID, error) { if cur == nil { return nil, nil } diff --git a/sync_diff_inspector/checkpoints/checkpoints_test.go b/sync_diff_inspector/checkpoints/checkpoints_test.go index 29b1a76a586..eb73274ae5b 100644 --- a/sync_diff_inspector/checkpoints/checkpoints_test.go +++ b/sync_diff_inspector/checkpoints/checkpoints_test.go @@ -41,7 +41,7 @@ func TestSaveChunk(t *testing.T) { go func(i int) { node := &Node{ ChunkRange: &chunk.Range{ - Index: &chunk.ChunkID{ + Index: &chunk.CID{ TableIndex: 0, BucketIndexLeft: i / 10, BucketIndexRight: i / 10, @@ -74,7 +74,7 @@ func TestSaveChunk(t *testing.T) { require.NotNil(t, cur) id, err = checker.SaveChunk(ctx, "TestSaveChunk", cur, nil) require.NoError(t, err) - require.Equal(t, id.Compare(&chunk.ChunkID{TableIndex: 0, BucketIndexLeft: 9, BucketIndexRight: 9, ChunkIndex: 9}), 0) + require.Equal(t, id.Compare(&chunk.CID{TableIndex: 0, BucketIndexLeft: 9, BucketIndexRight: 9, ChunkIndex: 9}), 0) } func TestLoadChunk(t *testing.T) { @@ -96,7 +96,7 @@ func TestLoadChunk(t *testing.T) { HasUpper: i != rounds, }, }, - Index: &chunk.ChunkID{ + Index: &chunk.CID{ TableIndex: 0, BucketIndexLeft: i / 10, BucketIndexRight: i / 10, diff --git a/sync_diff_inspector/chunk/chunk.go b/sync_diff_inspector/chunk/chunk.go index 6943f413d96..91ef5f27d7a 100644 --- a/sync_diff_inspector/chunk/chunk.go +++ b/sync_diff_inspector/chunk/chunk.go @@ -31,10 +31,12 @@ const ( gt = ">" ) -type ChunkType int +// Type is the type of the chunk +type Type int +// List all chunk types const ( - Bucket ChunkType = iota + 1 + Bucket Type = iota + 1 Random Limit Others @@ -51,8 +53,8 @@ type Bound struct { HasUpper bool `json:"has-upper"` } -// ChunkID is to identify the sequence of chunks -type ChunkID struct { +// CID is to identify the sequence of chunks +type CID struct { TableIndex int `json:"table-index"` // we especially treat random split has only one bucket // which is the whole table @@ -66,8 +68,9 @@ type ChunkID struct { ChunkCnt int `json:"chunk-count"` } -func GetInitChunkID() *ChunkID { - return &ChunkID{ +// GetInitCID return an empty CID +func GetInitCID() *CID { + return &CID{ TableIndex: -1, BucketIndexLeft: -1, BucketIndexRight: -1, @@ -76,7 +79,8 @@ func GetInitChunkID() *ChunkID { } } -func (c *ChunkID) Compare(o *ChunkID) int { +// Compare compare two CIDs +func (c *CID) Compare(o *CID) int { if c.TableIndex < o.TableIndex { return -1 } @@ -101,16 +105,19 @@ func (c *ChunkID) Compare(o *ChunkID) int { return 1 } -func (c *ChunkID) Copy() *ChunkID { +// Copy return a same CID +func (c *CID) Copy() *CID { cp := *c return &cp } -func (c *ChunkID) ToString() string { +// ToString return string for CID +func (c *CID) ToString() string { return fmt.Sprintf("%d:%d-%d:%d:%d", c.TableIndex, c.BucketIndexLeft, c.BucketIndexRight, c.ChunkIndex, c.ChunkCnt) } -func (c *ChunkID) FromString(s string) error { +// FromString get CID from given string +func (c *CID) FromString(s string) error { ids := strings.Split(s, ":") tableIndex, err := strconv.Atoi(ids[0]) if err != nil { @@ -141,11 +148,11 @@ func (c *ChunkID) FromString(s string) error { // Range represents chunk range type Range struct { - Index *ChunkID `json:"index"` - Type ChunkType `json:"type"` - Bounds []*Bound `json:"bounds"` - IsFirst bool `json:"is-first"` - IsLast bool 
`json:"is-last"` + Index *CID `json:"index"` + Type Type `json:"type"` + Bounds []*Bound `json:"bounds"` + IsFirst bool `json:"is-first"` + IsLast bool `json:"is-last"` Where string `json:"where"` Args []interface{} `json:"args"` @@ -153,10 +160,12 @@ type Range struct { columnOffset map[string]int } +// IsFirstChunkForBucket return true if it's the first chunk func (r *Range) IsFirstChunkForBucket() bool { return r.Index.ChunkIndex == 0 } +// IsLastChunkForBucket return true if it's the last chunk func (r *Range) IsLastChunkForBucket() bool { return r.Index.ChunkIndex == r.Index.ChunkCnt-1 } @@ -166,7 +175,7 @@ func NewChunkRange() *Range { return &Range{ Bounds: make([]*Bound, 0, 2), columnOffset: make(map[string]int), - Index: &ChunkID{}, + Index: &CID{}, } } @@ -186,12 +195,13 @@ func NewChunkRangeOffset(columnOffset map[string]int) *Range { } } -func (c *Range) IsLastChunkForTable() bool { - if c.IsLast { +// IsLastChunkForTable return true if it's the last chunk +func (r *Range) IsLastChunkForTable() bool { + if r.IsLast { return true } // calculate from bounds - for _, b := range c.Bounds { + for _, b := range r.Bounds { if b.HasUpper { return false } @@ -199,12 +209,13 @@ func (c *Range) IsLastChunkForTable() bool { return true } -func (c *Range) IsFirstChunkForTable() bool { - if c.IsFirst { +// IsFirstChunkForTable return true if it's the first chunk +func (r *Range) IsFirstChunkForTable() bool { + if r.IsFirst { return true } // calculate from bounds - for _, b := range c.Bounds { + for _, b := range r.Bounds { if b.HasLower { return false } @@ -213,8 +224,8 @@ func (c *Range) IsFirstChunkForTable() bool { } // String returns the string of Range, used for log. -func (c *Range) String() string { - chunkBytes, err := json.Marshal(c) +func (r *Range) String() string { + chunkBytes, err := json.Marshal(r) if err != nil { log.Warn("fail to encode chunk into string", zap.Error(err)) return "" @@ -223,7 +234,8 @@ func (c *Range) String() string { return string(chunkBytes) } -func (c *Range) ToString(collation string) (string, []interface{}) { +// ToString return string for range +func (r *Range) ToString(collation string) (string, []interface{}) { if collation != "" { collation = fmt.Sprintf(" COLLATE '%s'", collation) } @@ -246,8 +258,8 @@ func (c *Range) ToString(collation string) (string, []interface{}) { preConditionArgsForUpper := make([]interface{}, 0, 1) i := 0 - for ; i < len(c.Bounds); i++ { - bound := c.Bounds[i] + for ; i < len(r.Bounds); i++ { + bound := r.Bounds[i] if !(bound.HasLower && bound.HasUpper) { break } @@ -260,16 +272,16 @@ func (c *Range) ToString(collation string) (string, []interface{}) { sameArgs = append(sameArgs, bound.Lower) } - if i == len(c.Bounds) && i > 0 { + if i == len(r.Bounds) && i > 0 { // All the columns are equal in bounds, should return FALSE! return "FALSE", nil } - for ; i < len(c.Bounds); i++ { - bound := c.Bounds[i] + for ; i < len(r.Bounds); i++ { + bound := r.Bounds[i] lowerSymbol := gt upperSymbol := lt - if i == len(c.Bounds)-1 { + if i == len(r.Bounds)-1 { upperSymbol = lte } @@ -312,28 +324,29 @@ func (c *Range) ToString(collation string) (string, []interface{}) { } return fmt.Sprintf("(%s) AND (%s)", strings.Join(lowerCondition, " OR "), strings.Join(upperCondition, " OR ")), append(lowerArgs, upperArgs...) 
- } else { - if len(upperCondition) == 0 && len(lowerCondition) == 0 { - return strings.Join(sameCondition, " AND "), sameArgs - } + } - if len(upperCondition) == 0 { - return fmt.Sprintf("(%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(lowerCondition, " OR ")), append(sameArgs, lowerArgs...) - } + if len(upperCondition) == 0 && len(lowerCondition) == 0 { + return strings.Join(sameCondition, " AND "), sameArgs + } - if len(lowerCondition) == 0 { - return fmt.Sprintf("(%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(upperCondition, " OR ")), append(sameArgs, upperArgs...) - } + if len(upperCondition) == 0 { + return fmt.Sprintf("(%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(lowerCondition, " OR ")), append(sameArgs, lowerArgs...) + } - return fmt.Sprintf("(%s) AND (%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(lowerCondition, " OR "), strings.Join(upperCondition, " OR ")), append(append(sameArgs, lowerArgs...), upperArgs...) + if len(lowerCondition) == 0 { + return fmt.Sprintf("(%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(upperCondition, " OR ")), append(sameArgs, upperArgs...) } + + return fmt.Sprintf("(%s) AND (%s) AND (%s)", strings.Join(sameCondition, " AND "), strings.Join(lowerCondition, " OR "), strings.Join(upperCondition, " OR ")), append(append(sameArgs, lowerArgs...), upperArgs...) } -func (c *Range) ToMeta() string { +// ToMeta return string for range +func (r *Range) ToMeta() string { lowerCondition := make([]string, 0, 1) upperCondition := make([]string, 0, 1) columnName := make([]string, 0, 1) - for _, bound := range c.Bounds { + for _, bound := range r.Bounds { columnName = append(columnName, bound.Column) if bound.HasLower { lowerCondition = append(lowerCondition, bound.Lower) @@ -354,28 +367,29 @@ func (c *Range) ToMeta() string { return fmt.Sprintf("range in sequence: (%s) < (%s) <= (%s)", strings.Join(lowerCondition, ","), strings.Join(columnName, ","), strings.Join(upperCondition, ",")) } -func (c *Range) addBound(bound *Bound) { - c.Bounds = append(c.Bounds, bound) - c.columnOffset[bound.Column] = len(c.Bounds) - 1 +func (r *Range) addBound(bound *Bound) { + r.Bounds = append(r.Bounds, bound) + r.columnOffset[bound.Column] = len(r.Bounds) - 1 } -func (c *Range) Update(column, lower, upper string, updateLower, updateUpper bool) { - if offset, ok := c.columnOffset[column]; ok { +// Update update the range +func (r *Range) Update(column, lower, upper string, updateLower, updateUpper bool) { + if offset, ok := r.columnOffset[column]; ok { // update the bound if updateLower { - c.Bounds[offset].Lower = lower - c.Bounds[offset].HasLower = true + r.Bounds[offset].Lower = lower + r.Bounds[offset].HasLower = true } if updateUpper { - c.Bounds[offset].Upper = upper - c.Bounds[offset].HasUpper = true + r.Bounds[offset].Upper = upper + r.Bounds[offset].HasUpper = true } return } // add a new bound - c.addBound(&Bound{ + r.addBound(&Bound{ Column: column, Lower: lower, Upper: upper, @@ -384,9 +398,10 @@ func (c *Range) Update(column, lower, upper string, updateLower, updateUpper boo }) } -func (c *Range) Copy() *Range { +// Copy return a new range +func (r *Range) Copy() *Range { newChunk := NewChunkRange() - for _, bound := range c.Bounds { + for _, bound := range r.Bounds { newChunk.addBound(&Bound{ Column: bound.Column, Lower: bound.Lower, @@ -399,9 +414,10 @@ func (c *Range) Copy() *Range { return newChunk } -func (c *Range) Clone() *Range { +// Clone return a new range +func (r 
*Range) Clone() *Range { newChunk := NewChunkRange() - for _, bound := range c.Bounds { + for _, bound := range r.Bounds { newChunk.addBound(&Bound{ Column: bound.Column, Lower: bound.Lower, @@ -410,28 +426,30 @@ func (c *Range) Clone() *Range { HasUpper: bound.HasUpper, }) } - newChunk.Type = c.Type - newChunk.Where = c.Where - newChunk.Args = c.Args - for i, v := range c.columnOffset { + newChunk.Type = r.Type + newChunk.Where = r.Where + newChunk.Args = r.Args + for i, v := range r.columnOffset { newChunk.columnOffset[i] = v } - newChunk.Index = c.Index.Copy() - newChunk.IsFirst = c.IsFirst - newChunk.IsLast = c.IsLast + newChunk.Index = r.Index.Copy() + newChunk.IsFirst = r.IsFirst + newChunk.IsLast = r.IsLast return newChunk } -func (c *Range) CopyAndUpdate(column, lower, upper string, updateLower, updateUpper bool) *Range { - newChunk := c.Copy() +// CopyAndUpdate update the range +func (r *Range) CopyAndUpdate(column, lower, upper string, updateLower, updateUpper bool) *Range { + newChunk := r.Copy() newChunk.Update(column, lower, upper, updateLower, updateUpper) return newChunk } +// InitChunks init the given chunks // Notice: chunk may contain not only one bucket, which can be expressed as a range [3, 5], // // And `lastBucketID` means the `5` and `firstBucketID` means the `3`. -func InitChunks(chunks []*Range, t ChunkType, firstBucketID, lastBucketID int, index int, collation, limits string, chunkCnt int) { +func InitChunks(chunks []*Range, t Type, firstBucketID, lastBucketID int, index int, collation, limits string, chunkCnt int) { if chunks == nil { return } @@ -439,7 +457,7 @@ func InitChunks(chunks []*Range, t ChunkType, firstBucketID, lastBucketID int, i conditions, args := chunk.ToString(collation) chunk.Where = fmt.Sprintf("((%s) AND (%s))", conditions, limits) chunk.Args = args - chunk.Index = &ChunkID{ + chunk.Index = &CID{ BucketIndexLeft: firstBucketID, BucketIndexRight: lastBucketID, ChunkIndex: index, @@ -450,11 +468,12 @@ func InitChunks(chunks []*Range, t ChunkType, firstBucketID, lastBucketID int, i } } -func InitChunk(chunk *Range, t ChunkType, firstBucketID, lastBucketID int, collation, limits string) { +// InitChunk initialize the chunk +func InitChunk(chunk *Range, t Type, firstBucketID, lastBucketID int, collation, limits string) { conditions, args := chunk.ToString(collation) chunk.Where = fmt.Sprintf("((%s) AND (%s))", conditions, limits) chunk.Args = args - chunk.Index = &ChunkID{ + chunk.Index = &CID{ BucketIndexLeft: firstBucketID, BucketIndexRight: lastBucketID, ChunkIndex: 0, diff --git a/sync_diff_inspector/chunk/chunk_test.go b/sync_diff_inspector/chunk/chunk_test.go index 694bea6f949..31dca1caa06 100644 --- a/sync_diff_inspector/chunk/chunk_test.go +++ b/sync_diff_inspector/chunk/chunk_test.go @@ -511,7 +511,7 @@ func TestChunkCopyAndUpdate(t *testing.T) { } func TestChunkID(t *testing.T) { - chunkIDBase := &ChunkID{ + chunkIDBase := &CID{ TableIndex: 2, BucketIndexLeft: 2, BucketIndexRight: 2, @@ -521,11 +521,11 @@ func TestChunkID(t *testing.T) { str := chunkIDBase.ToString() require.Equal(t, str, "2:2-2:2:4") - chunkIDtmp := &ChunkID{} + chunkIDtmp := &CID{} chunkIDtmp.FromString(str) require.Equal(t, chunkIDBase.Compare(chunkIDtmp), 0) - chunkIDSmalls := []*ChunkID{ + chunkIDSmalls := []*CID{ { TableIndex: 1, BucketIndexLeft: 3, @@ -557,12 +557,12 @@ func TestChunkID(t *testing.T) { require.Equal(t, chunkIDBase.Compare(chunkIDSmall), 1) str = chunkIDSmall.ToString() require.Equal(t, str, stringRes[i]) - chunkIDtmp = &ChunkID{} + chunkIDtmp = 
&CID{} chunkIDtmp.FromString(str) require.Equal(t, chunkIDSmall.Compare(chunkIDtmp), 0) } - chunkIDLarges := []*ChunkID{ + chunkIDLarges := []*CID{ { TableIndex: 3, BucketIndexLeft: 1, @@ -594,7 +594,7 @@ func TestChunkID(t *testing.T) { require.Equal(t, chunkIDBase.Compare(chunkIDLarge), -1) str = chunkIDLarge.ToString() require.Equal(t, str, stringRes[i]) - chunkIDtmp = &ChunkID{} + chunkIDtmp = &CID{} chunkIDtmp.FromString(str) require.Equal(t, chunkIDLarge.Compare(chunkIDtmp), 0) } diff --git a/sync_diff_inspector/config/config.go b/sync_diff_inspector/config/config.go index 789176ed37d..11f613a299e 100644 --- a/sync_diff_inspector/config/config.go +++ b/sync_diff_inspector/config/config.go @@ -44,13 +44,17 @@ import ( ) const ( - LocalDirPerm os.FileMode = 0o755 + // LocalFilePerm is the permission for local files LocalFilePerm os.FileMode = 0o644 + localDirPerm os.FileMode = 0o755 + + // LogFileName is the filename of the log LogFileName = "sync_diff.log" baseSplitThreadCount = 3 + // UnifiedTimeZone is the time zone UnifiedTimeZone string = "+0:00" ) @@ -92,7 +96,7 @@ func (t *TableConfig) Valid() bool { return true } -// TLS Security wrapper +// Security is the wrapper for TLS Security type Security struct { TLSName string `json:"tls-name"` @@ -112,7 +116,7 @@ type DataSource struct { Port int `toml:"port" json:"port"` User string `toml:"user" json:"user"` Password utils.SecretString `toml:"password" json:"password"` - SqlMode string `toml:"sql-mode" json:"sql-mode"` + SQLMode string `toml:"sql-mode" json:"sql-mode"` Snapshot string `toml:"snapshot" json:"snapshot"` Security *Security `toml:"security" json:"security"` @@ -136,6 +140,7 @@ func (d *DataSource) SetSnapshot(newSnapshot string) { d.Snapshot = newSnapshot } +// ToDBConfig get the current config from data source func (d *DataSource) ToDBConfig() *dbutil.DBConfig { return &dbutil.DBConfig{ Host: d.Host, @@ -146,7 +151,7 @@ func (d *DataSource) ToDBConfig() *dbutil.DBConfig { } } -// register TLS config for driver +// RegisterTLS register TLS config for driver func (d *DataSource) RegisterTLS() error { if d.Security == nil { return nil @@ -173,6 +178,7 @@ func (d *DataSource) RegisterTLS() error { return errors.Trace(err) } +// ToDriverConfig get the driver config func (d *DataSource) ToDriverConfig() *mysql.Config { cfg := mysql.NewConfig() cfg.Params = make(map[string]string) @@ -195,6 +201,7 @@ func (d *DataSource) ToDriverConfig() *mysql.Config { return cfg } +// TaskConfig is the config for sync diff type TaskConfig struct { Source []string `toml:"source-instances" json:"source-instances"` Routes []string `toml:"source-routes" json:"source-routes"` @@ -219,6 +226,7 @@ type TaskConfig struct { HashFile string } +// Init return a new config func (t *TaskConfig) Init( dataSources map[string]*DataSource, tableConfigs map[string]*TableConfig, @@ -310,7 +318,7 @@ func (t *TaskConfig) Init( } if !ok { // not match, raise error - return errors.Errorf("config changes breaking the checkpoint, please use another outputDir and start over again!") + return errors.Errorf("config changes breaking the checkpoint, please use another outputDir and start over again") } } @@ -522,7 +530,7 @@ func (c *Config) adjustConfigByDMSubTasks() (err error) { Port: subTaskCfgs[0].To.Port, User: subTaskCfgs[0].To.User, Password: utils.SecretString(subTaskCfgs[0].To.Password), - SqlMode: sqlMode, + SQLMode: sqlMode, Security: parseTLSFromDMConfig(subTaskCfgs[0].To.Security), } for _, subTaskCfg := range subTaskCfgs { @@ -543,7 +551,7 @@ func (c 
*Config) adjustConfigByDMSubTasks() (err error) { Port: subTaskCfg.From.Port, User: subTaskCfg.From.User, Password: utils.SecretString(subTaskCfg.From.Password), - SqlMode: sqlMode, + SQLMode: sqlMode, Security: parseTLSFromDMConfig(subTaskCfg.From.Security), Router: tableRouter, @@ -561,6 +569,7 @@ func (c *Config) adjustConfigByDMSubTasks() (err error) { return nil } +// Init initialize the config func (c *Config) Init() (err error) { if len(c.DMAddr) > 0 { err := c.adjustConfigByDMSubTasks() @@ -599,6 +608,7 @@ func (c *Config) Init() (err error) { return nil } +// CheckConfig check whether the config is vaild func (c *Config) CheckConfig() bool { if c.CheckThreadCount <= 0 { log.Error("check-thread-count must greater than 0!") @@ -636,7 +646,7 @@ func pathExists(_path string) (bool, error) { func mkdirAll(base string) error { mask := syscall.Umask(0) - err := os.MkdirAll(base, LocalDirPerm) + err := os.MkdirAll(base, localDirPerm) syscall.Umask(mask) return errors.Trace(err) } diff --git a/sync_diff_inspector/config/config_test.go b/sync_diff_inspector/config/config_test.go index 32d2a9f40ce..5a36caa4504 100644 --- a/sync_diff_inspector/config/config_test.go +++ b/sync_diff_inspector/config/config_test.go @@ -91,7 +91,7 @@ func TestNoSecretLeak(t *testing.T) { Port: 5432, User: "postgres", Password: "AVeryV#ryStr0ngP@ssw0rd", - SqlMode: "MYSQL", + SQLMode: "MYSQL", Snapshot: "2022/10/24", } cfg := &Config{} diff --git a/sync_diff_inspector/config/dm.go b/sync_diff_inspector/config/dm.go index 4591b74f79e..5179414036d 100644 --- a/sync_diff_inspector/config/dm.go +++ b/sync_diff_inspector/config/dm.go @@ -15,6 +15,7 @@ package config import ( "bytes" + "context" "crypto/aes" "crypto/cipher" "encoding/base64" @@ -33,7 +34,6 @@ import ( "github.com/pingcap/tiflow/dm/pb" "github.com/pingcap/tiflow/dm/pkg/terror" "github.com/pingcap/tiflow/pkg/column-mapping" - flag "github.com/spf13/pflag" "go.uber.org/zap" ) @@ -53,7 +53,7 @@ func getDMTaskCfg(dmAddr, task string) ([]*SubTaskConfig, error) { // TLSClientConfig: tlsCfg, } client := &http.Client{Transport: tr} - req, err := http.NewRequest("GET", getDMTaskCfgURL(dmAddr, task), nil) + req, err := http.NewRequestWithContext(context.Background(), "GET", getDMTaskCfgURL(dmAddr, task), nil) if err != nil { return nil, err } @@ -95,12 +95,6 @@ func getDMTaskCfg(dmAddr, task string) ([]*SubTaskConfig, error) { // SubTaskConfig is the configuration for SubTask. type SubTaskConfig struct { - // BurntSushi/toml seems have a bug for flag "-" - // when doing encoding, if we use `toml:"-"`, it still try to encode it - // and it will panic because of unsupported type (reflect.Func) - // so we should not export flagSet - flagSet *flag.FlagSet - // when in sharding, multi dm-workers do one task IsSharding bool `toml:"is-sharding" json:"is-sharding"` ShardMode string `toml:"shard-mode" json:"shard-mode"` @@ -173,9 +167,6 @@ type SubTaskConfig struct { // deprecated, will auto discover SQL mode EnableANSIQuotes bool `toml:"ansi-quotes" json:"ansi-quotes"` - // still needed by Syncer / Loader bin - printVersion bool - // which DM worker is running the subtask, this will be injected when the real worker starts running the subtask(StartSubTask). 
WorkerName string `toml:"-" json:"-"` // task experimental configs diff --git a/sync_diff_inspector/config/template.go b/sync_diff_inspector/config/template.go index 0296856520e..189d211a53a 100644 --- a/sync_diff_inspector/config/template.go +++ b/sync_diff_inspector/config/template.go @@ -111,6 +111,7 @@ collation = "" ` ) +// ExportTemplateConfig print the configType func ExportTemplateConfig(configType string) error { switch configType { case "dm", "DM", "Dm", "dM": @@ -118,7 +119,7 @@ func ExportTemplateConfig(configType string) error { case "norm", "normal", "Norm", "Normal": fmt.Print(normConfig) default: - return errors.Errorf("Error: unexpect template name: %s\n-T dm: export a dm config\n-T norm: export a normal config\n", configType) + return errors.Errorf("Error: unexpect template name: %s\n-T dm: export a dm config\n-T norm: export a normal config", configType) } return nil } diff --git a/sync_diff_inspector/diff/diff.go b/sync_diff_inspector/diff/diff.go index 239e7a84f81..b2eec9afd17 100644 --- a/sync_diff_inspector/diff/diff.go +++ b/sync_diff_inspector/diff/diff.go @@ -100,6 +100,7 @@ func NewDiff(ctx context.Context, cfg *config.Config) (diff *Diff, err error) { return diff, nil } +// PrintSummary print the summary and return true if report is passed func (df *Diff) PrintSummary(ctx context.Context) bool { // Stop updating progress bar so that summary won't be flushed. progress.Close() @@ -112,6 +113,7 @@ func (df *Diff) PrintSummary(ctx context.Context) bool { return df.report.Result == report.Pass } +// Close the current struct func (df *Diff) Close() { if df.upstream != nil { df.upstream.Close() @@ -163,15 +165,15 @@ func (df *Diff) initCheckpoint() error { node, reportInfo, err := df.cp.LoadChunk(path) if err != nil { return errors.Annotate(err, "the checkpoint load process failed") - } else { - // this need not be synchronized, because at the moment, the is only one thread access the section - log.Info("load checkpoint", - zap.Any("chunk index", node.GetID()), - zap.Reflect("chunk", node), - zap.String("state", node.GetState())) - df.cp.InitCurrentSavedID(node) } + // this need not be synchronized, because at the moment, the is only one thread access the section + log.Info("load checkpoint", + zap.Any("chunk index", node.GetID()), + zap.Reflect("chunk", node), + zap.String("state", node.GetState())) + df.cp.InitCurrentSavedID(node) + if node != nil { // remove the sql file that ID bigger than node. // cause we will generate these sql again. 
@@ -189,7 +191,7 @@ func (df *Diff) initCheckpoint() error { } } else { log.Info("not found checkpoint file, start from beginning") - id := &chunk.ChunkID{TableIndex: -1, BucketIndexLeft: -1, BucketIndexRight: -1, ChunkIndex: -1, ChunkCnt: 0} + id := &chunk.CID{TableIndex: -1, BucketIndexLeft: -1, BucketIndexRight: -1, ChunkIndex: -1, ChunkCnt: 0} err := df.removeSQLFiles(id) if err != nil { return errors.Trace(err) @@ -199,7 +201,7 @@ func (df *Diff) initCheckpoint() error { return nil } -func encodeReportConfig(config *report.ReportConfig) ([]byte, error) { +func encodeConfig(config *report.Config) ([]byte, error) { buf := new(bytes.Buffer) if err := toml.NewEncoder(buf).Encode(config); err != nil { return nil, errors.Trace(err) @@ -208,35 +210,35 @@ func encodeReportConfig(config *report.ReportConfig) ([]byte, error) { } func getConfigsForReport(cfg *config.Config) ([][]byte, []byte, error) { - sourceConfigs := make([]*report.ReportConfig, len(cfg.Task.SourceInstances)) + sourceConfigs := make([]*report.Config, len(cfg.Task.SourceInstances)) for i := 0; i < len(cfg.Task.SourceInstances); i++ { instance := cfg.Task.SourceInstances[i] - sourceConfigs[i] = &report.ReportConfig{ + sourceConfigs[i] = &report.Config{ Host: instance.Host, Port: instance.Port, User: instance.User, Snapshot: instance.Snapshot, - SqlMode: instance.SqlMode, + SQLMode: instance.SQLMode, } } instance := cfg.Task.TargetInstance - targetConfig := &report.ReportConfig{ + targetConfig := &report.Config{ Host: instance.Host, Port: instance.Port, User: instance.User, Snapshot: instance.Snapshot, - SqlMode: instance.SqlMode, + SQLMode: instance.SQLMode, } sourceBytes := make([][]byte, len(sourceConfigs)) var err error for i := range sourceBytes { - sourceBytes[i], err = encodeReportConfig(sourceConfigs[i]) + sourceBytes[i], err = encodeConfig(sourceConfigs[i]) if err != nil { return nil, nil, errors.Trace(err) } } - targetBytes, err := encodeReportConfig(targetConfig) + targetBytes, err := encodeConfig(targetConfig) if err != nil { return nil, nil, errors.Trace(err) } @@ -290,6 +292,7 @@ func (df *Diff) Equal(ctx context.Context) error { return nil } +// StructEqual compare tables from downstream func (df *Diff) StructEqual(ctx context.Context) error { tables := df.downstream.GetTables() tableIndex := 0 @@ -464,6 +467,7 @@ func (df *Diff) consume(ctx context.Context, rangeInfo *splitter.RangeInfo) bool return isEqual } +// BinGenerate ... 
func (df *Diff) BinGenerate(ctx context.Context, targetSource source.Source, tableRange *splitter.RangeInfo, count int64) (*splitter.RangeInfo, error) { if count <= splitter.SplitThreshold { return tableRange, nil @@ -570,11 +574,11 @@ func (df *Diff) binSearch(ctx context.Context, targetSource source.Source, table return nil, errors.Trace(err) } return c, nil - } else { - // TODO: handle the error to foreground - log.Fatal("the isEqual1 and isEqual2 cannot be both true") - return nil, nil } + + // TODO: handle the error to foreground + log.Fatal("the isEqual1 and isEqual2 cannot be both true") + return nil, nil } func (df *Diff) compareChecksumAndGetCount(ctx context.Context, tableRange *splitter.RangeInfo) (bool, int64, int64, error) { @@ -583,9 +587,9 @@ func (df *Diff) compareChecksumAndGetCount(ctx context.Context, tableRange *spli wg.Add(1) go func() { defer wg.Done() - upstreamInfo = df.upstream.GetCountAndMd5(ctx, tableRange) + upstreamInfo = df.upstream.GetCountAndMD5(ctx, tableRange) }() - downstreamInfo = df.downstream.GetCountAndMd5(ctx, tableRange) + downstreamInfo = df.downstream.GetCountAndMD5(ctx, tableRange) wg.Wait() if upstreamInfo.Err != nil { @@ -743,7 +747,6 @@ func (df *Diff) writeSQLs(ctx context.Context) { fixSQLFile, err := os.Create(fixSQLPath) if err != nil { log.Fatal("write sql failed: cannot create file", zap.Strings("sql", dml.sqls), zap.Error(err)) - continue } // write chunk meta chunkRange := dml.node.ChunkRange @@ -765,7 +768,7 @@ func (df *Diff) writeSQLs(ctx context.Context) { } } -func (df *Diff) removeSQLFiles(checkPointId *chunk.ChunkID) error { +func (df *Diff) removeSQLFiles(checkPointID *chunk.CID) error { ts := time.Now().Format("2006-01-02T15:04:05Z07:00") dirName := fmt.Sprintf(".trash-%s", ts) folderPath := filepath.Join(df.FixSQLDir, dirName) @@ -807,17 +810,17 @@ func (df *Diff) removeSQLFiles(checkPointId *chunk.ChunkID) error { if len(fileIDSubstrs) != 3 { return nil } - tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, err := utils.GetChunkIDFromSQLFileName(fileIDSubstrs[2]) + tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, err := utils.GetCIDFromSQLFileName(fileIDSubstrs[2]) if err != nil { return errors.Trace(err) } - fileID := &chunk.ChunkID{ + fileID := &chunk.CID{ TableIndex: tableIndex, BucketIndexLeft: bucketIndexLeft, BucketIndexRight: bucketIndexRight, ChunkIndex: chunkIndex, ChunkCnt: 0, } if err != nil { return errors.Trace(err) } - if fileID.Compare(checkPointId) > 0 { + if fileID.Compare(checkPointID) > 0 { // move to trash err = os.Rename(oldPath, newPath) if err != nil { diff --git a/sync_diff_inspector/main.go b/sync_diff_inspector/main.go index 761fe3f026e..4867738c34c 100644 --- a/sync_diff_inspector/main.go +++ b/sync_diff_inspector/main.go @@ -117,7 +117,7 @@ func checkSyncState(ctx context.Context, cfg *config.Config) bool { d, err := diff.NewDiff(ctx, cfg) if err != nil { - fmt.Printf("An error occured while initializing diff: %s, please check log info in %s for full details\n", + fmt.Printf("An error occurred while initializing diff: %s, please check log info in %s for full details\n", err, filepath.Join(cfg.Task.OutputDir, config.LogFileName)) log.Fatal("failed to initialize diff process", zap.Error(err)) return false @@ -127,7 +127,7 @@ func checkSyncState(ctx context.Context, cfg *config.Config) bool { if !cfg.CheckDataOnly { err = d.StructEqual(ctx) if err != nil { - fmt.Printf("An error occured while comparing table structure: %s, please check log info in %s for full details\n", + 
fmt.Printf("An error occurred while comparing table structure: %s, please check log info in %s for full details\n", err, filepath.Join(cfg.Task.OutputDir, config.LogFileName)) log.Fatal("failed to check structure difference", zap.Error(err)) return false @@ -138,7 +138,7 @@ func checkSyncState(ctx context.Context, cfg *config.Config) bool { if !cfg.CheckStructOnly { err = d.Equal(ctx) if err != nil { - fmt.Printf("An error occured while comparing table data: %s, please check log info in %s for full details\n", + fmt.Printf("An error occurred while comparing table data: %s, please check log info in %s for full details\n", err, filepath.Join(cfg.Task.OutputDir, config.LogFileName)) log.Fatal("failed to check data difference", zap.Error(err)) return false diff --git a/sync_diff_inspector/progress/progress.go b/sync_diff_inspector/progress/progress.go index 3fed728a8d1..27f9ec7538a 100644 --- a/sync_diff_inspector/progress/progress.go +++ b/sync_diff_inspector/progress/progress.go @@ -24,7 +24,7 @@ import ( "github.com/pingcap/tiflow/sync_diff_inspector/source/common" ) -type TableProgressPrinter struct { +type tableProgressPrinter struct { tableList *list.List tableFailList *list.List tableMap map[string]*list.Element @@ -38,58 +38,59 @@ type TableProgressPrinter struct { progress int total int - optCh chan Operator + optCh chan operator finishCh chan struct{} } -type table_state_t int +type tableState int const ( - TABLE_STATE_REGISTER table_state_t = 0x1 - TABLE_STATE_PRESTART table_state_t = 0x2 - TABLE_STATE_COMPARING table_state_t = 0x4 - TABLE_STATE_FINISH table_state_t = 0x8 - TABLE_STATE_RESULT_OK table_state_t = 0x00 - TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE table_state_t = 0x10 - TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE table_state_t = 0x20 - TABLE_STATE_RESULT_FAIL_STRUCTURE_PASS table_state_t = 0x40 - TABLE_STATE_RESULT_DIFFERENT table_state_t = 0x80 - TABLE_STATE_HEAD table_state_t = 0xff - TABLE_STATE_RESULT_MASK table_state_t = 0xff0 - TABLE_STATE_NOT_EXSIT_UPSTREAM table_state_t = 0x100 - TABLE_STATE_NOT_EXSIT_DOWNSTREAM table_state_t = 0x200 + tableStateRegister tableState = 0x1 + tableStatePrestart tableState = 0x2 + tableStateComparing tableState = 0x4 + tableStateFinish tableState = 0x8 + tableStateResultOK tableState = 0x00 + tableStateResultFailStructureDone tableState = 0x10 + tableStateResultFailStructureContinue tableState = 0x20 + tableStateResultFailStructurePass tableState = 0x40 + tableStateResultDifferent tableState = 0x80 + tableStateHead tableState = 0xff + tableStateResultMask tableState = 0xff0 + tableStateNotExistUpstream tableState = 0x100 + tableStateNotExistDownstream tableState = 0x200 ) +// TableProgress store the progress of one table type TableProgress struct { name string progress int total int - state table_state_t + state tableState totalStopUpdate bool } -type progress_opt_t int +type progressOpt int const ( - PROGRESS_OPT_INC progress_opt_t = iota - PROGRESS_OPT_UPDATE - PROGRESS_OPT_REGISTER - PROGRESS_OPT_START - PROGRESS_OPT_FAIL - PROGRESS_OPT_CLOSE - PROGRESS_OPT_ERROR + progressOptInc progressOpt = iota + progressOptUpdate + progressOptRegister + progressOptStart + progressOptFail + progressOptClose + progressOptError ) -type Operator struct { - optType progress_opt_t +type operator struct { + optType progressOpt name string total int - state table_state_t + state tableState totalStopUpdate bool } -func NewTableProgressPrinter(tableNums int, finishTableNums int) *TableProgressPrinter { - tpp := &TableProgressPrinter{ +func 
newTableProgressPrinter(tableNums int, finishTableNums int) *tableProgressPrinter { + tpp := &tableProgressPrinter{ tableList: list.New(), tableFailList: list.New(), tableMap: make(map[string]*list.Element), @@ -102,7 +103,7 @@ func NewTableProgressPrinter(tableNums int, finishTableNums int) *TableProgressP progress: 0, total: 0, - optCh: make(chan Operator, 16), + optCh: make(chan operator, 16), finishCh: make(chan struct{}), } tpp.init() @@ -111,77 +112,77 @@ func NewTableProgressPrinter(tableNums int, finishTableNums int) *TableProgressP return tpp } -func (tpp *TableProgressPrinter) SetOutput(output io.Writer) { +func (tpp *tableProgressPrinter) SetOutput(output io.Writer) { tpp.output = output } -func (tpp *TableProgressPrinter) Inc(name string) { - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_INC, +func (tpp *tableProgressPrinter) Inc(name string) { + tpp.optCh <- operator{ + optType: progressOptInc, name: name, } } -func (tpp *TableProgressPrinter) UpdateTotal(name string, total int, stopUpdate bool) { - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_UPDATE, +func (tpp *tableProgressPrinter) UpdateTotal(name string, total int, stopUpdate bool) { + tpp.optCh <- operator{ + optType: progressOptUpdate, name: name, total: total, totalStopUpdate: stopUpdate, } } -func (tpp *TableProgressPrinter) RegisterTable(name string, isFailed bool, isDone bool, isExist int) { - var state table_state_t +func (tpp *tableProgressPrinter) RegisterTable(name string, isFailed bool, isDone bool, isExist int) { + var state tableState if isFailed { if isDone { switch isExist { case common.UpstreamTableLackFlag: - state = TABLE_STATE_NOT_EXSIT_UPSTREAM | TABLE_STATE_REGISTER + state = tableStateNotExistUpstream | tableStateRegister case common.DownstreamTableLackFlag: - state = TABLE_STATE_NOT_EXSIT_DOWNSTREAM | TABLE_STATE_REGISTER + state = tableStateNotExistDownstream | tableStateRegister default: - state = TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE | TABLE_STATE_REGISTER + state = tableStateResultFailStructureDone | tableStateRegister } } else { - state = TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE | TABLE_STATE_REGISTER + state = tableStateResultFailStructureContinue | tableStateRegister } } else { - state = TABLE_STATE_REGISTER + state = tableStateRegister } - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_REGISTER, + tpp.optCh <- operator{ + optType: progressOptRegister, name: name, state: state, } } -func (tpp *TableProgressPrinter) StartTable(name string, total int, stopUpdate bool) { - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_START, +func (tpp *tableProgressPrinter) StartTable(name string, total int, stopUpdate bool) { + tpp.optCh <- operator{ + optType: progressOptStart, name: name, total: total, - state: TABLE_STATE_PRESTART, + state: tableStatePrestart, totalStopUpdate: stopUpdate, } } -func (tpp *TableProgressPrinter) FailTable(name string) { - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_FAIL, +func (tpp *tableProgressPrinter) FailTable(name string) { + tpp.optCh <- operator{ + optType: progressOptFail, name: name, - state: TABLE_STATE_RESULT_DIFFERENT, + state: tableStateResultDifferent, } } -func (tpp *TableProgressPrinter) Close() { - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_CLOSE, +func (tpp *tableProgressPrinter) Close() { + tpp.optCh <- operator{ + optType: progressOptClose, } <-tpp.finishCh } -func (tpp *TableProgressPrinter) PrintSummary() { +func (tpp *tableProgressPrinter) PrintSummary() { var cleanStr, fixStr string cleanStr = "\x1b[1A\x1b[J" fixStr = "\nSummary:\n\n" @@ 
-195,17 +196,17 @@ func (tpp *TableProgressPrinter) PrintSummary() { SkippedNum := 0 for p := tpp.tableFailList.Front(); p != nil; p = p.Next() { tp := p.Value.(*TableProgress) - if tp.state&(TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE|TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE) != 0 { + if tp.state&(tableStateResultFailStructureDone|tableStateResultFailStructureContinue) != 0 { fixStr = fmt.Sprintf("%sThe structure of %s is not equal.\n", fixStr, tp.name) } - if tp.state&(TABLE_STATE_RESULT_DIFFERENT) != 0 { + if tp.state&(tableStateResultDifferent) != 0 { fixStr = fmt.Sprintf("%sThe data of %s is not equal.\n", fixStr, tp.name) } - if tp.state&(TABLE_STATE_NOT_EXSIT_DOWNSTREAM) != 0 { + if tp.state&(tableStateNotExistDownstream) != 0 { fixStr = fmt.Sprintf("%sThe data of %s does not exist in downstream database.\n", fixStr, tp.name) SkippedNum++ } - if tp.state&(TABLE_STATE_NOT_EXSIT_UPSTREAM) != 0 { + if tp.state&(tableStateNotExistUpstream) != 0 { fixStr = fmt.Sprintf("%sThe data of %s does not exist in upstream database.\n", fixStr, tp.name) SkippedNum++ } @@ -219,9 +220,9 @@ func (tpp *TableProgressPrinter) PrintSummary() { fmt.Fprintf(tpp.output, "%s%s\n", cleanStr, fixStr) } -func (tpp *TableProgressPrinter) Error(err error) { - tpp.optCh <- Operator{ - optType: PROGRESS_OPT_ERROR, +func (tpp *tableProgressPrinter) Error(err error) { + tpp.optCh <- operator{ + optType: progressOptError, } <-tpp.finishCh var cleanStr, fixStr string @@ -230,15 +231,15 @@ func (tpp *TableProgressPrinter) Error(err error) { fmt.Fprintf(tpp.output, "%s%s", cleanStr, fixStr) } -func (tpp *TableProgressPrinter) init() { +func (tpp *tableProgressPrinter) init() { tpp.tableList.PushBack(&TableProgress{ - state: TABLE_STATE_HEAD, + state: tableStateHead, }) tpp.output = os.Stdout } -func (tpp *TableProgressPrinter) serve() { +func (tpp *tableProgressPrinter) serve() { tick := time.NewTicker(200 * time.Millisecond) for { @@ -247,27 +248,27 @@ func (tpp *TableProgressPrinter) serve() { tpp.flush(false) case opt := <-tpp.optCh: switch opt.optType { - case PROGRESS_OPT_CLOSE: + case progressOptClose: tpp.flush(false) tpp.finishCh <- struct{}{} return - case PROGRESS_OPT_ERROR: + case progressOptError: tpp.finishCh <- struct{}{} return - case PROGRESS_OPT_INC: + case progressOptInc: if e, ok := tpp.tableMap[opt.name]; ok { tp := e.Value.(*TableProgress) tp.progress++ tpp.progress++ if tp.progress >= tp.total && tp.totalStopUpdate { - tp.state = (tp.state & TABLE_STATE_RESULT_MASK) | TABLE_STATE_FINISH + tp.state = (tp.state & tableStateResultMask) | tableStateFinish tpp.progress -= tp.progress tpp.total -= tp.total delete(tpp.tableMap, opt.name) tpp.flush(true) } } - case PROGRESS_OPT_REGISTER: + case progressOptRegister: if _, ok := tpp.tableMap[opt.name]; !ok { e := tpp.tableList.PushBack(&TableProgress{ name: opt.name, @@ -278,38 +279,38 @@ func (tpp *TableProgressPrinter) serve() { }) tpp.tableMap[opt.name] = e } - case PROGRESS_OPT_START: + case progressOptStart: e, ok := tpp.tableMap[opt.name] if !ok { e = tpp.tableList.PushBack(&TableProgress{ name: opt.name, progress: 0, total: opt.total, - state: opt.state | TABLE_STATE_RESULT_FAIL_STRUCTURE_PASS, + state: opt.state | tableStateResultFailStructurePass, totalStopUpdate: opt.totalStopUpdate, }) tpp.tableMap[opt.name] = e } else { tp := e.Value.(*TableProgress) - tp.state ^= TABLE_STATE_REGISTER | opt.state + tp.state ^= tableStateRegister | opt.state tp.progress = 0 tp.total = opt.total tp.totalStopUpdate = opt.totalStopUpdate } - if 
e.Value.(*TableProgress).state&TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE == 0 { + if e.Value.(*TableProgress).state&tableStateResultFailStructureDone == 0 { tpp.total += opt.total } else { delete(tpp.tableMap, opt.name) } tpp.flush(true) - case PROGRESS_OPT_UPDATE: + case progressOptUpdate: if e, ok := tpp.tableMap[opt.name]; ok { tp := e.Value.(*TableProgress) tpp.total += opt.total tp.total += opt.total tp.totalStopUpdate = opt.totalStopUpdate } - case PROGRESS_OPT_FAIL: + case progressOptFail: if e, ok := tpp.tableMap[opt.name]; ok { tp := e.Value.(*TableProgress) tp.state |= opt.state @@ -320,8 +321,8 @@ func (tpp *TableProgressPrinter) serve() { } } -// flush flush info -func (tpp *TableProgressPrinter) flush(stateIsChanged bool) { +// flush info +func (tpp *tableProgressPrinter) flush(stateIsChanged bool) { /* * A total of 15 tables need to be compared * @@ -348,22 +349,22 @@ func (tpp *TableProgressPrinter) flush(stateIsChanged bool) { // 4. structure is different and data is same // 5. structure is different and data is different switch tp.state & 0xf { - case TABLE_STATE_PRESTART: - switch tp.state & TABLE_STATE_RESULT_MASK { - case TABLE_STATE_RESULT_OK: + case tableStatePrestart: + switch tp.state & tableStateResultMask { + case tableStateResultOK: fixStr = fmt.Sprintf("%sComparing the table structure of %s ... equivalent\n", fixStr, tp.name) dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) tpp.lines++ tpp.progressTableNums++ - tp.state = TABLE_STATE_COMPARING - case TABLE_STATE_NOT_EXSIT_UPSTREAM, TABLE_STATE_NOT_EXSIT_DOWNSTREAM: + tp.state = tableStateComparing + case tableStateNotExistUpstream, tableStateNotExistDownstream: dynStr = fmt.Sprintf("%sComparing the table data of %s ...skipped\n", dynStr, tp.name) tpp.tableFailList.PushBack(tp) preNode := p.Prev() tpp.tableList.Remove(p) p = preNode tpp.finishTableNums++ - case TABLE_STATE_RESULT_FAIL_STRUCTURE_DONE: + case tableStateResultFailStructureDone: fixStr = fmt.Sprintf("%sComparing the table structure of %s ... failure\n", fixStr, tp.name) tpp.tableFailList.PushBack(tp) // we have empty node as list head, so p is not nil @@ -371,29 +372,29 @@ func (tpp *TableProgressPrinter) flush(stateIsChanged bool) { tpp.tableList.Remove(p) p = preNode tpp.finishTableNums++ - case TABLE_STATE_RESULT_FAIL_STRUCTURE_CONTINUE: + case tableStateResultFailStructureContinue: fixStr = fmt.Sprintf("%sComparing the table structure of %s ... failure\n", fixStr, tp.name) dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) tpp.lines++ tpp.progressTableNums++ - tp.state ^= TABLE_STATE_COMPARING | TABLE_STATE_PRESTART - case TABLE_STATE_RESULT_FAIL_STRUCTURE_PASS: + tp.state ^= tableStateComparing | tableStatePrestart + case tableStateResultFailStructurePass: fixStr = fmt.Sprintf("%sComparing the table structure of %s ... skip\n", fixStr, tp.name) dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) tpp.lines++ tpp.progressTableNums++ - tp.state ^= TABLE_STATE_COMPARING | TABLE_STATE_PRESTART + tp.state ^= tableStateComparing | tableStatePrestart } - case TABLE_STATE_COMPARING: + case tableStateComparing: dynStr = fmt.Sprintf("%sComparing the table data of %s ...\n", dynStr, tp.name) tpp.lines++ - case TABLE_STATE_FINISH: - if tp.state&TABLE_STATE_RESULT_DIFFERENT == 0 { + case tableStateFinish: + if tp.state&tableStateResultDifferent == 0 { fixStr = fmt.Sprintf("%sComparing the table data of %s ... 
equivalent\n", fixStr, tp.name) } else { fixStr = fmt.Sprintf("%sComparing the table data of %s ... failure\n", fixStr, tp.name) } - if tp.state&TABLE_STATE_RESULT_MASK != 0 { + if tp.state&tableStateResultMask != 0 { tpp.tableFailList.PushBack(tp) } // we have empty node as list head, so p is not nil @@ -418,62 +419,72 @@ func (tpp *TableProgressPrinter) flush(stateIsChanged bool) { fmt.Fprintf(tpp.output, "Progress [%s>%s] %d%% %d/%d\n", strings.Repeat("=", numLeft), strings.Repeat("-", 60-numLeft), percent, tpp.progress, tpp.total) } -var progress_ *TableProgressPrinter = nil +var progress *tableProgressPrinter = nil +// Init initialize the printer func Init(tableNums, finishTableNums int) { - progress_ = NewTableProgressPrinter(tableNums, finishTableNums) + progress = newTableProgressPrinter(tableNums, finishTableNums) } +// Inc update the progress of one table func Inc(name string) { - if progress_ != nil { - progress_.Inc(name) + if progress != nil { + progress.Inc(name) } } +// UpdateTotal the total for given table func UpdateTotal(name string, total int, stopUpdate bool) { - if progress_ != nil { - progress_.UpdateTotal(name, total, stopUpdate) + if progress != nil { + progress.UpdateTotal(name, total, stopUpdate) } } +// RegisterTable register a new table func RegisterTable(name string, isFailed bool, isDone bool, isExist int) { - if progress_ != nil { - progress_.RegisterTable(name, isFailed, isDone, isExist) + if progress != nil { + progress.RegisterTable(name, isFailed, isDone, isExist) } } +// StartTable start a table func StartTable(name string, total int, stopUpdate bool) { - if progress_ != nil { - progress_.StartTable(name, total, stopUpdate) + if progress != nil { + progress.StartTable(name, total, stopUpdate) } } +// FailTable stop a table func FailTable(name string) { - if progress_ != nil { - progress_.FailTable(name) + if progress != nil { + progress.FailTable(name) } } +// Close close the progress printer func Close() { - if progress_ != nil { - progress_.Close() + if progress != nil { + progress.Close() } } +// PrintSummary print the summary func PrintSummary() { - if progress_ != nil { - progress_.PrintSummary() + if progress != nil { + progress.PrintSummary() } } +// Error pass the error into progress printer func Error(err error) { - if progress_ != nil { - progress_.Error(err) + if progress != nil { + progress.Error(err) } } +// SetOutput set the output for progress printer func SetOutput(output io.Writer) { - if progress_ != nil { - progress_.SetOutput(output) + if progress != nil { + progress.SetOutput(output) } } diff --git a/sync_diff_inspector/progress/progress_test.go b/sync_diff_inspector/progress/progress_test.go index 7393f93c022..3a12bc1122c 100644 --- a/sync_diff_inspector/progress/progress_test.go +++ b/sync_diff_inspector/progress/progress_test.go @@ -24,7 +24,7 @@ import ( ) func TestProgress(t *testing.T) { - p := NewTableProgressPrinter(6, 0) + p := newTableProgressPrinter(6, 0) p.RegisterTable("1", true, true, common.AllTableExistFlag) p.StartTable("1", 50, true) p.RegisterTable("2", true, false, common.AllTableExistFlag) @@ -59,7 +59,7 @@ func TestProgress(t *testing.T) { } func TestTableError(t *testing.T) { - p := NewTableProgressPrinter(4, 0) + p := newTableProgressPrinter(4, 0) p.RegisterTable("1", true, true, common.AllTableExistFlag) p.StartTable("1", 50, true) p.RegisterTable("2", true, true, common.AllTableExistFlag) diff --git a/sync_diff_inspector/report/report.go b/sync_diff_inspector/report/report.go index ba58878e8a5..cb74b3b0393 
100644 --- a/sync_diff_inspector/report/report.go +++ b/sync_diff_inspector/report/report.go @@ -41,17 +41,18 @@ const ( // Pass means all data and struct of tables are equal Pass = "pass" // Fail means not all data or struct of tables are equal - Fail = "fail" + Fail = "fail" + // Error means we meet an error Error = "error" ) -// ReportConfig stores the config information for the user -type ReportConfig struct { +// Config stores the config information for the user +type Config struct { Host string `toml:"host"` Port int `toml:"port"` User string `toml:"user"` Snapshot string `toml:"snapshot,omitempty"` - SqlMode string `toml:"sql-mode,omitempty"` + SQLMode string `toml:"sql-mode,omitempty"` } // TableResult saves the check result for every table. @@ -234,6 +235,7 @@ func (r *Report) CommitSummary() error { return nil } +// Print print the current report func (r *Report) Print(w io.Writer) error { var summary strings.Builder if r.Result == Pass && r.SkippedNum == 0 { @@ -272,7 +274,7 @@ func (r *Report) Print(w io.Writer) error { for schema, tableMap := range r.TableResults { for table, result := range tableMap { if result.MeetError != nil { - summary.WriteString(fmt.Sprintf("%s error occured in %s\n", result.MeetError.Error(), dbutil.TableName(schema, table))) + summary.WriteString(fmt.Sprintf("%s error occurred in %s\n", result.MeetError.Error(), dbutil.TableName(schema, table))) } } } @@ -291,6 +293,7 @@ func NewReport(task *config.TaskConfig) *Report { } } +// Init initialize the report func (r *Report) Init(tableDiffs []*common.TableDiff, sourceConfig [][]byte, targetConfig []byte) { r.StartTime = time.Now() r.SourceConfig = sourceConfig @@ -325,7 +328,7 @@ func (r *Report) SetTableStructCheckResult(schema, table string, equal bool, ski } // SetTableDataCheckResult sets the data check result for table. -func (r *Report) SetTableDataCheckResult(schema, table string, equal bool, rowsAdd, rowsDelete int, upCount, downCount int64, id *chunk.ChunkID) { +func (r *Report) SetTableDataCheckResult(schema, table string, equal bool, rowsAdd, rowsDelete int, upCount, downCount int64, id *chunk.CID) { r.Lock() defer r.Unlock() result := r.TableResults[schema][table] @@ -368,7 +371,7 @@ func (r *Report) SetTableMeetError(schema, table string, err error) { // GetSnapshot get the snapshot of the current state of the report, then we can restart the // sync-diff and get the correct report state. 
-func (r *Report) GetSnapshot(chunkID *chunk.ChunkID, schema, table string) (*Report, error) { +func (r *Report) GetSnapshot(chunkID *chunk.CID, schema, table string) (*Report, error) { r.RLock() defer r.RUnlock() targetID := utils.UniqueID(schema, table) @@ -387,7 +390,7 @@ func (r *Report) GetSnapshot(chunkID *chunk.ChunkID, schema, table string) (*Rep MeetError: result.MeetError, } for id, chunkResult := range result.ChunkMap { - sid := new(chunk.ChunkID) + sid := new(chunk.CID) err := sid.FromString(id) if err != nil { return nil, errors.Trace(err) diff --git a/sync_diff_inspector/report/report_test.go b/sync_diff_inspector/report/report_test.go index ce49d1a8016..6d36ffeb214 100644 --- a/sync_diff_inspector/report/report_test.go +++ b/sync_diff_inspector/report/report_test.go @@ -24,7 +24,7 @@ import ( "github.com/BurntSushi/toml" "github.com/DATA-DOG/go-sqlmock" "github.com/pingcap/tidb/pkg/parser" - "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/config" "github.com/pingcap/tiflow/sync_diff_inspector/source/common" @@ -45,10 +45,10 @@ func TestReport(t *testing.T) { report := NewReport(task) createTableSQL1 := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo1, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) createTableSQL2 := "create table `atest`.`atbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err := dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) tableDiffs := []*common.TableDiff{ @@ -77,7 +77,7 @@ func TestReport(t *testing.T) { Collation: "[123]", }, } - configs := []*ReportConfig{ + configs := []*Config{ { Host: "127.0.0.1", Port: 3306, @@ -111,37 +111,37 @@ func TestReport(t *testing.T) { // Test Table Report report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("test", "tbl", true, 100, 200, 222, 222, &chunk.ChunkID{1, 1, 1, 1, 2}) + report.SetTableDataCheckResult("test", "tbl", true, 100, 200, 222, 222, &chunk.CID{1, 1, 1, 1, 2}) report.SetTableMeetError("test", "tbl", errors.New("eeee")) - new_report := NewReport(task) - new_report.LoadReport(report) + newReport := NewReport(task) + newReport.LoadReport(report) - require.Equal(t, new_report.TotalSize, int64(579)) - result, ok := new_report.TableResults["test"]["tbl"] + require.Equal(t, newReport.TotalSize, int64(579)) + result, ok := newReport.TableResults["test"]["tbl"] require.True(t, ok) require.Equal(t, result.MeetError.Error(), "eeee") require.True(t, result.DataEqual) require.True(t, result.StructEqual) - require.Equal(t, new_report.getSortedTables(), [][]string{{"`atest`.`atbl`", "0", "0"}, {"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) - require.Equal(t, new_report.getDiffRows(), [][]string{}) + require.Equal(t, newReport.getSortedTables(), [][]string{{"`atest`.`atbl`", "0", "0"}, {"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) + require.Equal(t, newReport.getDiffRows(), [][]string{}) - new_report.SetTableStructCheckResult("atest", "atbl", true, false, 
common.AllTableExistFlag) - new_report.SetTableDataCheckResult("atest", "atbl", false, 111, 222, 333, 333, &chunk.ChunkID{1, 1, 1, 1, 2}) - require.Equal(t, new_report.getSortedTables(), [][]string{{"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) - require.Equal(t, new_report.getDiffRows(), [][]string{{"`atest`.`atbl`", "succeed", "true", "+111/-222", "333", "333"}}) + newReport.SetTableStructCheckResult("atest", "atbl", true, false, common.AllTableExistFlag) + newReport.SetTableDataCheckResult("atest", "atbl", false, 111, 222, 333, 333, &chunk.CID{1, 1, 1, 1, 2}) + require.Equal(t, newReport.getSortedTables(), [][]string{{"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) + require.Equal(t, newReport.getDiffRows(), [][]string{{"`atest`.`atbl`", "succeed", "true", "+111/-222", "333", "333"}}) - new_report.SetTableStructCheckResult("atest", "atbl", false, false, common.AllTableExistFlag) - require.Equal(t, new_report.getSortedTables(), [][]string{{"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) - require.Equal(t, new_report.getDiffRows(), [][]string{{"`atest`.`atbl`", "succeed", "false", "+111/-222", "333", "333"}}) + newReport.SetTableStructCheckResult("atest", "atbl", false, false, common.AllTableExistFlag) + require.Equal(t, newReport.getSortedTables(), [][]string{{"`ctest`.`atbl`", "0", "0"}, {"`dtest`.`atbl`", "0", "0"}, {"`test`.`tbl`", "222", "222"}}) + require.Equal(t, newReport.getDiffRows(), [][]string{{"`atest`.`atbl`", "succeed", "false", "+111/-222", "333", "333"}}) - new_report.SetTableStructCheckResult("ctest", "atbl", false, true, common.AllTableExistFlag) + newReport.SetTableStructCheckResult("ctest", "atbl", false, true, common.AllTableExistFlag) - new_report.SetTableStructCheckResult("dtest", "atbl", false, true, common.DownstreamTableLackFlag) + newReport.SetTableStructCheckResult("dtest", "atbl", false, true, common.DownstreamTableLackFlag) buf := new(bytes.Buffer) - new_report.Print(buf) + newReport.Print(buf) info := buf.String() require.Contains(t, info, "The structure of `atest`.`atbl` is not equal\n") require.Contains(t, info, "The data of `atest`.`atbl` is not equal\n") @@ -162,7 +162,7 @@ func TestCalculateTotal(t *testing.T) { report := NewReport(task) createTableSQL := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) tableDiffs := []*common.TableDiff{ @@ -173,7 +173,7 @@ func TestCalculateTotal(t *testing.T) { Collation: "[123]", }, } - configs := []*ReportConfig{ + configs := []*Config{ { Host: "127.0.0.1", Port: 3306, @@ -209,7 +209,7 @@ func TestCalculateTotal(t *testing.T) { func TestPrint(t *testing.T) { report := NewReport(task) createTableSQL := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) tableDiffs := []*common.TableDiff{ @@ -226,7 +226,7 @@ func TestPrint(t *testing.T) { Collation: "[123]", }, } - configs := []*ReportConfig{ + configs := []*Config{ { Host: "127.0.0.1", Port: 3306, @@ -256,7 +256,7 @@ func TestPrint(t *testing.T) { var buf *bytes.Buffer // All Pass 
report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("test", "tbl", true, 0, 0, 22, 22, &chunk.ChunkID{0, 0, 0, 0, 1}) + report.SetTableDataCheckResult("test", "tbl", true, 0, 0, 22, 22, &chunk.CID{0, 0, 0, 0, 1}) buf = new(bytes.Buffer) report.Print(buf) require.Equal(t, buf.String(), "A total of 0 table have been compared and all are equal.\n"+ @@ -268,20 +268,20 @@ func TestPrint(t *testing.T) { buf = new(bytes.Buffer) report.Print(buf) require.Equal(t, buf.String(), "Error in comparison process:\n"+ - "123 error occured in `test`.`tbl1`\n"+ + "123 error occurred in `test`.`tbl1`\n"+ "You can view the comparison details through 'output_dir/sync_diff.log'\n") } func TestGetSnapshot(t *testing.T) { report := NewReport(task) createTableSQL1 := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo1, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) createTableSQL2 := "create table `atest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err := dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) createTableSQL3 := "create table `xtest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo3, err := dbutil.GetTableInfoBySQL(createTableSQL3, parser.New()) + tableInfo3, err := dbutiltest.GetTableInfoBySQL(createTableSQL3, parser.New()) require.NoError(t, err) tableDiffs := []*common.TableDiff{ @@ -302,7 +302,7 @@ func TestGetSnapshot(t *testing.T) { Collation: "[123]", }, } - configs := []*ReportConfig{ + configs := []*Config{ { Host: "127.0.0.1", Port: 3306, @@ -330,39 +330,39 @@ func TestGetSnapshot(t *testing.T) { report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("test", "tbl", false, 100, 100, 200, 300, &chunk.ChunkID{0, 0, 0, 1, 10}) - report.SetTableDataCheckResult("test", "tbl", true, 0, 0, 300, 300, &chunk.ChunkID{0, 0, 0, 3, 10}) - report.SetTableDataCheckResult("test", "tbl", false, 200, 200, 400, 500, &chunk.ChunkID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("test", "tbl", false, 100, 100, 200, 300, &chunk.CID{0, 0, 0, 1, 10}) + report.SetTableDataCheckResult("test", "tbl", true, 0, 0, 300, 300, &chunk.CID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("test", "tbl", false, 200, 200, 400, 500, &chunk.CID{0, 0, 0, 3, 10}) report.SetTableStructCheckResult("atest", "tbl", true, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("atest", "tbl", false, 100, 100, 500, 600, &chunk.ChunkID{0, 0, 0, 0, 10}) - report.SetTableDataCheckResult("atest", "tbl", true, 0, 0, 600, 600, &chunk.ChunkID{0, 0, 0, 3, 10}) - report.SetTableDataCheckResult("atest", "tbl", false, 200, 200, 700, 800, &chunk.ChunkID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("atest", "tbl", false, 100, 100, 500, 600, &chunk.CID{0, 0, 0, 0, 10}) + report.SetTableDataCheckResult("atest", "tbl", true, 0, 0, 600, 600, &chunk.CID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("atest", "tbl", false, 200, 200, 700, 800, &chunk.CID{0, 0, 0, 3, 10}) report.SetTableStructCheckResult("xtest", "tbl", true, false, common.AllTableExistFlag) - 
report.SetTableDataCheckResult("xtest", "tbl", false, 100, 100, 800, 900, &chunk.ChunkID{0, 0, 0, 0, 10}) - report.SetTableDataCheckResult("xtest", "tbl", true, 0, 0, 900, 900, &chunk.ChunkID{0, 0, 0, 1, 10}) - report.SetTableDataCheckResult("xtest", "tbl", false, 200, 200, 1000, 1100, &chunk.ChunkID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("xtest", "tbl", false, 100, 100, 800, 900, &chunk.CID{0, 0, 0, 0, 10}) + report.SetTableDataCheckResult("xtest", "tbl", true, 0, 0, 900, 900, &chunk.CID{0, 0, 0, 1, 10}) + report.SetTableDataCheckResult("xtest", "tbl", false, 200, 200, 1000, 1100, &chunk.CID{0, 0, 0, 3, 10}) - report_snap, err := report.GetSnapshot(&chunk.ChunkID{0, 0, 0, 1, 10}, "test", "tbl") + reportSnap, err := report.GetSnapshot(&chunk.CID{0, 0, 0, 1, 10}, "test", "tbl") require.NoError(t, err) - require.Equal(t, report_snap.TotalSize, report.TotalSize) - require.Equal(t, report_snap.Result, report.Result) + require.Equal(t, reportSnap.TotalSize, report.TotalSize) + require.Equal(t, reportSnap.Result, report.Result) for key, value := range report.TableResults { - if _, ok := report_snap.TableResults[key]; !ok { + if _, ok := reportSnap.TableResults[key]; !ok { v, ok := value["tbl"] require.True(t, ok) require.Equal(t, v.Schema, "atest") continue } - if _, ok := report_snap.TableResults[key]["tbl"]; !ok { + if _, ok := reportSnap.TableResults[key]["tbl"]; !ok { require.Equal(t, key, "atest") continue } v1 := value["tbl"] - v2 := report_snap.TableResults[key]["tbl"] + v2 := reportSnap.TableResults[key]["tbl"] require.Equal(t, v1.Schema, v2.Schema) require.Equal(t, v1.Table, v2.Table) require.Equal(t, v1.StructEqual, v2.StructEqual) @@ -372,14 +372,14 @@ func TestGetSnapshot(t *testing.T) { chunkMap1 := v1.ChunkMap chunkMap2 := v2.ChunkMap for id, r1 := range chunkMap1 { - sid := new(chunk.ChunkID) + sid := new(chunk.CID) if _, ok := chunkMap2[id]; !ok { require.NoError(t, sid.FromString(id)) - require.Equal(t, sid.Compare(&chunk.ChunkID{0, 0, 0, 3, 10}), 0) + require.Equal(t, sid.Compare(&chunk.CID{0, 0, 0, 3, 10}), 0) continue } require.NoError(t, sid.FromString(id)) - require.True(t, sid.Compare(&chunk.ChunkID{0, 0, 0, 1, 10}) <= 0) + require.True(t, sid.Compare(&chunk.CID{0, 0, 0, 1, 10}) <= 0) r2 := chunkMap2[id] require.Equal(t, r1.RowsAdd, r2.RowsAdd) require.Equal(t, r1.RowsDelete, r2.RowsDelete) @@ -392,16 +392,16 @@ func TestCommitSummary(t *testing.T) { outputDir := "./" report := NewReport(&config.TaskConfig{OutputDir: outputDir, FixDir: task.FixDir}) createTableSQL1 := "create table `test`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo1, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo1, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) createTableSQL2 := "create table `atest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo2, err := dbutil.GetTableInfoBySQL(createTableSQL2, parser.New()) + tableInfo2, err := dbutiltest.GetTableInfoBySQL(createTableSQL2, parser.New()) require.NoError(t, err) createTableSQL3 := "create table `xtest`.`tbl`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" - tableInfo3, err := dbutil.GetTableInfoBySQL(createTableSQL3, parser.New()) + tableInfo3, err := dbutiltest.GetTableInfoBySQL(createTableSQL3, parser.New()) require.NoError(t, err) createTableSQL4 := "create table `xtest`.`tb1`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" 
- tableInfo4, err := dbutil.GetTableInfoBySQL(createTableSQL4, parser.New()) + tableInfo4, err := dbutiltest.GetTableInfoBySQL(createTableSQL4, parser.New()) require.NoError(t, err) tableDiffs := []*common.TableDiff{ { @@ -436,7 +436,7 @@ func TestCommitSummary(t *testing.T) { Collation: "[123]", }, } - configs := []*ReportConfig{ + configs := []*Config{ { Host: "127.0.0.1", Port: 3306, @@ -464,19 +464,19 @@ func TestCommitSummary(t *testing.T) { report.Init(tableDiffs, configsBytes[:2], configsBytes[2]) report.SetTableStructCheckResult("test", "tbl", true, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("test", "tbl", true, 100, 200, 400, 400, &chunk.ChunkID{0, 0, 0, 1, 10}) + report.SetTableDataCheckResult("test", "tbl", true, 100, 200, 400, 400, &chunk.CID{0, 0, 0, 1, 10}) report.SetTableStructCheckResult("atest", "tbl", true, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("atest", "tbl", false, 100, 200, 500, 600, &chunk.ChunkID{0, 0, 0, 2, 10}) + report.SetTableDataCheckResult("atest", "tbl", false, 100, 200, 500, 600, &chunk.CID{0, 0, 0, 2, 10}) report.SetTableStructCheckResult("xtest", "tbl", false, false, common.AllTableExistFlag) - report.SetTableDataCheckResult("xtest", "tbl", false, 100, 200, 600, 700, &chunk.ChunkID{0, 0, 0, 3, 10}) + report.SetTableDataCheckResult("xtest", "tbl", false, 100, 200, 600, 700, &chunk.CID{0, 0, 0, 3, 10}) report.SetTableStructCheckResult("xtest", "tb1", false, true, common.UpstreamTableLackFlag) - report.SetTableDataCheckResult("xtest", "tb1", false, 0, 200, 0, 200, &chunk.ChunkID{0, 0, 0, 4, 10}) + report.SetTableDataCheckResult("xtest", "tb1", false, 0, 200, 0, 200, &chunk.CID{0, 0, 0, 4, 10}) report.SetTableStructCheckResult("xtest", "tb2", false, true, common.DownstreamTableLackFlag) - report.SetTableDataCheckResult("xtest", "tb2", false, 100, 0, 100, 0, &chunk.ChunkID{0, 0, 0, 5, 10}) + report.SetTableDataCheckResult("xtest", "tb2", false, 100, 0, 100, 0, &chunk.CID{0, 0, 0, 5, 10}) err = report.CommitSummary() require.NoError(t, err) diff --git a/sync_diff_inspector/source/chunks_iter.go b/sync_diff_inspector/source/chunks_iter.go index 44b051fcb72..0439aba8ea3 100644 --- a/sync_diff_inspector/source/chunks_iter.go +++ b/sync_diff_inspector/source/chunks_iter.go @@ -28,7 +28,7 @@ import ( // ChunksIterator is used for single mysql/tidb source. 
type ChunksIterator struct { - ID *chunk.ChunkID + ID *chunk.CID tableAnalyzer TableAnalyzer TableDiffs []*common.TableDiff @@ -40,6 +40,7 @@ type ChunksIterator struct { cancel context.CancelFunc } +// NewChunksIterator returns a new iterator func NewChunksIterator(ctx context.Context, analyzer TableAnalyzer, tableDiffs []*common.TableDiff, startRange *splitter.RangeInfo, splitThreadCount int) (*ChunksIterator, error) { ctxx, cancel := context.WithCancel(ctx) iter := &ChunksIterator{ @@ -114,7 +115,7 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter return case t.chunksCh <- &splitter.RangeInfo{ ChunkRange: &chunk.Range{ - Index: &chunk.ChunkID{ + Index: &chunk.CID{ TableIndex: curTableIndex, }, Type: chunk.Empty, @@ -162,6 +163,7 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter pool.WaitFinished() } +// Next returns the next chunk func (t *ChunksIterator) Next(ctx context.Context) (*splitter.RangeInfo, error) { select { case <-ctx.Done(): @@ -176,6 +178,7 @@ func (t *ChunksIterator) Next(ctx context.Context) (*splitter.RangeInfo, error) } } +// Close closes the iterator func (t *ChunksIterator) Close() { t.cancel() } diff --git a/sync_diff_inspector/source/common/common_test.go b/sync_diff_inspector/source/common/common_test.go index 467548935bf..5649bbf347d 100644 --- a/sync_diff_inspector/source/common/common_test.go +++ b/sync_diff_inspector/source/common/common_test.go @@ -19,13 +19,14 @@ import ( "github.com/pingcap/tidb/pkg/parser" "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" "github.com/pingcap/tiflow/sync_diff_inspector/utils" "github.com/stretchr/testify/require" ) func TestRowData(t *testing.T) { createTableSQL := "create table test.test(id int(24), name varchar(24), age int(24), primary key(id, name));" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) _, orderKeyCols := dbutil.SelectUniqueOrderKey(tableInfo) diff --git a/sync_diff_inspector/source/common/rows.go b/sync_diff_inspector/source/common/rows.go index 27470c15042..acfb0928ff9 100644 --- a/sync_diff_inspector/source/common/rows.go +++ b/sync_diff_inspector/source/common/rows.go @@ -23,6 +23,7 @@ import ( "go.uber.org/zap" ) +// RowData represents a single row type RowData struct { Data map[string]*dbutil.ColumnData Source int @@ -34,7 +35,12 @@ type RowDatas struct { OrderKeyCols []*model.ColumnInfo } -func (r RowDatas) Len() int { return len(r.Rows) } +// Len returns the number of rows +func (r RowDatas) Len() int { + return len(r.Rows) +} + +// Less compares two rows func (r RowDatas) Less(i, j int) bool { for _, col := range r.OrderKeyCols { col1, ok := r.Rows[i].Data[col.Name.O] @@ -83,7 +89,11 @@ func (r RowDatas) Less(i, j int) bool { return false } -func (r RowDatas) Swap(i, j int) { r.Rows[i], r.Rows[j] = r.Rows[j], r.Rows[i] } + +// Swap swap two rows +func (r RowDatas) Swap(i, j int) { + r.Rows[i], r.Rows[j] = r.Rows[j], r.Rows[i] +} // Push implements heap.Interface's Push function func (r *RowDatas) Push(x interface{}) { diff --git a/sync_diff_inspector/source/common/table_diff.go b/sync_diff_inspector/source/common/table_diff.go index 2960f0ba7cb..74507a2ad7c 100644 --- a/sync_diff_inspector/source/common/table_diff.go +++ b/sync_diff_inspector/source/common/table_diff.go @@ -73,11 +73,15 @@ type TableDiff struct { } const ( - AllTableExistFlag = 0 + // 
AllTableExistFlag means the table exists in both upstream and downstream + AllTableExistFlag = 0 + // DownstreamTableLackFlag means the table only exists in upstream DownstreamTableLackFlag = -1 - UpstreamTableLackFlag = 1 + // UpstreamTableLackFlag means the table only exists in downstream + UpstreamTableLackFlag = 1 ) +// AllTableExist check the status func AllTableExist(tableLack int) bool { return tableLack == AllTableExistFlag } diff --git a/sync_diff_inspector/source/mysql_shard.go b/sync_diff_inspector/source/mysql_shard.go index ff0edd9e8c9..8e3461c0c18 100644 --- a/sync_diff_inspector/source/mysql_shard.go +++ b/sync_diff_inspector/source/mysql_shard.go @@ -33,10 +33,12 @@ import ( "go.uber.org/zap" ) +// MySQLTableAnalyzer is used to analyze MySQL table type MySQLTableAnalyzer struct { sourceTableMap map[string][]*common.TableShardSource } +// AnalyzeSplitter return an iterator for current table func (a *MySQLTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common.TableDiff, startRange *splitter.RangeInfo) (splitter.ChunkIterator, error) { matchedSources := getMatchedSourcesForTable(a.sourceTableMap, table) @@ -57,6 +59,7 @@ func (a *MySQLTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common. return randIter, nil } +// MySQLSources represent one table in MySQL type MySQLSources struct { tableDiffs []*common.TableDiff @@ -74,16 +77,19 @@ func getMatchedSourcesForTable(sourceTablesMap map[string][]*common.TableShardSo return matchSources } +// GetTableAnalyzer get analyzer for current table func (s *MySQLSources) GetTableAnalyzer() TableAnalyzer { return &MySQLTableAnalyzer{ s.sourceTablesMap, } } +// GetRangeIterator get range iterator func (s *MySQLSources) GetRangeIterator(ctx context.Context, r *splitter.RangeInfo, analyzer TableAnalyzer, splitThreadCount int) (RangeIterator, error) { return NewChunksIterator(ctx, analyzer, s.tableDiffs, r, splitThreadCount) } +// Close close the current table func (s *MySQLSources) Close() { for _, t := range s.sourceTablesMap { for _, db := range t { @@ -92,7 +98,8 @@ func (s *MySQLSources) Close() { } } -func (s *MySQLSources) GetCountAndMd5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { +// GetCountAndMD5 return count and checksum +func (s *MySQLSources) GetCountAndMD5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { beginTime := time.Now() table := s.tableDiffs[tableRange.GetTableIndex()] chunk := tableRange.GetChunk() @@ -102,7 +109,7 @@ func (s *MySQLSources) GetCountAndMd5(ctx context.Context, tableRange *splitter. for _, ms := range matchSources { go func(ms *common.TableShardSource) { - count, checksum, err := utils.GetCountAndMd5Checksum(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, table.Info, chunk.Where, chunk.Args) + count, checksum, err := utils.GetCountAndMD5Checksum(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, table.Info, chunk.Where, chunk.Args) infoCh <- &ChecksumInfo{ Checksum: checksum, Count: count, @@ -137,24 +144,25 @@ func (s *MySQLSources) GetCountAndMd5(ctx context.Context, tableRange *splitter. 
} } +// GetCountForLackTable return count for lack table func (s *MySQLSources) GetCountForLackTable(ctx context.Context, tableRange *splitter.RangeInfo) int64 { table := s.tableDiffs[tableRange.GetTableIndex()] var totalCount int64 matchSources := getMatchedSourcesForTable(s.sourceTablesMap, table) - if matchSources != nil { - for _, ms := range matchSources { - count, _ := dbutil.GetRowCount(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, "", nil) - totalCount += count - } + for _, ms := range matchSources { + count, _ := dbutil.GetRowCount(ctx, ms.DBConn, ms.OriginSchema, ms.OriginTable, "", nil) + totalCount += count } return totalCount } +// GetTables return all tables func (s *MySQLSources) GetTables() []*common.TableDiff { return s.tableDiffs } +// GenerateFixSQL generate SQL func (s *MySQLSources) GenerateFixSQL(t DMLType, upstreamData, downstreamData map[string]*dbutil.ColumnData, tableIndex int) string { switch t { case Insert: @@ -169,6 +177,7 @@ func (s *MySQLSources) GenerateFixSQL(t DMLType, upstreamData, downstreamData ma return "" } +// GetRowsIterator get iterator for current table func (s *MySQLSources) GetRowsIterator(ctx context.Context, tableRange *splitter.RangeInfo) (RowDataIterator, error) { chunk := tableRange.GetChunk() @@ -187,6 +196,11 @@ func (s *MySQLSources) GetRowsIterator(ctx context.Context, tableRange *splitter rowsQuery, orderKeyCols = utils.GetTableRowsQueryFormat(ms.OriginSchema, ms.OriginTable, table.Info, table.Collation) query := fmt.Sprintf(rowsQuery, chunk.Where) rows, err := ms.DBConn.QueryContext(ctx, query, chunk.Args...) + defer func() { + if rows != nil { + _ = rows.Err() + } + }() if err != nil { return nil, errors.Trace(err) } @@ -222,6 +236,7 @@ func (s *MySQLSources) GetRowsIterator(ctx context.Context, tableRange *splitter }, nil } +// GetDB get the current DB func (s *MySQLSources) GetDB() *sql.DB { // return any of them is ok for _, st := range s.sourceTablesMap { @@ -233,11 +248,13 @@ func (s *MySQLSources) GetDB() *sql.DB { return nil } +// GetSnapshot get the current snapshot func (s *MySQLSources) GetSnapshot() string { log.Fatal("unreachable!, mysql doesn't have the snapshot") return "" } +// GetSourceStructInfo get the current table info func (s *MySQLSources) GetSourceStructInfo(ctx context.Context, tableIndex int) ([]*model.TableInfo, error) { tableDiff := s.GetTables()[tableIndex] // for tables that do not exist upstream or downstream @@ -258,6 +275,7 @@ func (s *MySQLSources) GetSourceStructInfo(ctx context.Context, tableIndex int) return sourceTableInfos, nil } +// MultiSourceRowsIterator is used to iterate rows from multi source type MultiSourceRowsIterator struct { sourceRows map[int]*sql.Rows sourceRowDatas *common.RowDatas @@ -271,6 +289,7 @@ func getRowData(rows *sql.Rows) (rowData map[string]*dbutil.ColumnData, err erro return } +// Next return the next row func (ms *MultiSourceRowsIterator) Next() (map[string]*dbutil.ColumnData, error) { // Before running getSourceRow, heap save one row from all the sources, // otherwise this source has read to the end. Each row should be the smallest in each source. 
@@ -297,12 +316,14 @@ func (ms *MultiSourceRowsIterator) Next() (map[string]*dbutil.ColumnData, error) return rowData.Data, nil } +// Close return all sources func (ms *MultiSourceRowsIterator) Close() { for _, s := range ms.sourceRows { s.Close() } } +// NewMySQLSources return sources for MySQL tables func NewMySQLSources(ctx context.Context, tableDiffs []*common.TableDiff, ds []*config.DataSource, threadCount int, f tableFilter.Filter, skipNonExistingTable bool) (Source, error) { sourceTablesMap := make(map[string][]*common.TableShardSource) // we should get the real table name @@ -340,20 +361,20 @@ func NewMySQLSources(ctx context.Context, tableDiffs []*common.TableDiff, ds []* return nil, errors.Errorf("get route result for %d source %s.%s failed, error %v", i, schema, table, err) } } - uniqueId := utils.UniqueID(targetSchema, targetTable) + uniqueID := utils.UniqueID(targetSchema, targetTable) isMatched := f.MatchTable(targetSchema, targetTable) if isMatched { // if match the filter, we should respect it and check target has this table later. - sourceTablesAfterRoute[uniqueId] = struct{}{} + sourceTablesAfterRoute[uniqueID] = struct{}{} } - if _, ok := targetUniqueTableMap[uniqueId]; !ok && !(isMatched && skipNonExistingTable) { + if _, ok := targetUniqueTableMap[uniqueID]; !ok && !(isMatched && skipNonExistingTable) { continue } - maxSourceRouteTableCount[uniqueId]++ - if _, ok := sourceTablesMap[uniqueId]; !ok { - sourceTablesMap[uniqueId] = make([]*common.TableShardSource, 0) + maxSourceRouteTableCount[uniqueID]++ + if _, ok := sourceTablesMap[uniqueID]; !ok { + sourceTablesMap[uniqueID] = make([]*common.TableShardSource, 0) } - sourceTablesMap[uniqueId] = append(sourceTablesMap[uniqueId], &common.TableShardSource{ + sourceTablesMap[uniqueID] = append(sourceTablesMap[uniqueID], &common.TableShardSource{ TableSource: common.TableSource{ OriginSchema: schema, OriginTable: table, diff --git a/sync_diff_inspector/source/source.go b/sync_diff_inspector/source/source.go index a3f7500a019..143187ebbfc 100644 --- a/sync_diff_inspector/source/source.go +++ b/sync_diff_inspector/source/source.go @@ -35,20 +35,25 @@ import ( "go.uber.org/zap" ) +// DMLType is the type of DML type DMLType int32 const ( + // Insert means insert Insert DMLType = iota + 1 + // Delete means delete Delete + // Replace means replace Replace ) const ( - ShieldDBName = "_no__exists__db_" - ShieldTableName = "_no__exists__table_" - GetSyncPointQuery = "SELECT primary_ts, secondary_ts FROM tidb_cdc.syncpoint_v1 ORDER BY primary_ts DESC LIMIT 1" + shieldDBName = "_no__exists__db_" + shieldTableName = "_no__exists__table_" + getSyncPointQuery = "SELECT primary_ts, secondary_ts FROM tidb_cdc.syncpoint_v1 ORDER BY primary_ts DESC LIMIT 1" ) +// ChecksumInfo stores checksum and count type ChecksumInfo struct { Checksum uint64 Count int64 @@ -71,6 +76,7 @@ type TableAnalyzer interface { AnalyzeSplitter(context.Context, *common.TableDiff, *splitter.RangeInfo) (splitter.ChunkIterator, error) } +// Source is the interface for table type Source interface { // GetTableAnalyzer pick the proper analyzer for different source. // the implement of this function is different in mysql/tidb. @@ -82,8 +88,8 @@ type Source interface { // there are many workers consume the range from the channel to compare. GetRangeIterator(context.Context, *splitter.RangeInfo, TableAnalyzer, int) (RangeIterator, error) - // GetCountAndMd5 gets the md5 result and the count from given range. 
- GetCountAndMd5(context.Context, *splitter.RangeInfo) *ChecksumInfo + // GetCountAndMD5 gets the md5 result and the count from given range. + GetCountAndMD5(context.Context, *splitter.RangeInfo) *ChecksumInfo // GetCountForLackTable gets the count for tables that don't exist upstream or downstream. GetCountForLackTable(context.Context, *splitter.RangeInfo) int64 @@ -112,6 +118,7 @@ type Source interface { Close() } +// NewSources returns a new source func NewSources(ctx context.Context, cfg *config.Config) (downstream Source, upstream Source, err error) { // init db connection for upstream / downstream. err = initDBConn(ctx, cfg) @@ -152,8 +159,8 @@ func NewSources(ctx context.Context, cfg *config.Config) (downstream Source, ups if d.Router.AddRule(&router.TableRule{ SchemaPattern: tableConfig.Schema, TablePattern: tableConfig.Table, - TargetSchema: ShieldDBName, - TargetTable: ShieldTableName, + TargetSchema: shieldDBName, + TargetTable: shieldTableName, }) != nil { return nil, nil, errors.Errorf("add shield rule failed [schema = %s] [table = %s]", tableConfig.Schema, tableConfig.Table) } @@ -168,8 +175,8 @@ func NewSources(ctx context.Context, cfg *config.Config) (downstream Source, ups if d.Router.AddRule(&router.TableRule{ SchemaPattern: tableConfig.Schema, TablePattern: tableConfig.Table, - TargetSchema: ShieldDBName, - TargetTable: ShieldTableName, + TargetSchema: shieldDBName, + TargetTable: shieldTableName, }) != nil { return nil, nil, errors.Errorf("add shield rule failed [schema = %s] [table = %s]", tableConfig.Schema, tableConfig.Table) } @@ -235,9 +242,9 @@ func buildSourceFromCfg( if ok { if len(dbs) == 1 { return NewTiDBSource(ctx, tableDiffs, dbs[0], bucketSpliterPool, f, skipNonExistingTable) - } else { - log.Fatal("Don't support check table in multiple tidb instance, please specify one tidb instance.") } + + log.Fatal("Don't support check table in multiple tidb instance, please specify one tidb instance.") } return NewMySQLSources(ctx, tableDiffs, dbs, connCount, f, skipNonExistingTable) } @@ -249,14 +256,14 @@ func getAutoSnapshotPosition(cfg *mysql.Config) (string, string, error) { } defer tmpConn.Close() var primaryTs, secondaryTs string - err = tmpConn.QueryRow(GetSyncPointQuery).Scan(&primaryTs, &secondaryTs) + err = tmpConn.QueryRow(getSyncPointQuery).Scan(&primaryTs, &secondaryTs) if err != nil { return "", "", errors.Annotatef(err, "fetching auto-position tidb_snapshot failed") } return primaryTs, secondaryTs, nil } -func initDBConn(ctx context.Context, cfg *config.Config) error { +func initDBConn(_ context.Context, cfg *config.Config) error { // Fill in tidb_snapshot if it is set to AUTO // This is only supported when set to auto on both target/source. 
if cfg.Task.TargetInstance.IsAutoSnapshot() { diff --git a/sync_diff_inspector/source/source_test.go b/sync_diff_inspector/source/source_test.go index 692452af4b7..9627c825dd4 100644 --- a/sync_diff_inspector/source/source_test.go +++ b/sync_diff_inspector/source/source_test.go @@ -56,7 +56,7 @@ type MockChunkIterator struct { ctx context.Context tableDiff *common.TableDiff rangeInfo *splitter.RangeInfo - index *chunk.ChunkID + index *chunk.CID } const ( @@ -70,7 +70,7 @@ func (m *MockChunkIterator) Next() (*chunk.Range, error) { } m.index.ChunkIndex = m.index.ChunkIndex + 1 return &chunk.Range{ - Index: &chunk.ChunkID{ + Index: &chunk.CID{ TableIndex: m.index.TableIndex, BucketIndexLeft: m.index.BucketIndexLeft, BucketIndexRight: m.index.BucketIndexRight, @@ -86,7 +86,7 @@ func (m *MockChunkIterator) Close() { type MockAnalyzer struct{} func (m *MockAnalyzer) AnalyzeSplitter(ctx context.Context, tableDiff *common.TableDiff, rangeInfo *splitter.RangeInfo) (splitter.ChunkIterator, error) { - i := &chunk.ChunkID{ + i := &chunk.CID{ TableIndex: 0, BucketIndexLeft: 0, BucketIndexRight: 0, @@ -184,7 +184,7 @@ func TestTiDBSource(t *testing.T) { require.Equal(t, n, tableCase.rangeInfo.GetTableIndex()) countRows := sqlmock.NewRows([]string{"CNT", "CHECKSUM"}).AddRow(123, 456) mock.ExpectQuery("SELECT COUNT.*").WillReturnRows(countRows) - checksum := tidb.GetCountAndMd5(ctx, tableCase.rangeInfo) + checksum := tidb.GetCountAndMD5(ctx, tableCase.rangeInfo) require.NoError(t, checksum.Err) require.Equal(t, checksum.Count, int64(123)) require.Equal(t, checksum.Checksum, uint64(456)) @@ -399,7 +399,7 @@ func TestMysqlShardSources(t *testing.T) { mock.ExpectQuery("SELECT COUNT.*").WillReturnRows(countRows) } - checksum := shard.GetCountAndMd5(ctx, tableCase.rangeInfo) + checksum := shard.GetCountAndMD5(ctx, tableCase.rangeInfo) require.NoError(t, checksum.Err) require.Equal(t, checksum.Count, int64(len(dbs))) require.Equal(t, checksum.Checksum, resChecksum) @@ -771,15 +771,17 @@ func TestRouterRules(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - r, err := router.NewTableRouter(false, []*router.TableRule{ - // make sure this rule works - { - SchemaPattern: "schema1", - TablePattern: "tbl", - TargetSchema: "schema2", - TargetTable: "tbl", - }, - }) + r, _ := router.NewTableRouter( + false, + []*router.TableRule{ + // make sure this rule works + { + SchemaPattern: "schema1", + TablePattern: "tbl", + TargetSchema: "schema2", + TargetTable: "tbl", + }, + }) cfg := &config.Config{ LogLevel: "debug", CheckThreadCount: 4, @@ -851,8 +853,8 @@ func TestRouterRules(t *testing.T) { require.Equal(t, "tbl", targetTable) targetSchema, targetTable, err = cfg.Task.SourceInstances[0].Router.Route("schema2", "tbl") require.NoError(t, err) - require.Equal(t, ShieldDBName, targetSchema) - require.Equal(t, ShieldTableName, targetTable) + require.Equal(t, shieldDBName, targetSchema) + require.Equal(t, shieldTableName, targetTable) targetSchema, targetTable, err = cfg.Task.SourceInstances[0].Router.Route("schema_test", "tbl") require.NoError(t, err) require.Equal(t, "schema_test", targetSchema) @@ -911,7 +913,7 @@ func TestInitTables(t *testing.T) { rows = sqlmock.NewRows([]string{"col1", "col2"}).AddRow("", "") mock.ExpectQuery("SHOW VARIABLES LIKE*").WillReturnRows(rows) - tablesToBeCheck, err = initTables(ctx, cfg) + _, err = initTables(ctx, cfg) require.Contains(t, err.Error(), "different config matched to same target table") require.NoError(t, 
mock.ExpectationsWereMet()) } @@ -936,18 +938,18 @@ func TestCheckTableMatched(t *testing.T) { tmap["`test`.`t1`"] = struct{}{} tmap["`test`.`t2`"] = struct{}{} - tables, err := checkTableMatched(tableDiffs, tmap, smap, false) + _, err := checkTableMatched(tableDiffs, tmap, smap, false) require.NoError(t, err) smap["`test`.`t3`"] = struct{}{} - tables, err = checkTableMatched(tableDiffs, tmap, smap, false) + _, err = checkTableMatched(tableDiffs, tmap, smap, false) require.Contains(t, err.Error(), "the target has no table to be compared. source-table is ``test`.`t3``") delete(smap, "`test`.`t2`") - tables, err = checkTableMatched(tableDiffs, tmap, smap, false) + _, err = checkTableMatched(tableDiffs, tmap, smap, false) require.Contains(t, err.Error(), "the source has no table to be compared. target-table is ``test`.`t2``") - tables, err = checkTableMatched(tableDiffs, tmap, smap, true) + tables, err := checkTableMatched(tableDiffs, tmap, smap, true) require.NoError(t, err) require.Equal(t, 0, tables[0].TableLack) require.Equal(t, 1, tables[1].TableLack) diff --git a/sync_diff_inspector/source/tidb.go b/sync_diff_inspector/source/tidb.go index 92d80c5d34b..5265bf88eb5 100644 --- a/sync_diff_inspector/source/tidb.go +++ b/sync_diff_inspector/source/tidb.go @@ -33,12 +33,14 @@ import ( "go.uber.org/zap" ) +// TiDBTableAnalyzer is used to analyze table type TiDBTableAnalyzer struct { dbConn *sql.DB bucketSpliterPool *utils.WorkerPool sourceTableMap map[string]*common.TableSource } +// AnalyzeSplitter returns a new iterator for TiDB table func (a *TiDBTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common.TableDiff, startRange *splitter.RangeInfo) (splitter.ChunkIterator, error) { matchedSource := getMatchSource(a.sourceTableMap, table) // Shallow Copy @@ -65,14 +67,17 @@ func (a *TiDBTableAnalyzer) AnalyzeSplitter(ctx context.Context, table *common.T return randIter, nil } +// TiDBRowsIterator is used to iterate rows in TiDB type TiDBRowsIterator struct { rows *sql.Rows } +// Close closes the iterator func (s *TiDBRowsIterator) Close() { s.rows.Close() } +// Next gets the next row func (s *TiDBRowsIterator) Next() (map[string]*dbutil.ColumnData, error) { if s.rows.Next() { return dbutil.ScanRow(s.rows) @@ -80,6 +85,7 @@ func (s *TiDBRowsIterator) Next() (map[string]*dbutil.ColumnData, error) { return nil, nil } +// TiDBSource represents the table in TiDB type TiDBSource struct { tableDiffs []*common.TableDiff sourceTableMap map[string]*common.TableSource @@ -91,6 +97,7 @@ type TiDBSource struct { version *semver.Version } +// GetTableAnalyzer gets the analyzer for current source func (s *TiDBSource) GetTableAnalyzer() TableAnalyzer { return &TiDBTableAnalyzer{ s.dbConn, @@ -111,21 +118,24 @@ func getMatchSource(sourceTableMap map[string]*common.TableSource, table *common return sourceTableMap[uniqueID] } +// GetRangeIterator returns a new iterator for TiDB table func (s *TiDBSource) GetRangeIterator(ctx context.Context, r *splitter.RangeInfo, analyzer TableAnalyzer, splitThreadCount int) (RangeIterator, error) { return NewChunksIterator(ctx, analyzer, s.tableDiffs, r, splitThreadCount) } +// Close closes the source func (s *TiDBSource) Close() { s.dbConn.Close() } -func (s *TiDBSource) GetCountAndMd5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { +// GetCountAndMD5 returns the checksum info +func (s *TiDBSource) GetCountAndMD5(ctx context.Context, tableRange *splitter.RangeInfo) *ChecksumInfo { beginTime := time.Now() table := 
s.tableDiffs[tableRange.GetTableIndex()] chunk := tableRange.GetChunk() matchSource := getMatchSource(s.sourceTableMap, table) - count, checksum, err := utils.GetCountAndMd5Checksum(ctx, s.dbConn, matchSource.OriginSchema, matchSource.OriginTable, table.Info, chunk.Where, chunk.Args) + count, checksum, err := utils.GetCountAndMD5Checksum(ctx, s.dbConn, matchSource.OriginSchema, matchSource.OriginTable, table.Info, chunk.Where, chunk.Args) cost := time.Since(beginTime) return &ChecksumInfo{ @@ -136,6 +146,7 @@ func (s *TiDBSource) GetCountAndMd5(ctx context.Context, tableRange *splitter.Ra } } +// GetCountForLackTable returns count for lack table func (s *TiDBSource) GetCountForLackTable(ctx context.Context, tableRange *splitter.RangeInfo) int64 { table := s.tableDiffs[tableRange.GetTableIndex()] matchSource := getMatchSource(s.sourceTableMap, table) @@ -146,10 +157,12 @@ func (s *TiDBSource) GetCountForLackTable(ctx context.Context, tableRange *split return 0 } +// GetTables returns all tables func (s *TiDBSource) GetTables() []*common.TableDiff { return s.tableDiffs } +// GetSourceStructInfo get the table info func (s *TiDBSource) GetSourceStructInfo(ctx context.Context, tableIndex int) ([]*model.TableInfo, error) { var err error tableInfos := make([]*model.TableInfo, 1) @@ -163,6 +176,7 @@ func (s *TiDBSource) GetSourceStructInfo(ctx context.Context, tableIndex int) ([ return tableInfos, nil } +// GenerateFixSQL generate SQL func (s *TiDBSource) GenerateFixSQL(t DMLType, upstreamData, downstreamData map[string]*dbutil.ColumnData, tableIndex int) string { if t == Insert { return utils.GenerateReplaceDML(upstreamData, s.tableDiffs[tableIndex].Info, s.tableDiffs[tableIndex].Schema) @@ -177,6 +191,7 @@ func (s *TiDBSource) GenerateFixSQL(t DMLType, upstreamData, downstreamData map[ return "" } +// GetRowsIterator returns a new iterator func (s *TiDBSource) GetRowsIterator(ctx context.Context, tableRange *splitter.RangeInfo) (RowDataIterator, error) { chunk := tableRange.GetChunk() @@ -187,6 +202,11 @@ func (s *TiDBSource) GetRowsIterator(ctx context.Context, tableRange *splitter.R log.Debug("select data", zap.String("sql", query), zap.Reflect("args", chunk.Args)) rows, err := s.dbConn.QueryContext(ctx, query, chunk.Args...) + defer func() { + if rows != nil { + _ = rows.Err() + } + }() if err != nil { return nil, errors.Trace(err) } @@ -195,14 +215,17 @@ func (s *TiDBSource) GetRowsIterator(ctx context.Context, tableRange *splitter.R }, nil } +// GetDB get the current DB func (s *TiDBSource) GetDB() *sql.DB { return s.dbConn } +// GetSnapshot get the current snapshot func (s *TiDBSource) GetSnapshot() string { return s.snapshot } +// NewTiDBSource return a new TiDB source func NewTiDBSource( ctx context.Context, tableDiffs []*common.TableDiff, ds *config.DataSource, @@ -248,18 +271,18 @@ func NewTiDBSource( } } - uniqueId := utils.UniqueID(targetSchema, targetTable) + uniqueID := utils.UniqueID(targetSchema, targetTable) isMatched := f.MatchTable(targetSchema, targetTable) if isMatched { // if match the filter, we should respect it and check target has this table later. 
- sourceTablesAfterRoute[uniqueId] = struct{}{} + sourceTablesAfterRoute[uniqueID] = struct{}{} } - if _, ok := targetUniqueTableMap[uniqueId]; ok || (isMatched && skipNonExistingTable) { - if _, ok := sourceTableMap[uniqueId]; ok { + if _, ok := targetUniqueTableMap[uniqueID]; ok || (isMatched && skipNonExistingTable) { + if _, ok := sourceTableMap[uniqueID]; ok { log.Error("TiDB source don't support compare multiple source tables with one downstream table," + " if this happening when diff on same instance is fine. otherwise we are not guarantee this diff result is right") } - sourceTableMap[uniqueId] = &common.TableSource{ + sourceTableMap[uniqueID] = &common.TableSource{ OriginSchema: schema, OriginTable: table, } diff --git a/sync_diff_inspector/splitter/bucket.go b/sync_diff_inspector/splitter/bucket.go index 46a53f99d28..cfd5c1d6664 100644 --- a/sync_diff_inspector/splitter/bucket.go +++ b/sync_diff_inspector/splitter/bucket.go @@ -30,8 +30,10 @@ import ( "go.uber.org/zap" ) +// DefaultChannelBuffer is the default size for channel buffer const DefaultChannelBuffer = 1024 +// BucketIterator is struct for bucket iterator type BucketIterator struct { buckets []dbutil.Bucket table *common.TableDiff @@ -53,10 +55,12 @@ type BucketIterator struct { dbConn *sql.DB } +// NewBucketIterator return a new iterator func NewBucketIterator(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB) (*BucketIterator, error) { return NewBucketIteratorWithCheckpoint(ctx, progressID, table, dbConn, nil, utils.NewWorkerPool(1, "bucketIter")) } +// NewBucketIteratorWithCheckpoint return a new iterator func NewBucketIteratorWithCheckpoint( ctx context.Context, progressID string, @@ -95,10 +99,12 @@ func NewBucketIteratorWithCheckpoint( return bs, nil } +// GetIndexID return the index id func (s *BucketIterator) GetIndexID() int64 { return s.indexID } +// Next return the next chunk func (s *BucketIterator) Next() (*chunk.Range, error) { var ok bool if uint(len(s.chunks)) <= s.nextChunk { @@ -219,6 +225,7 @@ NEXTINDEX: return nil } +// Close closes the iterator func (s *BucketIterator) Close() { s.cancel() } diff --git a/sync_diff_inspector/splitter/index_fields_test.go b/sync_diff_inspector/splitter/index_fields_test.go index 6b6cc5768e7..788df79a116 100644 --- a/sync_diff_inspector/splitter/index_fields_test.go +++ b/sync_diff_inspector/splitter/index_fields_test.go @@ -17,7 +17,7 @@ import ( "testing" "github.com/pingcap/tidb/pkg/parser" - "github.com/pingcap/tidb/pkg/util/dbutil" + "github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest" "github.com/stretchr/testify/require" ) @@ -30,7 +30,7 @@ func TestIndexFieldsSimple(t *testing.T) { "`c` char(120) NOT NULL DEFAULT '', " + "PRIMARY KEY (`id`), KEY `k_1` (`k`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) fields, err := indexFieldsFromConfigString("k", tableInfo) @@ -61,7 +61,7 @@ func TestIndexFieldsComposite(t *testing.T) { "KEY `k_1` (`k`)," + "UNIQUE INDEX `c_1` (`c`))" - tableInfo, err := dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) fields, err := indexFieldsFromConfigString("id, k", tableInfo) @@ -92,7 +92,7 @@ func TestIndexFieldsEmpty(t *testing.T) { "`c` char(120) NOT NULL DEFAULT '', " + "PRIMARY KEY (`id`), KEY `k_1` (`k`))" - tableInfo, err := 
dbutil.GetTableInfoBySQL(createTableSQL1, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL1, parser.New()) require.NoError(t, err) fields, err := indexFieldsFromConfigString("", tableInfo) diff --git a/sync_diff_inspector/splitter/limit.go b/sync_diff_inspector/splitter/limit.go index 3138d53181f..3baa585713a 100644 --- a/sync_diff_inspector/splitter/limit.go +++ b/sync_diff_inspector/splitter/limit.go @@ -30,6 +30,7 @@ import ( "go.uber.org/zap" ) +// LimitIterator is the iterator with limit type LimitIterator struct { table *common.TableDiff tagChunk *chunk.Range @@ -46,11 +47,19 @@ type LimitIterator struct { columnOffset map[string]int } +// NewLimitIterator return a new iterator func NewLimitIterator(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB) (*LimitIterator, error) { return NewLimitIteratorWithCheckpoint(ctx, progressID, table, dbConn, nil) } -func NewLimitIteratorWithCheckpoint(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB, startRange *RangeInfo) (*LimitIterator, error) { +// NewLimitIteratorWithCheckpoint return a new iterator +func NewLimitIteratorWithCheckpoint( + ctx context.Context, + progressID string, + table *common.TableDiff, + dbConn *sql.DB, + startRange *RangeInfo, +) (*LimitIterator, error) { indices, err := utils.GetBetterIndex(ctx, dbConn, table.Schema, table.Table, table.Info) if err != nil { return nil, errors.Trace(err) @@ -161,10 +170,12 @@ func NewLimitIteratorWithCheckpoint(ctx context.Context, progressID string, tabl return limitIterator, nil } +// Close close the iterator func (lmt *LimitIterator) Close() { lmt.cancel() } +// Next return the next chunk func (lmt *LimitIterator) Next() (*chunk.Range, error) { select { case err := <-lmt.errCh: @@ -177,6 +188,7 @@ func (lmt *LimitIterator) Next() (*chunk.Range, error) { } } +// GetIndexID get the current index id func (lmt *LimitIterator) GetIndexID() int64 { return lmt.indexID } @@ -199,7 +211,6 @@ func (lmt *LimitIterator) produceChunks(ctx context.Context, bucketID int) { if dataMap == nil { // there is no row in result set chunk.InitChunk(chunkRange, chunk.Limit, bucketID, bucketID, lmt.table.Collation, lmt.table.Range) - bucketID++ progress.UpdateTotal(lmt.progressID, 1, true) select { case <-ctx.Done(): diff --git a/sync_diff_inspector/splitter/random.go b/sync_diff_inspector/splitter/random.go index 98771d0eb28..241b2d12e33 100644 --- a/sync_diff_inspector/splitter/random.go +++ b/sync_diff_inspector/splitter/random.go @@ -31,6 +31,7 @@ import ( "go.uber.org/zap" ) +// RandomIterator is used to random iterate a table type RandomIterator struct { table *common.TableDiff chunkSize int64 @@ -40,11 +41,19 @@ type RandomIterator struct { dbConn *sql.DB } +// NewRandomIterator return a new iterator func NewRandomIterator(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB) (*RandomIterator, error) { return NewRandomIteratorWithCheckpoint(ctx, progressID, table, dbConn, nil) } -func NewRandomIteratorWithCheckpoint(ctx context.Context, progressID string, table *common.TableDiff, dbConn *sql.DB, startRange *RangeInfo) (*RandomIterator, error) { +// NewRandomIteratorWithCheckpoint return a new iterator with checkpoint +func NewRandomIteratorWithCheckpoint( + ctx context.Context, + progressID string, + table *common.TableDiff, + dbConn *sql.DB, + startRange *RangeInfo, +) (*RandomIterator, error) { // get the chunk count by data count and chunk size var splitFieldArr []string if 
len(table.Fields) != 0 { @@ -141,6 +150,7 @@ func NewRandomIteratorWithCheckpoint(ctx context.Context, progressID string, tab }, nil } +// Next get the next chunk func (s *RandomIterator) Next() (*chunk.Range, error) { if uint(len(s.chunks)) <= s.nextChunk { return nil, nil @@ -159,6 +169,7 @@ func (s *RandomIterator) Next() (*chunk.Range, error) { return c, nil } +// Close close the iterator func (s *RandomIterator) Close() { } diff --git a/sync_diff_inspector/splitter/splitter.go b/sync_diff_inspector/splitter/splitter.go index d2a43ed1ce2..5fb45bc9024 100644 --- a/sync_diff_inspector/splitter/splitter.go +++ b/sync_diff_inspector/splitter/splitter.go @@ -21,6 +21,7 @@ import ( ) const ( + // SplitThreshold is the threshold for splitting SplitThreshold = 1000 ) @@ -41,22 +42,27 @@ type RangeInfo struct { ProgressID string `json:"progress-id"` } -// GetTableIndex return the index of table diffs. +// GetTableIndex returns the index of table diffs. // IMPORTANT!!! // We need to keep the tables order during checkpoint. // So we should have to save the config info to checkpoint file too func (r *RangeInfo) GetTableIndex() int { return r.ChunkRange.Index.TableIndex } +// GetBucketIndexLeft returns the BucketIndexLeft func (r *RangeInfo) GetBucketIndexLeft() int { return r.ChunkRange.Index.BucketIndexLeft } +// GetBucketIndexRight returns the BucketIndexRight func (r *RangeInfo) GetBucketIndexRight() int { return r.ChunkRange.Index.BucketIndexRight } +// GetChunkIndex returns the ChunkIndex func (r *RangeInfo) GetChunkIndex() int { return r.ChunkRange.Index.ChunkIndex } +// GetChunk returns the chunk func (r *RangeInfo) GetChunk() *chunk.Range { return r.ChunkRange } +// Copy returns a copy of RangeInfo func (r *RangeInfo) Copy() *RangeInfo { return &RangeInfo{ ChunkRange: r.ChunkRange.Clone(), @@ -65,6 +71,7 @@ func (r *RangeInfo) Copy() *RangeInfo { } } +// Update updates the current RangeInfo func (r *RangeInfo) Update(column, lower, upper string, updateLower, updateUpper bool, collation, limits string) { r.ChunkRange.Update(column, lower, upper, updateLower, updateUpper) conditions, args := r.ChunkRange.ToString(collation) @@ -72,6 +79,7 @@ func (r *RangeInfo) Update(column, lower, upper string, updateLower, updateUpper r.ChunkRange.Args = args } +// ToNode converts RangeInfo to node func (r *RangeInfo) ToNode() *checkpoints.Node { return &checkpoints.Node{ ChunkRange: r.ChunkRange, @@ -79,6 +87,7 @@ func (r *RangeInfo) ToNode() *checkpoints.Node { } } +// FromNode converts the Node into RangeInfo func FromNode(n *checkpoints.Node) *RangeInfo { return &RangeInfo{ ChunkRange: n.ChunkRange, diff --git a/sync_diff_inspector/splitter/splitter_test.go b/sync_diff_inspector/splitter/splitter_test.go index 9b16fc5c0ea..5d312bddcad 100644 --- a/sync_diff_inspector/splitter/splitter_test.go +++ b/sync_diff_inspector/splitter/splitter_test.go @@ -915,7 +915,7 @@ func TestChunkSize(t *testing.T) { tableInfo, err = dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) require.NoError(t, err) - tableDiff_noindex := &common.TableDiff{ + tableDiffNoIndex := &common.TableDiff{ Schema: "test", Table: "test", Info: tableInfo, @@ -923,7 +923,7 @@ func TestChunkSize(t *testing.T) { } // no index createFakeResultForRandomSplit(mock, 1000, nil) - randomIter, err = NewRandomIterator(ctx, "", tableDiff_noindex, db) + randomIter, err = NewRandomIterator(ctx, "", tableDiffNoIndex, db) require.NoError(t, err) require.Equal(t, randomIter.chunkSize, int64(1001)) diff --git a/sync_diff_inspector/utils/pd.go 
b/sync_diff_inspector/utils/pd.go index b9604f81129..7aadf2fbc59 100644 --- a/sync_diff_inspector/utils/pd.go +++ b/sync_diff_inspector/utils/pd.go @@ -177,6 +177,7 @@ func parseVersion(versionStr string) (*semver.Version, error) { return semver.NewVersion(versionStr) } +// TryToGetVersion gets the version of current db. // It's OK to failed to get db version func TryToGetVersion(ctx context.Context, db *sql.DB) *semver.Version { versionStr, err := dbutil.GetDBVersion(ctx, db) @@ -267,6 +268,7 @@ func parseSnapshotToTSO(pool *sql.DB, snapshot string) (uint64, error) { return uint64(tso.Int64*1000) << 18, nil } +// GetSnapshot gets the snapshot func GetSnapshot(ctx context.Context, db *sql.DB) ([]string, error) { query := "SHOW MASTER STATUS;" rows, err := db.QueryContext(ctx, query) diff --git a/sync_diff_inspector/utils/table.go b/sync_diff_inspector/utils/table.go index 10f73e588d3..6cd2fae078b 100644 --- a/sync_diff_inspector/utils/table.go +++ b/sync_diff_inspector/utils/table.go @@ -37,8 +37,8 @@ import ( ) const ( - AnnotationClusteredReplaceString = "${1} /*T![clustered_index] CLUSTERED */${2}\n" - AnnotationNonClusteredReplaceString = "${1} /*T![clustered_index] NONCLUSTERED */${2}\n" + annotationClusteredReplaceString = "${1} /*T![clustered_index] CLUSTERED */${2}\n" + annotationNonClusteredReplaceString = "${1} /*T![clustered_index] NONCLUSTERED */${2}\n" ) func init() { @@ -116,7 +116,7 @@ func getTableInfoBySQL(ctx *metabuild.Context, createTableSQL string, parser2 *p return table, nil } - return nil, errors.Errorf("get table info from sql %s failed!", createTableSQL) + return nil, errors.Errorf("get table info from sql %s failed", createTableSQL) } func isPKISHandle( @@ -126,15 +126,20 @@ func isPKISHandle( ) bool { query := fmt.Sprintf("SELECT _tidb_rowid FROM %s LIMIT 0;", dbutil.TableName(schemaName, tableName)) rows, err := db.QueryContext(ctx, query) + defer func() { + if rows != nil { + _ = rows.Err() + rows.Close() + } + }() + if err != nil && strings.Contains(err.Error(), "Unknown column") { return true } - if rows != nil { - rows.Close() - } return false } +// GetTableInfoWithVersion returns table info under given version. func GetTableInfoWithVersion( ctx context.Context, db dbutil.QueryExecutor, @@ -149,9 +154,9 @@ func GetTableInfoWithVersion( if version != nil && version.Major <= 4 { var replaceString string if isPKISHandle(ctx, db, schemaName, tableName) { - replaceString = AnnotationClusteredReplaceString + replaceString = annotationClusteredReplaceString } else { - replaceString = AnnotationNonClusteredReplaceString + replaceString = annotationNonClusteredReplaceString } createTableSQL, err = addClusteredAnnotationForPrimaryKey(createTableSQL, replaceString) if err != nil { diff --git a/sync_diff_inspector/utils/utils.go b/sync_diff_inspector/utils/utils.go index 5e4351c1c7e..354ae30eeb9 100644 --- a/sync_diff_inspector/utils/utils.go +++ b/sync_diff_inspector/utils/utils.go @@ -39,10 +39,12 @@ import ( // which yields redacted string when being marshaled. type SecretString string +// MarshalJSON return fixed string for SerectString func (s SecretString) MarshalJSON() ([]byte, error) { return []byte(`"******"`), nil } +// String return fixed string for SerectString func (s SecretString) String() string { return "******" } @@ -130,7 +132,7 @@ func (pool *WorkerPool) HasWorker() bool { return len(pool.workers) > 0 } -// WaitFinished waits till the pool finishs all the tasks. +// WaitFinished waits till the pool finishes all the tasks. 
func (pool *WorkerPool) WaitFinished() { pool.wg.Wait() } @@ -211,7 +213,7 @@ func GenerateReplaceDML(data map[string]*dbutil.ColumnData, table *model.TableIn return fmt.Sprintf("REPLACE INTO %s(%s) VALUES (%s);", dbutil.TableName(schema, table.Name.O), strings.Join(colNames, ","), strings.Join(values, ",")) } -// GerateReplaceDMLWithAnnotation returns the replace SQL for the specific 2 rows. +// GenerateReplaceDMLWithAnnotation returns the replace SQL for the specific 2 rows. // And add Annotations to show the different columns. func GenerateReplaceDMLWithAnnotation(source, target map[string]*dbutil.ColumnData, table *model.TableInfo, schema string) string { sqlColNames := make([]string, 0, len(table.Columns)) @@ -286,7 +288,7 @@ func GenerateReplaceDMLWithAnnotation(source, target map[string]*dbutil.ColumnDa return fmt.Sprintf("/*\n%s*/\nREPLACE INTO %s(%s) VALUES (%s);", tableString.String(), dbutil.TableName(schema, table.Name.O), strings.Join(sqlColNames, ","), strings.Join(sqlValues, ",")) } -// GerateReplaceDMLWithAnnotation returns the delete SQL for the specific row. +// GenerateDeleteDML returns the delete SQL for the specific row. func GenerateDeleteDML(data map[string]*dbutil.ColumnData, table *model.TableInfo, schema string) string { kvs := make([]string, 0, len(table.Columns)) for _, col := range table.Columns { @@ -649,25 +651,25 @@ func CompareData(map1, map2 map[string]*dbutil.ColumnData, orderKeyCols, columns cmp = 1 } break - } else { - num1, err1 := strconv.ParseFloat(string(data1.Data), 64) - num2, err2 := strconv.ParseFloat(string(data2.Data), 64) - if err1 != nil || err2 != nil { - err = errors.Errorf("convert %s, %s to float failed, err1: %v, err2: %v", string(data1.Data), string(data2.Data), err1, err2) - return - } + } - if num1 == num2 { - continue - } + num1, err1 := strconv.ParseFloat(string(data1.Data), 64) + num2, err2 := strconv.ParseFloat(string(data2.Data), 64) + if err1 != nil || err2 != nil { + err = errors.Errorf("convert %s, %s to float failed, err1: %v, err2: %v", string(data1.Data), string(data2.Data), err1, err2) + return + } - if num1 < num2 { - cmp = -1 - } else { - cmp = 1 - } - break + if num1 == num2 { + continue } + + if num1 < num2 { + cmp = -1 + } else { + cmp = 1 + } + break } return @@ -773,8 +775,8 @@ func GetTableSize(ctx context.Context, db *sql.DB, schemaName, tableName string) return dataSize.Int64, nil } -// GetCountAndMd5Checksum returns checksum code and count of some data by given condition -func GetCountAndMd5Checksum(ctx context.Context, db *sql.DB, schemaName, tableName string, tbInfo *model.TableInfo, limitRange string, args []interface{}) (int64, uint64, error) { +// GetCountAndMD5Checksum returns checksum code and count of some data by given condition +func GetCountAndMD5Checksum(ctx context.Context, db *sql.DB, schemaName, tableName string, tbInfo *model.TableInfo, limitRange string, args []interface{}) (int64, uint64, error) { /* calculate MD5 checksum and count example: mysql> SELECT COUNT(*) as CNT, BIT_XOR(CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `name`, CONCAT(ISNULL(`id`), ISNULL(`name`)))), 1, 16), 16, 10) AS UNSIGNED) ^ CAST(CONV(SUBSTRING(MD5(CONCAT_WS(',', `id`, `name`, CONCAT(ISNULL(`id`), ISNULL(`name`)))), 17, 16), 16, 10) AS UNSIGNED)) as CHECKSUM FROM `a`.`t`; @@ -964,7 +966,7 @@ func GetBetterIndex(ctx context.Context, db *sql.DB, schema, table string, table return []*model.IndexInfo{index}, nil } } - sels := make([]float64, len(indices)) + sels := make([]float64, 0, len(indices)) for _, index := range 
indices { column := GetColumnsFromIndex(index, tableInfo)[0] selectivity, err := GetSelectivity(ctx, db, schema, table, column.Name.O, tableInfo) @@ -1002,7 +1004,7 @@ func GetSelectivity(ctx context.Context, db *sql.DB, schemaName, tableName, colu func CalculateChunkSize(rowCount int64) int64 { // we assume chunkSize is 50000 for any cluster. chunkSize := int64(50000) - if rowCount > int64(chunkSize)*10000 { + if rowCount > chunkSize*10000 { // we assume we only need 10k chunks for any table. chunkSize = rowCount / 10000 } @@ -1016,12 +1018,12 @@ func AnalyzeTable(ctx context.Context, db *sql.DB, tableName string) error { } // GetSQLFileName returns filename of fix-SQL identified by chunk's `Index`. -func GetSQLFileName(index *chunk.ChunkID) string { +func GetSQLFileName(index *chunk.CID) string { return fmt.Sprintf("%d:%d-%d:%d", index.TableIndex, index.BucketIndexLeft, index.BucketIndexRight, index.ChunkIndex) } -// GetChunkIDFromSQLFileName convert the filename to chunk's `Index`. -func GetChunkIDFromSQLFileName(fileIDStr string) (int, int, int, int, error) { +// GetCIDFromSQLFileName convert the filename to chunk's `Index`. +func GetCIDFromSQLFileName(fileIDStr string) (int, int, int, int, error) { ids := strings.Split(fileIDStr, ":") tableIndex, err := strconv.Atoi(ids[0]) if err != nil { @@ -1051,6 +1053,7 @@ func IsRangeTrivial(rangeCond string) bool { return strings.ToLower(rangeCond) == "true" } +// IsBinaryColumn checks if the given column is a binary column func IsBinaryColumn(col *model.ColumnInfo) bool { // varbinary or binary return (col.GetType() == mysql.TypeVarchar || col.GetType() == mysql.TypeString) && mysql.HasBinaryFlag(col.GetFlag()) diff --git a/sync_diff_inspector/utils/utils_test.go b/sync_diff_inspector/utils/utils_test.go index fde005ebb3c..a768266eca6 100644 --- a/sync_diff_inspector/utils/utils_test.go +++ b/sync_diff_inspector/utils/utils_test.go @@ -49,8 +49,8 @@ func TestWorkerPool(t *testing.T) { infoCh <- 2 }) pool.Apply(func() { - new_v := <-infoCh - v = new_v + newV := <-infoCh + v = newV doneCh <- struct{}{} }) <-doneCh @@ -257,7 +257,7 @@ func TestBasicTableUtilOperation(t *testing.T) { require.Equal(t, tableInfo.Indices[0].Columns[1].Offset, 1) } -func TestGetCountAndMd5Checksum(t *testing.T) { +func TestGetCountAndMD5Checksum(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) defer cancel() @@ -271,7 +271,7 @@ func TestGetCountAndMd5Checksum(t *testing.T) { mock.ExpectQuery("SELECT COUNT.*FROM `test_schema`\\.`test_table` WHERE \\[23 45\\].*").WithArgs("123", "234").WillReturnRows(sqlmock.NewRows([]string{"CNT", "CHECKSUM"}).AddRow(123, 456)) - count, checksum, err := GetCountAndMd5Checksum(ctx, conn, "test_schema", "test_table", tableInfo, "[23 45]", []interface{}{"123", "234"}) + count, checksum, err := GetCountAndMD5Checksum(ctx, conn, "test_schema", "test_table", tableInfo, "[23 45]", []interface{}{"123", "234"}) require.NoError(t, err) require.Equal(t, count, int64(123)) require.Equal(t, checksum, uint64(0x1c8)) @@ -501,7 +501,7 @@ func TestCalculateChunkSize(t *testing.T) { } func TestGetSQLFileName(t *testing.T) { - index := &chunk.ChunkID{ + index := &chunk.CID{ TableIndex: 1, BucketIndexLeft: 2, BucketIndexRight: 3, @@ -511,8 +511,8 @@ func TestGetSQLFileName(t *testing.T) { require.Equal(t, GetSQLFileName(index), "1:2-3:4") } -func TestGetChunkIDFromSQLFileName(t *testing.T) { - tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, err := GetChunkIDFromSQLFileName("11:12-13:14") +func 
TestGetCIDFromSQLFileName(t *testing.T) { + tableIndex, bucketIndexLeft, bucketIndexRight, chunkIndex, err := GetCIDFromSQLFileName("11:12-13:14") require.NoError(t, err) require.Equal(t, tableIndex, 11) require.Equal(t, bucketIndexLeft, 12) @@ -620,16 +620,16 @@ func TestGenerateSQLBlob(t *testing.T) { } cases := []struct { - createTableSql string + createTableSQL string }{ - {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` tinyblob)"}, - {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` blob)"}, - {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` mediumblob)"}, - {createTableSql: "CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` longblob)"}, + {"CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` tinyblob)"}, + {"CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` blob)"}, + {"CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` mediumblob)"}, + {"CREATE TABLE `diff_test`.`atest` (`id` int primary key, `b` longblob)"}, } for _, c := range cases { - tableInfo, err := dbutiltest.GetTableInfoBySQL(c.createTableSql, parser.New()) + tableInfo, err := dbutiltest.GetTableInfoBySQL(c.createTableSQL, parser.New()) require.NoError(t, err) replaceSQL := GenerateReplaceDML(rowsData, tableInfo, "diff_test") From ad27384ca805edebdffbe8e9d729e5550bcf11f7 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 17:38:32 +0800 Subject: [PATCH 06/22] Update Makefile --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 82b2501c711..1b185159ef4 100644 --- a/Makefile +++ b/Makefile @@ -445,7 +445,7 @@ dm_unit_test_in_verify_ci: check_failpoint_ctl tools/bin/gotestsum tools/bin/goc tools/bin/gocov convert "$(DM_TEST_DIR)/cov.unit_test.out" | tools/bin/gocov-xml > dm-coverage.xml $(FAILPOINT_DISABLE) -dm_integration_test_build: check_failpoint_ctl +dm_integration_test_build: check_failpoint_ctl sync_diff_inspector $(FAILPOINT_ENABLE) $(GOTEST) -ldflags '$(LDFLAGS)' -c -cover -covermode=atomic \ -coverpkg=github.com/pingcap/tiflow/dm/... \ @@ -475,7 +475,7 @@ dm_integration_test_build_worker: check_failpoint_ctl $(FAILPOINT_DISABLE) ./dm/tests/prepare_tools.sh -dm_integration_test_build_master: check_failpoint_ctl +dm_integration_test_build_master: check_failpoint_ctl sync_diff_inspector $(FAILPOINT_ENABLE) $(GOTEST) -ldflags '$(LDFLAGS)' -c -cover -covermode=atomic \ -coverpkg=github.com/pingcap/tiflow/dm/... 
\ From 42c8f8cd77e9f47bd548da84bd6b5e0fb611d658 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Mon, 21 Oct 2024 18:00:33 +0800 Subject: [PATCH 07/22] Update Makefile --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1b185159ef4..82e58f89e13 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ ### Makefile for tiflow -.PHONY: build test check clean fmt sync-diff-inspector cdc kafka_consumer storage_consumer coverage \ +.PHONY: build test check clean fmt sync_diff_inspector cdc kafka_consumer storage_consumer coverage \ integration_test_build integration_test integration_test_mysql integration_test_kafka bank \ kafka_docker_integration_test kafka_docker_integration_test_with_build \ clean_integration_test_containers \ @@ -499,6 +499,7 @@ install_test_python_dep: check_third_party_binary_for_dm: @which bin/tidb-server + @which bin/sync_diff_inspector @which mysql @which bin/minio From 2b933843f186f3e99845d7d9d1987344eb104f93 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 22 Oct 2024 12:04:28 +0800 Subject: [PATCH 08/22] Fix tests --- sync_diff_inspector/config/config_test.go | 2 +- sync_diff_inspector/progress/progress.go | 42 +++++++++++++++---- sync_diff_inspector/splitter/splitter_test.go | 22 +++++++--- sync_diff_inspector/utils/utils_test.go | 13 +++--- 4 files changed, 56 insertions(+), 23 deletions(-) diff --git a/sync_diff_inspector/config/config_test.go b/sync_diff_inspector/config/config_test.go index 5a36caa4504..24526446d35 100644 --- a/sync_diff_inspector/config/config_test.go +++ b/sync_diff_inspector/config/config_test.go @@ -38,7 +38,7 @@ func TestParseConfig(t *testing.T) { require.Nil(t, cfg.Parse([]string{"--config", "config_sharding.toml"})) // we change the config from config.toml to config_sharding.toml // this action will raise error. - require.Contains(t, cfg.Init().Error(), "failed to init Task: config changes breaking the checkpoint, please use another outputDir and start over again!") + require.Contains(t, cfg.Init().Error(), "failed to init Task: config changes breaking the checkpoint, please use another outputDir and start over again") require.NoError(t, os.RemoveAll(cfg.Task.OutputDir)) require.Nil(t, cfg.Parse([]string{"--config", "config_sharding.toml"})) diff --git a/sync_diff_inspector/progress/progress.go b/sync_diff_inspector/progress/progress.go index 27f9ec7538a..bbd9c87b5c9 100644 --- a/sync_diff_inspector/progress/progress.go +++ b/sync_diff_inspector/progress/progress.go @@ -19,16 +19,40 @@ import ( "io" "os" "strings" + "sync" "time" "github.com/pingcap/tiflow/sync_diff_inspector/source/common" ) +type atomicWriter struct { + mu sync.Mutex + writer io.Writer +} + +func (aw *atomicWriter) Set(writer io.Writer) { + aw.mu.Lock() + defer aw.mu.Unlock() + aw.writer = writer +} + +func (aw *atomicWriter) Write(s string, args ...any) { + aw.mu.Lock() + defer aw.mu.Unlock() + fmt.Fprintf(aw.writer, s, args...) 
+} + +func (aw *atomicWriter) Get() io.Writer { + aw.mu.Lock() + defer aw.mu.Unlock() + return aw.writer +} + type tableProgressPrinter struct { tableList *list.List tableFailList *list.List tableMap map[string]*list.Element - output io.Writer + output atomicWriter lines int progressTableNums int @@ -108,12 +132,12 @@ func newTableProgressPrinter(tableNums int, finishTableNums int) *tableProgressP } tpp.init() go tpp.serve() - fmt.Fprintf(tpp.output, "A total of %d tables need to be compared\n\n\n", tableNums) + tpp.output.Write("A total of %d tables need to be compared\n\n\n", tableNums) return tpp } func (tpp *tableProgressPrinter) SetOutput(output io.Writer) { - tpp.output = output + tpp.output.Set(output) } func (tpp *tableProgressPrinter) Inc(name string) { @@ -217,7 +241,7 @@ func (tpp *tableProgressPrinter) PrintSummary() { ) } - fmt.Fprintf(tpp.output, "%s%s\n", cleanStr, fixStr) + tpp.output.Write("%s%s\n", cleanStr, fixStr) } func (tpp *tableProgressPrinter) Error(err error) { @@ -228,7 +252,7 @@ func (tpp *tableProgressPrinter) Error(err error) { var cleanStr, fixStr string cleanStr = "\x1b[1A\x1b[J" fixStr = fmt.Sprintf("\nError in comparison process:\n%v\n\nYou can view the comparison details through './output_dir/sync_diff_inspector.log'\n", err) - fmt.Fprintf(tpp.output, "%s%s", cleanStr, fixStr) + tpp.output.Write("%s%s", cleanStr, fixStr) } func (tpp *tableProgressPrinter) init() { @@ -236,7 +260,7 @@ func (tpp *tableProgressPrinter) init() { state: tableStateHead, }) - tpp.output = os.Stdout + tpp.output.Set(os.Stdout) } func (tpp *tableProgressPrinter) serve() { @@ -407,16 +431,16 @@ func (tpp *tableProgressPrinter) flush(stateIsChanged bool) { } dynStr = fmt.Sprintf("%s_____________________________________________________________________________\n", dynStr) - fmt.Fprintf(tpp.output, "%s%s%s", cleanStr, fixStr, dynStr) + tpp.output.Write("%s%s%s", cleanStr, fixStr, dynStr) } else { - fmt.Fprint(tpp.output, "\x1b[1A\x1b[J") + tpp.output.Write("\x1b[1A\x1b[J") } // show bar // 60 '='+'-' coe := float32(tpp.progressTableNums*tpp.progress)/float32(tpp.tableNums*(tpp.total+1)) + float32(tpp.finishTableNums)/float32(tpp.tableNums) numLeft := int(60 * coe) percent := int(100 * coe) - fmt.Fprintf(tpp.output, "Progress [%s>%s] %d%% %d/%d\n", strings.Repeat("=", numLeft), strings.Repeat("-", 60-numLeft), percent, tpp.progress, tpp.total) + tpp.output.Write("Progress [%s>%s] %d%% %d/%d\n", strings.Repeat("=", numLeft), strings.Repeat("-", 60-numLeft), percent, tpp.progress, tpp.total) } var progress *tableProgressPrinter = nil diff --git a/sync_diff_inspector/splitter/splitter_test.go b/sync_diff_inspector/splitter/splitter_test.go index 5d312bddcad..760b642c01d 100644 --- a/sync_diff_inspector/splitter/splitter_test.go +++ b/sync_diff_inspector/splitter/splitter_test.go @@ -738,8 +738,6 @@ func createFakeResultForRandom(mock sqlmock.Sqlmock, aRandomValues, bRandomValue func TestLimitSpliter(t *testing.T) { ctx := context.Background() - db, mock, err := sqlmock.New() - require.NoError(t, err) createTableSQL := "create table `test`.`test`(`a` int, `b` varchar(10), `c` float, `d` datetime, primary key(`a`, `b`))" tableInfo, err := dbutiltest.GetTableInfoBySQL(createTableSQL, parser.New()) @@ -782,6 +780,10 @@ func TestLimitSpliter(t *testing.T) { } for _, testCase := range testCases { + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + createFakeResultForLimitSplit(mock, testCase.limitAValues, testCase.limitBValues, true) iter, err := NewLimitIterator(ctx, 
"", tableDiff, db) @@ -801,10 +803,14 @@ func TestLimitSpliter(t *testing.T) { } } + db2, mock2, err := sqlmock.New() + require.NoError(t, err) + defer db2.Close() + // Test Checkpoint stopJ := 2 - createFakeResultForLimitSplit(mock, testCases[0].limitAValues[:stopJ], testCases[0].limitBValues[:stopJ], true) - iter, err := NewLimitIterator(ctx, "", tableDiff, db) + createFakeResultForLimitSplit(mock2, testCases[0].limitAValues[:stopJ], testCases[0].limitBValues[:stopJ], true) + iter, err := NewLimitIterator(ctx, "", tableDiff, db2) require.NoError(t, err) j := 0 var chunk *chunk.Range @@ -819,8 +825,12 @@ func TestLimitSpliter(t *testing.T) { IndexID: iter.GetIndexID(), } - createFakeResultForLimitSplit(mock, testCases[0].limitAValues[stopJ:], testCases[0].limitBValues[stopJ:], true) - iter, err = NewLimitIteratorWithCheckpoint(ctx, "", tableDiff, db, rangeInfo) + db3, mock3, err := sqlmock.New() + require.NoError(t, err) + defer db3.Close() + + createFakeResultForLimitSplit(mock3, testCases[0].limitAValues[stopJ:], testCases[0].limitBValues[stopJ:], true) + iter, err = NewLimitIteratorWithCheckpoint(ctx, "", tableDiff, db3, rangeInfo) require.NoError(t, err) chunk, err = iter.Next() require.NoError(t, err) diff --git a/sync_diff_inspector/utils/utils_test.go b/sync_diff_inspector/utils/utils_test.go index a768266eca6..6454e27ef4b 100644 --- a/sync_diff_inspector/utils/utils_test.go +++ b/sync_diff_inspector/utils/utils_test.go @@ -402,8 +402,7 @@ func TestGetTableSize(t *testing.T) { } func TestGetBetterIndex(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() conn, mock, err := sqlmock.New() require.NoError(t, err) defer conn.Close() @@ -445,9 +444,9 @@ func TestGetBetterIndex(t *testing.T) { {"7", "d"}, {"8", "d"}, {"9", "e"}, - {"A", "e"}, - {"B", "f"}, - {"C", "f"}, + {"10", "e"}, + {"11", "f"}, + {"12", "f"}, }, indices: []string{"a", "b"}, sels: []float64{1.0, 0.5}, @@ -488,8 +487,8 @@ func TestGetBetterIndex(t *testing.T) { require.NoError(t, err) require.Equal(t, sel, tableCase.sels[i]) } - mock.ExpectQuery("SELECT COUNT\\(DISTINCT `a.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("2")) - mock.ExpectQuery("SELECT COUNT\\(DISTINCT `b.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("5")) + mock.ExpectQuery("SELECT COUNT\\(DISTINCT `a.*").WillReturnRows(sqlmock.NewRows([]string{"ESL"}).AddRow("5")) + mock.ExpectQuery("SELECT COUNT\\(DISTINCT `b.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("2")) indices, err = GetBetterIndex(ctx, conn, "single_index", "test1", tableInfo) require.NoError(t, err) require.Equal(t, indices[0].Name.O, tableCase.selected) From 8b501e45b319f58c79553a872e4310afdb2ea6fd Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 22 Oct 2024 13:55:45 +0800 Subject: [PATCH 09/22] Revert scripts --- Makefile | 2 +- .../download-compatibility-test-binaries.sh | 6 +++++ .../download-integration-test-binaries.sh | 5 ++++ dm/tests/mariadb_master_down_and_up/case.sh | 1 + dm/tests/mariadb_master_down_and_up/lib.sh | 6 +++++ dm/tests/tiup/lib.sh | 6 +++++ dm/tests/tiup/upgrade-from-v1.sh | 2 ++ dm/tests/tiup/upgrade-from-v2.sh | 2 ++ dm/tests/tiup/upgrade-tidb.sh | 2 ++ dm/tests/upstream_switch/case.sh | 1 + dm/tests/upstream_switch/lib.sh | 6 +++++ scripts/download-integration-test-binaries.sh | 4 +++- scripts/download-sync-diff.sh | 23 +++++++++++++++++++ 13 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 
scripts/download-sync-diff.sh diff --git a/Makefile b/Makefile index 82e58f89e13..44f4366ba69 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ .DEFAULT_GOAL := default # Adapted from https://www.thapaliya.com/en/writings/well-documented-makefiles/ -help: ## Display this help ann any documented user-facing targets. Other undocumented targets may be present in the Makefile. +help: ## Display this help and any documented user-facing targets. Other undocumented targets may be present in the Makefile. help: @awk 'BEGIN {FS = ": ##"; printf "Usage:\n make \n\nTargets:\n"} /^[a-zA-Z0-9_\.\-\/%]+: ##/ { printf " %-45s %s\n", $$1, $$2 }' $(MAKEFILE_LIST) diff --git a/dm/tests/download-compatibility-test-binaries.sh b/dm/tests/download-compatibility-test-binaries.sh index e71a740ccda..df29fa3e303 100755 --- a/dm/tests/download-compatibility-test-binaries.sh +++ b/dm/tests/download-compatibility-test-binaries.sh @@ -77,6 +77,7 @@ main() { # Define download URLs local download_urls=( "${FILE_SERVER_URL}/download/builds/pingcap/tidb/${tidb_sha1}/centos7/tidb-server.tar.gz" + "http://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz" "http://download.pingcap.org/tidb-enterprise-tools-latest-linux-amd64.tar.gz" "${GITHUB_RELEASE_URL}/gh-ost-binary-linux-20200828140552.tar.gz" "${FILE_SERVER_URL}/download/minio.tar.gz" @@ -97,6 +98,11 @@ main() { extract "$filename" "$THIRD_BIN_DIR" "bin/tidb-server" mv "${THIRD_BIN_DIR}/bin/tidb-server" "$THIRD_BIN_DIR/" ;; + tidb-enterprise-tools-nightly-linux-amd64.tar.gz) + extract "$filename" "$THIRD_BIN_DIR" "tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector" + mv "${THIRD_BIN_DIR}/tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector" "$THIRD_BIN_DIR/" + rm -rf "${THIRD_BIN_DIR}/tidb-enterprise-tools-nightly-linux-amd64" + ;; tidb-enterprise-tools-latest-linux-amd64.tar.gz) extract "$filename" "$THIRD_BIN_DIR" "tidb-enterprise-tools-latest-linux-amd64/bin/mydumper" mv "${THIRD_BIN_DIR}/tidb-enterprise-tools-latest-linux-amd64/bin/mydumper" "$THIRD_BIN_DIR/" diff --git a/dm/tests/download-integration-test-binaries.sh b/dm/tests/download-integration-test-binaries.sh index d5c96eff7de..6dd88b767a4 100755 --- a/dm/tests/download-integration-test-binaries.sh +++ b/dm/tests/download-integration-test-binaries.sh @@ -77,6 +77,7 @@ main() { "${FILE_SERVER_URL}/download/builds/pingcap/tidb/${tidb_sha1}/centos7/tidb-server.tar.gz" "${FILE_SERVER_URL}/download/builds/pingcap/tikv/${tikv_sha1}/centos7/tikv-server.tar.gz" "${FILE_SERVER_URL}/download/builds/pingcap/pd/${pd_sha1}/centos7/pd-server.tar.gz" + "${FILE_SERVER_URL}/download/builds/pingcap/tidb-tools/${tidb_tools_sha1}/centos7/tidb-tools.tar.gz" "${GITHUB_RELEASE_URL}/gh-ost-binary-linux-20200828140552.tar.gz" "${FILE_SERVER_URL}/download/minio.tar.gz" ) @@ -104,6 +105,10 @@ main() { tar -xz -C "$THIRD_BIN_DIR" bin/tikv-server -f "${TEMP_DIR}/${filename}" mv "${THIRD_BIN_DIR}/bin/tikv-server" "$THIRD_BIN_DIR/" ;; + tidb-tools.tar.gz) + tar -xz -C "$THIRD_BIN_DIR" 'bin/sync_diff_inspector' -f "${TEMP_DIR}/${filename}" + mv "${THIRD_BIN_DIR}/bin/sync_diff_inspector" "$THIRD_BIN_DIR/" + ;; minio.tar.gz | gh-ost-binary-linux-20200828140552.tar.gz) tar -xz -C "$THIRD_BIN_DIR" -f "${TEMP_DIR}/${filename}" ;; diff --git a/dm/tests/mariadb_master_down_and_up/case.sh b/dm/tests/mariadb_master_down_and_up/case.sh index dc59fe3e1b2..2941263415c 100644 --- a/dm/tests/mariadb_master_down_and_up/case.sh +++ b/dm/tests/mariadb_master_down_and_up/case.sh @@ -108,6 +108,7 @@ 
function test_master_down_and_up() { cleanup_process clean_data setup_replica + install_sync_diff gen_full_data run_dm_components_and_create_source $1 start_task diff --git a/dm/tests/mariadb_master_down_and_up/lib.sh b/dm/tests/mariadb_master_down_and_up/lib.sh index 4a548c73425..3d38de273e7 100644 --- a/dm/tests/mariadb_master_down_and_up/lib.sh +++ b/dm/tests/mariadb_master_down_and_up/lib.sh @@ -27,6 +27,12 @@ function exec_tidb() { echo $2 | mysql -uroot -h127.0.0.1 -P$1 } +function install_sync_diff() { + curl https://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz + mkdir -p bin + mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ +} + function get_master_status() { arr=$(echo "show master status;" | MYSQL_PWD=123456 mysql -uroot -h127.0.0.1 -P3306 | awk 'NR==2') echo $arr diff --git a/dm/tests/tiup/lib.sh b/dm/tests/tiup/lib.sh index 441fd2da753..8b57d9355e7 100755 --- a/dm/tests/tiup/lib.sh +++ b/dm/tests/tiup/lib.sh @@ -56,6 +56,12 @@ function run_sql_tidb_with_retry() { fi } +function install_sync_diff() { + curl https://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz + mkdir -p bin + mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ +} + function exec_full_stage() { # drop previous data exec_sql mysql1 3306 "DROP DATABASE IF EXISTS $DB1;" diff --git a/dm/tests/tiup/upgrade-from-v1.sh b/dm/tests/tiup/upgrade-from-v1.sh index 75b4244efb0..dcf95ea03fd 100755 --- a/dm/tests/tiup/upgrade-from-v1.sh +++ b/dm/tests/tiup/upgrade-from-v1.sh @@ -122,6 +122,8 @@ function destroy_v2_by_tiup() { } function test() { + install_sync_diff + deploy_v1_by_ansible migrate_in_v1 diff --git a/dm/tests/tiup/upgrade-from-v2.sh b/dm/tests/tiup/upgrade-from-v2.sh index 1a1252e94b2..f5781c3002c 100755 --- a/dm/tests/tiup/upgrade-from-v2.sh +++ b/dm/tests/tiup/upgrade-from-v2.sh @@ -170,6 +170,8 @@ function destroy_v2_by_tiup() { } function test() { + install_sync_diff + deploy_previous_v2 migrate_in_previous_v2 diff --git a/dm/tests/tiup/upgrade-tidb.sh b/dm/tests/tiup/upgrade-tidb.sh index 1207e512f27..434c74cc7a9 100755 --- a/dm/tests/tiup/upgrade-tidb.sh +++ b/dm/tests/tiup/upgrade-tidb.sh @@ -52,6 +52,8 @@ function destroy_v2_by_tiup() { # run this before upgrade TiDB. 
function before_upgrade() { + install_sync_diff + deploy_dm migrate_before_upgrade diff --git a/dm/tests/upstream_switch/case.sh b/dm/tests/upstream_switch/case.sh index 185ebdbd878..012b4df8ff3 100644 --- a/dm/tests/upstream_switch/case.sh +++ b/dm/tests/upstream_switch/case.sh @@ -208,6 +208,7 @@ function check_master() { function test_relay() { cleanup_process check_master + install_sync_diff clean_data prepare_binlogs setup_replica diff --git a/dm/tests/upstream_switch/lib.sh b/dm/tests/upstream_switch/lib.sh index b11537d988f..65064fb4cb6 100644 --- a/dm/tests/upstream_switch/lib.sh +++ b/dm/tests/upstream_switch/lib.sh @@ -30,6 +30,12 @@ function exec_tidb() { echo $2 | mysql -uroot -h$1 -P4000 } +function install_sync_diff() { + curl https://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz + mkdir -p bin + mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ +} + function prepare_more_binlogs() { exec_sql $1 "create database db1 collate latin1_bin;" exec_sql $1 "flush logs;" diff --git a/scripts/download-integration-test-binaries.sh b/scripts/download-integration-test-binaries.sh index ec0d8849438..765d848aede 100755 --- a/scripts/download-integration-test-binaries.sh +++ b/scripts/download-integration-test-binaries.sh @@ -91,7 +91,7 @@ download_community_binaries() { mv ${THIRD_BIN_DIR}/tiflash ${THIRD_BIN_DIR}/_tiflash mv ${THIRD_BIN_DIR}/_tiflash/* ${THIRD_BIN_DIR} && rm -rf ${THIRD_BIN_DIR}/_tiflash tar -xz -C ${THIRD_BIN_DIR} pd-ctl -f ${TMP_DIR}/$tidb_file_name/ctl-${dist}.tar.gz - tar -xz -C ${THIRD_BIN_DIR} $toolkit_file_name/etcdctl -f ${TMP_DIR}/$toolkit_tar_name + tar -xz -C ${THIRD_BIN_DIR} $toolkit_file_name/etcdctl $toolkit_file_name/sync_diff_inspector -f ${TMP_DIR}/$toolkit_tar_name mv ${THIRD_BIN_DIR}/$toolkit_file_name/* ${THIRD_BIN_DIR} && rm -rf ${THIRD_BIN_DIR}/$toolkit_file_name # Download additional tools @@ -147,6 +147,7 @@ download_binaries() { local minio_download_url="${FILE_SERVER_URL}/download/minio.tar.gz" local go_ycsb_download_url="${FILE_SERVER_URL}/download/builds/pingcap/go-ycsb/test-br/go-ycsb" local etcd_download_url="${FILE_SERVER_URL}/download/builds/pingcap/cdc/etcd-v3.4.7-linux-amd64.tar.gz" + local sync_diff_inspector_url="${FILE_SERVER_URL}/download/builds/pingcap/cdc/sync_diff_inspector_hash-a129f096_linux-amd64.tar.gz" local jq_download_url="${FILE_SERVER_URL}/download/builds/pingcap/test/jq-1.6/jq-linux64" local schema_registry_url="${FILE_SERVER_URL}/download/builds/pingcap/cdc/schema-registry.tar.gz" @@ -157,6 +158,7 @@ download_binaries() { download_and_extract "$tiflash_download_url" "tiflash.tar.gz" download_and_extract "$minio_download_url" "minio.tar.gz" download_and_extract "$etcd_download_url" "etcd.tar.gz" "etcd-v3.4.7-linux-amd64/etcdctl" + download_and_extract "$sync_diff_inspector_url" "sync_diff_inspector.tar.gz" download_and_extract "$schema_registry_url" "schema-registry.tar.gz" download_file "$go_ycsb_download_url" "go-ycsb" "${THIRD_BIN_DIR}/go-ycsb" diff --git a/scripts/download-sync-diff.sh b/scripts/download-sync-diff.sh new file mode 100644 index 00000000000..e154ed2900c --- /dev/null +++ b/scripts/download-sync-diff.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Copyright 2022 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eu + +echo "will download tidb-tools v6.1.0 to get sync_diff_inspector" +curl -C - --retry 3 -o /tmp/tidb-tools.tar.gz https://download.pingcap.org/tidb-community-toolkit-v6.1.0-linux-amd64.tar.gz +mkdir -p /tmp/tidb-tools +tar -zxf /tmp/tidb-tools.tar.gz -C /tmp/tidb-tools +mv /tmp/tidb-tools/tidb-community-toolkit-v6.1.0-linux-amd64/sync_diff_inspector ./bin/sync_diff_inspector +rm -r /tmp/tidb-tools +rm /tmp/tidb-tools.tar.gz \ No newline at end of file From ee0b72cd66c7f9d8ddf2be88d1b30fd609199a57 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 22 Oct 2024 13:59:51 +0800 Subject: [PATCH 10/22] Revert scripts --- dm/tests/mariadb_master_down_and_up/case.sh | 2 +- dm/tests/tiup/upgrade-from-v1.sh | 2 +- scripts/download-sync-diff.sh | 0 tests/integration_tests/README.md | 1 + 4 files changed, 3 insertions(+), 2 deletions(-) mode change 100644 => 100755 scripts/download-sync-diff.sh diff --git a/dm/tests/mariadb_master_down_and_up/case.sh b/dm/tests/mariadb_master_down_and_up/case.sh index 2941263415c..e678d3bcfd2 100644 --- a/dm/tests/mariadb_master_down_and_up/case.sh +++ b/dm/tests/mariadb_master_down_and_up/case.sh @@ -107,8 +107,8 @@ function clean_task() { function test_master_down_and_up() { cleanup_process clean_data - setup_replica install_sync_diff + setup_replica gen_full_data run_dm_components_and_create_source $1 start_task diff --git a/dm/tests/tiup/upgrade-from-v1.sh b/dm/tests/tiup/upgrade-from-v1.sh index dcf95ea03fd..3520dd0f7b9 100755 --- a/dm/tests/tiup/upgrade-from-v1.sh +++ b/dm/tests/tiup/upgrade-from-v1.sh @@ -123,7 +123,7 @@ function destroy_v2_by_tiup() { function test() { install_sync_diff - + deploy_v1_by_ansible migrate_in_v1 diff --git a/scripts/download-sync-diff.sh b/scripts/download-sync-diff.sh old mode 100644 new mode 100755 diff --git a/tests/integration_tests/README.md b/tests/integration_tests/README.md index b2c3d471f85..483f697338f 100644 --- a/tests/integration_tests/README.md +++ b/tests/integration_tests/README.md @@ -14,6 +14,7 @@ If you need to specify a version, os or arch, you can use, for example: `make pr * `pd-ctl` # version >= 6.0.0-rc.1 * `tiflash` # tiflash binary * `libc++.so, libc++abi.so, libgmssl.so, libtiflash_proxy.so` # some necessary so files related to tiflash + * `sync_diff_inspector` * [go-ycsb](https://github.com/pingcap/go-ycsb) * [etcdctl](https://github.com/etcd-io/etcd/tree/master/etcdctl) * [jq](https://stedolan.github.io/jq/) From 82397bc81d0d9ae4ee63022e8371437ff081ccc2 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 22 Oct 2024 14:00:34 +0800 Subject: [PATCH 11/22] Revert scripts --- scripts/download-sync-diff.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/download-sync-diff.sh b/scripts/download-sync-diff.sh index e154ed2900c..3ee26c6e505 100755 --- a/scripts/download-sync-diff.sh +++ b/scripts/download-sync-diff.sh @@ -20,4 +20,4 @@ mkdir -p /tmp/tidb-tools tar -zxf /tmp/tidb-tools.tar.gz -C /tmp/tidb-tools mv /tmp/tidb-tools/tidb-community-toolkit-v6.1.0-linux-amd64/sync_diff_inspector ./bin/sync_diff_inspector rm -r /tmp/tidb-tools -rm /tmp/tidb-tools.tar.gz \ No newline at end of file +rm /tmp/tidb-tools.tar.gz From 
635bd6ecdac99f5a9cba2ae30aa754febd56436a Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 22 Oct 2024 14:16:38 +0800 Subject: [PATCH 12/22] Add TODO --- sync_diff_inspector/utils/table.go | 1 + 1 file changed, 1 insertion(+) diff --git a/sync_diff_inspector/utils/table.go b/sync_diff_inspector/utils/table.go index 6cd2fae078b..4b602bb02fb 100644 --- a/sync_diff_inspector/utils/table.go +++ b/sync_diff_inspector/utils/table.go @@ -169,6 +169,7 @@ func GetTableInfoWithVersion( } sctx := mock.NewContext() // unify the timezone to UTC +0:00 + // TODO(joechenrh): the following code doesn't work on the latest version of tidb. sctx.GetSessionVars().TimeZone = time.UTC sctx.GetSessionVars().SQLMode = mysql.DelSQLMode(sctx.GetSessionVars().SQLMode, mysql.ModeStrictTransTables) sctx.GetSessionVars().SQLMode = mysql.DelSQLMode(sctx.GetSessionVars().SQLMode, mysql.ModeStrictAllTables) From ad434cb94cc1e7c1743e5da9197b656d76cf4411 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 22 Oct 2024 15:12:22 +0800 Subject: [PATCH 13/22] Fix tests --- Makefile | 9 +++++---- sync_diff_inspector/source/source_test.go | 22 +++++++--------------- sync_diff_inspector/utils/utils_test.go | 2 +- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index 44f4366ba69..38f578b3845 100644 --- a/Makefile +++ b/Makefile @@ -137,7 +137,7 @@ dev: check test test: unit_test dm_unit_test engine_unit_test -build: cdc dm engine sync_diff_inspector +build: cdc dm engine check-makefiles: ## Check the makefiles format. Please run this target after the changes are committed. check-makefiles: format-makefiles @@ -222,13 +222,14 @@ check_third_party_binary: @which bin/pd-server @which bin/tiflash @which bin/pd-ctl + @which bin/sync_diff_inspector @which bin/go-ycsb @which bin/etcdctl @which bin/jq @which bin/minio @which bin/bin/schema-registry-start -integration_test_build: check_failpoint_ctl storage_consumer kafka_consumer pulsar_consumer oauth2_server sync_diff_inspector +integration_test_build: check_failpoint_ctl storage_consumer kafka_consumer pulsar_consumer oauth2_server $(FAILPOINT_ENABLE) $(GOTEST) -ldflags '$(LDFLAGS)' -c -cover -covermode=atomic \ -coverpkg=github.com/pingcap/tiflow/... \ @@ -445,7 +446,7 @@ dm_unit_test_in_verify_ci: check_failpoint_ctl tools/bin/gotestsum tools/bin/goc tools/bin/gocov convert "$(DM_TEST_DIR)/cov.unit_test.out" | tools/bin/gocov-xml > dm-coverage.xml $(FAILPOINT_DISABLE) -dm_integration_test_build: check_failpoint_ctl sync_diff_inspector +dm_integration_test_build: check_failpoint_ctl $(FAILPOINT_ENABLE) $(GOTEST) -ldflags '$(LDFLAGS)' -c -cover -covermode=atomic \ -coverpkg=github.com/pingcap/tiflow/dm/... \ @@ -475,7 +476,7 @@ dm_integration_test_build_worker: check_failpoint_ctl $(FAILPOINT_DISABLE) ./dm/tests/prepare_tools.sh -dm_integration_test_build_master: check_failpoint_ctl sync_diff_inspector +dm_integration_test_build_master: check_failpoint_ctl $(FAILPOINT_ENABLE) $(GOTEST) -ldflags '$(LDFLAGS)' -c -cover -covermode=atomic \ -coverpkg=github.com/pingcap/tiflow/dm/... 
\ diff --git a/sync_diff_inspector/source/source_test.go b/sync_diff_inspector/source/source_test.go index 9627c825dd4..bc69832547d 100644 --- a/sync_diff_inspector/source/source_test.go +++ b/sync_diff_inspector/source/source_test.go @@ -22,7 +22,6 @@ import ( "regexp" "strconv" "testing" - "time" "github.com/DATA-DOG/go-sqlmock" _ "github.com/go-sql-driver/mysql" @@ -102,8 +101,7 @@ func (m *MockAnalyzer) AnalyzeSplitter(ctx context.Context, tableDiff *common.Ta } func TestTiDBSource(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() conn, mock, err := sqlmock.New() require.NoError(t, err) @@ -270,8 +268,7 @@ func TestTiDBSource(t *testing.T) { } func TestFallbackToRandomIfRangeIsSet(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() conn, mock, err := sqlmock.New() require.NoError(t, err) @@ -319,8 +316,7 @@ func TestFallbackToRandomIfRangeIsSet(t *testing.T) { } func TestMysqlShardSources(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() tableCases := []*tableCaseType{ { @@ -443,8 +439,7 @@ func TestMysqlShardSources(t *testing.T) { } func TestMysqlRouter(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() conn, mock, err := sqlmock.New() require.NoError(t, err) @@ -556,8 +551,7 @@ func TestMysqlRouter(t *testing.T) { } func TestTiDBRouter(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() conn, mock, err := sqlmock.New() require.NoError(t, err) @@ -668,8 +662,7 @@ func TestSource(t *testing.T) { port, err := strconv.Atoi(portstr) require.NoError(t, err) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() router, err := router.NewTableRouter(false, nil) require.NoError(t, err) @@ -768,8 +761,7 @@ func TestRouterRules(t *testing.T) { port, err := strconv.Atoi(portStr) require.NoError(t, err) - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() + ctx := context.Background() r, _ := router.NewTableRouter( false, diff --git a/sync_diff_inspector/utils/utils_test.go b/sync_diff_inspector/utils/utils_test.go index 6454e27ef4b..d5bd5f8eadc 100644 --- a/sync_diff_inspector/utils/utils_test.go +++ b/sync_diff_inspector/utils/utils_test.go @@ -487,7 +487,7 @@ func TestGetBetterIndex(t *testing.T) { require.NoError(t, err) require.Equal(t, sel, tableCase.sels[i]) } - mock.ExpectQuery("SELECT COUNT\\(DISTINCT `a.*").WillReturnRows(sqlmock.NewRows([]string{"ESL"}).AddRow("5")) + mock.ExpectQuery("SELECT COUNT\\(DISTINCT `a.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("5")) mock.ExpectQuery("SELECT COUNT\\(DISTINCT `b.*").WillReturnRows(sqlmock.NewRows([]string{"SEL"}).AddRow("2")) indices, err = GetBetterIndex(ctx, conn, "single_index", "test1", tableInfo) require.NoError(t, err) From 2d45cacd975af9670e24b65c55602d49d252a6c9 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 25 Oct 2024 11:42:27 +0800 Subject: [PATCH 14/22] Revert all changes related to build --- .github/workflows/dm_binlog_999999.yaml | 6 +++--- Makefile | 9 +++++---- dm/tests/README.md | 3 ++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git 
a/.github/workflows/dm_binlog_999999.yaml b/.github/workflows/dm_binlog_999999.yaml index ee722f93f88..fa0aaf93899 100644 --- a/.github/workflows/dm_binlog_999999.yaml +++ b/.github/workflows/dm_binlog_999999.yaml @@ -44,13 +44,13 @@ jobs: key: ${{ runner.os }}-ticdc-tools-${{ hashFiles('tools/check/go.sum') }} - name: Build DM binary - run: | - make dm_integration_test_build - make sync_diff_inspector + run: make dm_integration_test_build - name: Setup CI environment run: | docker-compose -f ./dm/tests/binlog_999999/docker-compose.yml up -d + curl http://download.pingcap.org/tidb-enterprise-tools-nightly-linux-amd64.tar.gz | tar xz + mv tidb-enterprise-tools-nightly-linux-amd64/bin/sync_diff_inspector bin/ curl http://download.pingcap.org/tidb-nightly-linux-amd64.tar.gz | tar xz mv tidb-nightly-linux-amd64/bin/tidb-server bin/ curl -O https://dl.min.io/server/minio/release/linux-amd64/minio diff --git a/Makefile b/Makefile index 38f578b3845..63ce12fbfaf 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ ### Makefile for tiflow -.PHONY: build test check clean fmt sync_diff_inspector cdc kafka_consumer storage_consumer coverage \ +.PHONY: build test check clean fmt cdc kafka_consumer storage_consumer coverage \ integration_test_build integration_test integration_test_mysql integration_test_kafka bank \ kafka_docker_integration_test kafka_docker_integration_test_with_build \ clean_integration_test_containers \ @@ -159,9 +159,6 @@ build-cdc-with-failpoint: ## Build cdc with failpoint enabled. cdc: $(GOBUILD) -ldflags '$(LDFLAGS)' -o bin/cdc ./cmd/cdc -sync_diff_inspector: - $(GOBUILD) -ldflags '$(LDFLAGS)' -o bin/sync_diff_inspector ./sync_diff_inspector/main.go - kafka_consumer: $(CONSUMER_GOBUILD) -ldflags '$(LDFLAGS)' -o bin/cdc_kafka_consumer ./cmd/kafka-consumer @@ -569,6 +566,7 @@ check_third_party_binary_for_engine: @which mysql || (echo "mysql not found in ${PATH}"; exit 1) @which jq || (echo "jq not found in ${PATH}"; exit 1) @which mc || (echo "mc not found in ${PATH}, you can use 'make bin/mc' and move bin/mc to ${PATH}"; exit 1) + @which bin/sync_diff_inspector || (echo "run 'make bin/sync_diff_inspector' to download it if you need") check_engine_integration_test: ./engine/test/utils/check_case.sh @@ -583,6 +581,9 @@ check_cdc_integration_test: bin/mc: ./scripts/download-mc.sh +bin/sync_diff_inspector: + ./scripts/download-sync-diff.sh + define run_engine_unit_test @echo "running unit test for packages:" $(1) mkdir -p $(ENGINE_TEST_DIR) diff --git a/dm/tests/README.md b/dm/tests/README.md index 9d3e4432ad5..0fc789263d6 100644 --- a/dm/tests/README.md +++ b/dm/tests/README.md @@ -4,6 +4,7 @@ 1. The following executables must be copied or generated or linked into these locations. * `bin/tidb-server` can be downloaded from [tidb-master-linux-amd64](https://download.pingcap.org/tidb-master-linux-amd64.tar.gz) or installed by [tiup](https://github.com/pingcap/tiup), you can use the command `find ~/.tiup -name tidb-server` to locate `tidb-server` binary file and copy it + * `bin/sync_diff_inspector` # can be downloaded from [tidb-enterprise-tools-latest-linux-amd64](http://download.pingcap.org/tidb-enterprise-tools-latest-linux-amd64.tar.gz) or build from [source code](https://github.com/pingcap/tidb-tools) * `bin/minio` can be build from (https://github.com/minio/minio) * `bin/dm-master.test` # generated by `make dm_integration_test_build` * `bin/dm-worker.test` # generated by `make dm_integration_test_build` @@ -31,7 +32,7 @@ ### Integration Test -1. 
Run `make dm_integration_test_build` and `make sync_diff_inspector` to generate DM related binary for integration test. +1. Run `make dm_integration_test_build` to generate DM related binary for integration test. 2. Setup two MySQL servers (the first one: 5.6 ~ 5.7; the second one: 8.0.21, suggest you are same as [CI](https://github.com/PingCAP-QE/ci/blob/main/jenkins/pipelines/ci/dm/dm_ghpr_new_test.groovy#L164-L172)) with [binlog enabled first](https://dev.mysql.com/doc/refman/5.7/en/replication-howto-masterbaseconfig.html) and [set `GTID_MODE=ON`](https://dev.mysql.com/doc/refman/5.7/en/replication-mode-change-online-enable-gtids.html), You need set the mysql port and root password according to the following table. From 6656850b26e78d2a8d2f5f3942867d026c9c50ce Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 25 Oct 2024 11:44:37 +0800 Subject: [PATCH 15/22] Revert all changes related to build --- dm/tests/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dm/tests/README.md b/dm/tests/README.md index 0fc789263d6..f72fe191fee 100644 --- a/dm/tests/README.md +++ b/dm/tests/README.md @@ -32,7 +32,7 @@ ### Integration Test -1. Run `make dm_integration_test_build` to generate DM related binary for integration test. +1. Run `make dm_integration_test_build` to generate DM related binary for integration test 2. Setup two MySQL servers (the first one: 5.6 ~ 5.7; the second one: 8.0.21, suggest you are same as [CI](https://github.com/PingCAP-QE/ci/blob/main/jenkins/pipelines/ci/dm/dm_ghpr_new_test.groovy#L164-L172)) with [binlog enabled first](https://dev.mysql.com/doc/refman/5.7/en/replication-howto-masterbaseconfig.html) and [set `GTID_MODE=ON`](https://dev.mysql.com/doc/refman/5.7/en/replication-mode-change-online-enable-gtids.html), You need set the mysql port and root password according to the following table. 
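Note on the next patch ("remove dep"): it drops the `github.com/siddontang/go/ioutil2` import by inlining the two small helpers sync_diff_inspector used from it. The inlined checkpoint writer follows the usual write-to-a-temp-file-then-rename idiom; a minimal sketch of that idiom (illustrative package and function names, simplified from what the patch actually adds — the real helper also handles short writes and restores the file mode) looks like this:

```go
package checkpointutil

import (
	"os"
	"path/filepath"
)

// writeAtomic writes data to a temp file in the target directory and renames
// it into place, so a crash mid-write leaves either the old file or the new
// one on disk, never a truncated mix.
func writeAtomic(filename string, data []byte) error {
	tmp, err := os.CreateTemp(filepath.Dir(filename), filepath.Base(filename))
	if err != nil {
		return err
	}
	if _, err := tmp.Write(data); err != nil {
		tmp.Close()
		os.Remove(tmp.Name())
		return err
	}
	if err := tmp.Close(); err != nil {
		os.Remove(tmp.Name())
		return err
	}
	// Rename is atomic when source and target live on the same filesystem.
	return os.Rename(tmp.Name(), filename)
}
```

That all-or-nothing property is what a checkpoint loader relies on when it re-reads `sync_diff_checkpoints.pb` after an interrupted run.
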
From 8e601b3d1887ad9b03a38064b5e1669a4a1b81c5 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 25 Oct 2024 13:50:07 +0800 Subject: [PATCH 16/22] remove dep --- go.mod | 2 +- .../checkpoints/checkpoints.go | 26 +++++++++++++++++-- sync_diff_inspector/diff/diff.go | 11 +++++--- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index d0aec8d663a..8ce5bf41de4 100644 --- a/go.mod +++ b/go.mod @@ -346,7 +346,7 @@ require ( github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c // indirect github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0 // indirect github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546 // indirect - github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726 + github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726 // indirect github.com/siddontang/go-log v0.0.0-20180807004314-8d05993dda07 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect diff --git a/sync_diff_inspector/checkpoints/checkpoints.go b/sync_diff_inspector/checkpoints/checkpoints.go index 82b4def15ec..ab98187eddf 100644 --- a/sync_diff_inspector/checkpoints/checkpoints.go +++ b/sync_diff_inspector/checkpoints/checkpoints.go @@ -17,7 +17,9 @@ import ( "container/heap" "context" "encoding/json" + "io" "os" + "path" "sync" "github.com/pingcap/errors" @@ -25,10 +27,30 @@ import ( "github.com/pingcap/tiflow/sync_diff_inspector/chunk" "github.com/pingcap/tiflow/sync_diff_inspector/config" "github.com/pingcap/tiflow/sync_diff_inspector/report" - "github.com/siddontang/go/ioutil2" "go.uber.org/zap" ) +// Write file to temp and atomically move when everything else succeeds. +func writeFileAtomic(filename string, data []byte, perm os.FileMode) error { + dir, name := path.Dir(filename), path.Base(filename) + f, err := os.CreateTemp(dir, name) + if err != nil { + return err + } + n, err := f.Write(data) + f.Close() + if err == nil && n < len(data) { + err = io.ErrShortWrite + } else { + err = os.Chmod(f.Name(), perm) + } + if err != nil { + os.Remove(f.Name()) + return err + } + return os.Rename(f.Name(), filename) +} + const ( // SuccessState means // for chunk: this chunk's data is equal @@ -227,7 +249,7 @@ func (cp *Checkpoint) SaveChunk(ctx context.Context, fileName string, cur *Node, return nil, errors.Trace(err) } - if err = ioutil2.WriteFileAtomic(fileName, checkpointData, config.LocalFilePerm); err != nil { + if err = writeFileAtomic(fileName, checkpointData, config.LocalFilePerm); err != nil { return nil, err } log.Info("save checkpoint", diff --git a/sync_diff_inspector/diff/diff.go b/sync_diff_inspector/diff/diff.go index b2eec9afd17..929be851af6 100644 --- a/sync_diff_inspector/diff/diff.go +++ b/sync_diff_inspector/diff/diff.go @@ -41,10 +41,15 @@ import ( "github.com/pingcap/tiflow/sync_diff_inspector/source/common" "github.com/pingcap/tiflow/sync_diff_inspector/splitter" "github.com/pingcap/tiflow/sync_diff_inspector/utils" - "github.com/siddontang/go/ioutil2" "go.uber.org/zap" ) +// Check file exists or not +func fileExists(name string) bool { + _, err := os.Stat(name) + return !os.IsNotExist(err) +} + const ( // checkpointFile represents the checkpoints' file name which used for save and loads chunks checkpointFile = "sync_diff_checkpoints.pb" @@ -161,7 +166,7 @@ func (df *Diff) initCheckpoint() error { finishTableNums := 0 path := filepath.Join(df.CheckpointDir, checkpointFile) - if ioutil2.FileExists(path) { + if fileExists(path) { node, reportInfo, err := df.cp.LoadChunk(path) if err != 
nil { return errors.Annotate(err, "the checkpoint load process failed") @@ -740,7 +745,7 @@ func (df *Diff) writeSQLs(ctx context.Context) { tableDiff := df.downstream.GetTables()[dml.node.GetTableIndex()] fileName := fmt.Sprintf("%s:%s:%s.sql", tableDiff.Schema, tableDiff.Table, utils.GetSQLFileName(dml.node.GetID())) fixSQLPath := filepath.Join(df.FixSQLDir, fileName) - if ok := ioutil2.FileExists(fixSQLPath); ok { + if fileExists(fixSQLPath) { // unreachable log.Fatal("write sql failed: repeat sql happen", zap.Strings("sql", dml.sqls)) } From ec82382ddfd2bb8db234c8dc1c61bea6cd3188c2 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 25 Oct 2024 15:55:13 +0800 Subject: [PATCH 17/22] Fix unstable test --- sync_diff_inspector/source/chunks_iter.go | 57 ++++++++++++------- sync_diff_inspector/splitter/bucket.go | 47 ++++++++------- sync_diff_inspector/splitter/splitter.go | 2 + sync_diff_inspector/splitter/splitter_test.go | 3 + 4 files changed, 67 insertions(+), 42 deletions(-) diff --git a/sync_diff_inspector/source/chunks_iter.go b/sync_diff_inspector/source/chunks_iter.go index 0439aba8ea3..1aad42fc90c 100644 --- a/sync_diff_inspector/source/chunks_iter.go +++ b/sync_diff_inspector/source/chunks_iter.go @@ -28,22 +28,33 @@ import ( // ChunksIterator is used for single mysql/tidb source. type ChunksIterator struct { + ctx context.Context + cancel context.CancelFunc + ID *chunk.CID tableAnalyzer TableAnalyzer TableDiffs []*common.TableDiff - nextTableIndex int chunksCh chan *splitter.RangeInfo errCh chan error splitThreadCount int - cancel context.CancelFunc + pool *utils.WorkerPool } // NewChunksIterator returns a new iterator -func NewChunksIterator(ctx context.Context, analyzer TableAnalyzer, tableDiffs []*common.TableDiff, startRange *splitter.RangeInfo, splitThreadCount int) (*ChunksIterator, error) { +func NewChunksIterator( + ctx context.Context, + analyzer TableAnalyzer, + tableDiffs []*common.TableDiff, + startRange *splitter.RangeInfo, + splitThreadCount int, +) (*ChunksIterator, error) { ctxx, cancel := context.WithCancel(ctx) iter := &ChunksIterator{ + ctx: ctxx, + cancel: cancel, + splitThreadCount: splitThreadCount, tableAnalyzer: analyzer, TableDiffs: tableDiffs, @@ -51,26 +62,30 @@ func NewChunksIterator(ctx context.Context, analyzer TableAnalyzer, tableDiffs [ // reserve 30 capacity for each goroutine on average chunksCh: make(chan *splitter.RangeInfo, 30*splitThreadCount), errCh: make(chan error, len(tableDiffs)), - cancel: cancel, + pool: utils.NewWorkerPool(uint(splitThreadCount), "chunks producer"), } - go iter.produceChunks(ctxx, startRange) + go iter.produceChunks(startRange) return iter, nil } -func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter.RangeInfo) { - defer close(t.chunksCh) - pool := utils.NewWorkerPool(uint(t.splitThreadCount), "chunks producer") - t.nextTableIndex = 0 +func (t *ChunksIterator) produceChunks(startRange *splitter.RangeInfo) { + defer func() { + t.pool.WaitFinished() + close(t.chunksCh) + }() + + nextTableIndex := 0 // If chunkRange if startRange != nil { curIndex := startRange.GetTableIndex() curTable := t.TableDiffs[curIndex] - t.nextTableIndex = curIndex + 1 + nextTableIndex = curIndex + 1 + // if this chunk is empty, data-check for this table should be skipped if startRange.ChunkRange.Type != chunk.Empty { - pool.Apply(func() { - chunkIter, err := t.tableAnalyzer.AnalyzeSplitter(ctx, curTable, startRange) + t.pool.Apply(func() { + chunkIter, err := t.tableAnalyzer.AnalyzeSplitter(t.ctx, curTable, 
startRange) if err != nil { t.errCh <- errors.Trace(err) return @@ -87,7 +102,7 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter } c.Index.TableIndex = curIndex select { - case <-ctx.Done(): + case <-t.ctx.Done(): log.Info("Stop do produce chunks by context done") return case t.chunksCh <- &splitter.RangeInfo{ @@ -101,16 +116,16 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter } } - for ; t.nextTableIndex < len(t.TableDiffs); t.nextTableIndex++ { - curTableIndex := t.nextTableIndex + for ; nextTableIndex < len(t.TableDiffs); nextTableIndex++ { + curTableIndex := nextTableIndex // skip data-check, but still need to send a empty chunk to make checkpoint continuous if t.TableDiffs[curTableIndex].IgnoreDataCheck || !common.AllTableExist(t.TableDiffs[curTableIndex].TableLack) { - pool.Apply(func() { + t.pool.Apply(func() { table := t.TableDiffs[curTableIndex] progressID := dbutil.TableName(table.Schema, table.Table) progress.StartTable(progressID, 1, true) select { - case <-ctx.Done(): + case <-t.ctx.Done(): log.Info("Stop do produce chunks by context done") return case t.chunksCh <- &splitter.RangeInfo{ @@ -129,9 +144,9 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter continue } - pool.Apply(func() { + t.pool.Apply(func() { table := t.TableDiffs[curTableIndex] - chunkIter, err := t.tableAnalyzer.AnalyzeSplitter(ctx, table, nil) + chunkIter, err := t.tableAnalyzer.AnalyzeSplitter(t.ctx, table, nil) if err != nil { t.errCh <- errors.Trace(err) return @@ -148,7 +163,7 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter } c.Index.TableIndex = curTableIndex select { - case <-ctx.Done(): + case <-t.ctx.Done(): log.Info("Stop do produce chunks by context done") return case t.chunksCh <- &splitter.RangeInfo{ @@ -160,7 +175,6 @@ func (t *ChunksIterator) produceChunks(ctx context.Context, startRange *splitter } }) } - pool.WaitFinished() } // Next returns the next chunk @@ -181,6 +195,7 @@ func (t *ChunksIterator) Next(ctx context.Context) (*splitter.RangeInfo, error) // Close closes the iterator func (t *ChunksIterator) Close() { t.cancel() + t.pool.WaitFinished() } // TODO: getCurTableIndexID only used for binary search, should be optimized later. 
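A note on the `chunks_iter.go` rewrite above: the iterator now keeps its own `ctx`, `cancel`, and worker pool, so `Close` cancels in-flight splitting and then waits for the pool, while the producer goroutine only closes `chunksCh` after the pool has drained — otherwise a late worker could send on a closed channel. A stripped-down sketch of that cancel-then-wait shape, using a plain `sync.WaitGroup` in place of `utils.WorkerPool` (illustrative names, not the iterator's actual API):

```go
package produce

import (
	"context"
	"sync"
)

type producer struct {
	ctx    context.Context
	cancel context.CancelFunc
	wg     sync.WaitGroup
	out    chan int
}

func newProducer(ctx context.Context, jobs int) *producer {
	ctx, cancel := context.WithCancel(ctx)
	p := &producer{ctx: ctx, cancel: cancel, out: make(chan int, jobs)}
	p.wg.Add(jobs) // register all work up front so Close can safely Wait
	go p.run(jobs)
	return p
}

func (p *producer) run(jobs int) {
	defer func() {
		p.wg.Wait()  // every worker has returned...
		close(p.out) // ...so nothing can send on the closed channel
	}()
	for i := 0; i < jobs; i++ {
		go func(v int) {
			defer p.wg.Done()
			select {
			case <-p.ctx.Done(): // exit promptly once Close cancels
			case p.out <- v:
			}
		}(i)
	}
}

// Close cancels first, then waits for the workers to finish.
func (p *producer) Close() {
	p.cancel()
	p.wg.Wait()
}
```

The patched iterator does the same with `pool.Apply` / `pool.WaitFinished`, which is why `produceChunks` only closes `chunksCh` after `WaitFinished` returns.
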
diff --git a/sync_diff_inspector/splitter/bucket.go b/sync_diff_inspector/splitter/bucket.go index cfd5c1d6664..37fd765729b 100644 --- a/sync_diff_inspector/splitter/bucket.go +++ b/sync_diff_inspector/splitter/bucket.go @@ -16,7 +16,6 @@ package splitter import ( "context" "database/sql" - "sync" "github.com/pingcap/errors" "github.com/pingcap/failpoint" @@ -35,12 +34,14 @@ const DefaultChannelBuffer = 1024 // BucketIterator is struct for bucket iterator type BucketIterator struct { + ctx context.Context + cancel context.CancelFunc + buckets []dbutil.Bucket table *common.TableDiff indexColumns []*model.ColumnInfo chunkPool *utils.WorkerPool - wg sync.WaitGroup // control for one bucket in shared chunkPool chunkSize int64 chunks []*chunk.Range @@ -48,7 +49,6 @@ type BucketIterator struct { chunksCh chan []*chunk.Range errCh chan error - cancel context.CancelFunc indexID int64 progressID string @@ -75,14 +75,16 @@ func NewBucketIteratorWithCheckpoint( table.Range) } - bctx, cancel := context.WithCancel(ctx) + ctx, cancel := context.WithCancel(ctx) bs := &BucketIterator{ + ctx: ctx, + cancel: cancel, + table: table, chunkPool: bucketSpliterPool, chunkSize: table.ChunkSize, chunksCh: make(chan []*chunk.Range, DefaultChannelBuffer), errCh: make(chan error, 1), - cancel: cancel, dbConn: dbConn, progressID: progressID, @@ -94,7 +96,7 @@ func NewBucketIteratorWithCheckpoint( // Let the progress bar begins to record the table. progress.StartTable(bs.progressID, 0, false) - go bs.produceChunks(bctx, startRange) + go bs.produceChunks(startRange) return bs, nil } @@ -228,16 +230,19 @@ NEXTINDEX: // Close closes the iterator func (s *BucketIterator) Close() { s.cancel() + s.chunkPool.WaitFinished() } -func (s *BucketIterator) splitChunkForBucket(ctx context.Context, firstBucketID, lastBucketID int, beginIndex int, bucketChunkCnt int, splitChunkCnt int, chunkRange *chunk.Range) { - s.wg.Add(1) +func (s *BucketIterator) splitChunkForBucket( + firstBucketID, lastBucketID, beginIndex int, + bucketChunkCnt, splitChunkCnt int, + chunkRange *chunk.Range, +) { s.chunkPool.Apply(func() { - defer s.wg.Done() - chunks, err := splitRangeByRandom(ctx, s.dbConn, chunkRange, splitChunkCnt, s.table.Schema, s.table.Table, s.indexColumns, s.table.Range, s.table.Collation) + chunks, err := splitRangeByRandom(s.ctx, s.dbConn, chunkRange, splitChunkCnt, s.table.Schema, s.table.Table, s.indexColumns, s.table.Range, s.table.Collation) if err != nil { select { - case <-ctx.Done(): + case <-s.ctx.Done(): case s.errCh <- errors.Trace(err): } return @@ -248,11 +253,11 @@ func (s *BucketIterator) splitChunkForBucket(ctx context.Context, firstBucketID, }) } -func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInfo) { +func (s *BucketIterator) produceChunks(startRange *RangeInfo) { defer func() { - s.wg.Wait() - progress.UpdateTotal(s.progressID, 0, true) + s.chunkPool.WaitFinished() close(s.chunksCh) + progress.UpdateTotal(s.progressID, 0, true) }() var ( lowerValues, upperValues []string @@ -272,7 +277,7 @@ func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInf // its bucketID is less than len(s.buckets) if c.Index.BucketIndexRight >= len(s.buckets) { select { - case <-ctx.Done(): + case <-s.ctx.Done(): case s.errCh <- errors.New("Wrong Bucket: Bucket index of the checkpoint node is larger than buckets' size"): } return @@ -281,7 +286,7 @@ func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInf nextUpperValues, err := 
dbutil.AnalyzeValuesFromBuckets(s.buckets[c.Index.BucketIndexRight].UpperBound, s.indexColumns) if err != nil { select { - case <-ctx.Done(): + case <-s.ctx.Done(): case s.errCh <- errors.Trace(err): } return @@ -301,7 +306,7 @@ func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInf chunkRange.Update(bound.Column, bound.Upper, "", true, false) } - s.splitChunkForBucket(ctx, c.Index.BucketIndexLeft, c.Index.BucketIndexRight, c.Index.ChunkIndex+1, c.Index.ChunkCnt, leftCnt, chunkRange) + s.splitChunkForBucket(c.Index.BucketIndexLeft, c.Index.BucketIndexRight, c.Index.ChunkIndex+1, c.Index.ChunkCnt, leftCnt, chunkRange) } } halfChunkSize := s.chunkSize >> 1 @@ -317,7 +322,7 @@ func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInf upperValues, err = dbutil.AnalyzeValuesFromBuckets(s.buckets[i].UpperBound, s.indexColumns) if err != nil { select { - case <-ctx.Done(): + case <-s.ctx.Done(): case s.errCh <- errors.Trace(err): } return @@ -343,10 +348,10 @@ func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInf if i == firstBucket { // chunkCnt := int((count + halfChunkSize) / s.chunkSize) - s.splitChunkForBucket(ctx, firstBucket, i, 0, chunkCnt, chunkCnt, chunkRange) + s.splitChunkForBucket(firstBucket, i, 0, chunkCnt, chunkCnt, chunkRange) } else { // use multi-buckets so chunkCnt = 1 - s.splitChunkForBucket(ctx, firstBucket, i, 0, 1, 1, chunkRange) + s.splitChunkForBucket(firstBucket, i, 0, 1, 1, chunkRange) } latestCount = s.buckets[i].Count @@ -368,5 +373,5 @@ func (s *BucketIterator) produceChunks(ctx context.Context, startRange *RangeInf } // When the table is much less than chunkSize, // it will return a chunk include the whole table. - s.splitChunkForBucket(ctx, firstBucket, len(s.buckets), 0, 1, 1, chunkRange) + s.splitChunkForBucket(firstBucket, len(s.buckets), 0, 1, 1, chunkRange) } diff --git a/sync_diff_inspector/splitter/splitter.go b/sync_diff_inspector/splitter/splitter.go index 5fb45bc9024..de4a05ad037 100644 --- a/sync_diff_inspector/splitter/splitter.go +++ b/sync_diff_inspector/splitter/splitter.go @@ -29,6 +29,8 @@ const ( type ChunkIterator interface { // Next seeks the next chunk, return nil if seeks to end. Next() (*chunk.Range, error) + + // Close close the current iterator. 
Close() } diff --git a/sync_diff_inspector/splitter/splitter_test.go b/sync_diff_inspector/splitter/splitter_test.go index 760b642c01d..1326f3c0f52 100644 --- a/sync_diff_inspector/splitter/splitter_test.go +++ b/sync_diff_inspector/splitter/splitter_test.go @@ -607,6 +607,7 @@ func TestBucketSpliter(t *testing.T) { tableDiff.ChunkSize = testCase.chunkSize iter, err := NewBucketIterator(ctx, "", tableDiff, db) require.NoError(t, err) + defer iter.Close() obtainChunks := make([]chunkResult, 0, len(testCase.expectResult)) nextBeginBucket := 0 @@ -675,6 +676,8 @@ func TestBucketSpliter(t *testing.T) { break } } + iter.Close() + bounds1 := chunk.Bounds rangeInfo := &RangeInfo{ From 17c1bf2e45dc3296e9bd61c6734aa756fad690c3 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Fri, 3 Jan 2025 15:19:26 +0800 Subject: [PATCH 18/22] Manually pick new commits --- go.mod | 6 +- sync_diff_inspector/diff/diff.go | 94 ++++++++--- sync_diff_inspector/diff/diff_test.go | 107 ++++++++++++ sync_diff_inspector/tests/README.md | 36 ++++ .../tests/_utils/check_contains | 15 ++ .../tests/_utils/check_contains_count | 18 ++ .../tests/_utils/check_contains_regex | 15 ++ .../tests/_utils/check_db_status | 21 +++ .../tests/_utils/check_not_contains | 15 ++ sync_diff_inspector/tests/conf/client.crt | 25 +++ sync_diff_inspector/tests/conf/client.key | 27 +++ .../tests/conf/generate_script.sh | 16 ++ sync_diff_inspector/tests/conf/root.crt | 31 ++++ sync_diff_inspector/tests/conf/tidb.crt | 25 +++ sync_diff_inspector/tests/conf/tidb.key | 27 +++ sync_diff_inspector/tests/importer/run.sh | 32 ++++ sync_diff_inspector/tests/run.sh | 119 ++++++++++++++ .../checkpoint/config_base.toml | 53 ++++++ .../checkpoint/config_base_continous.toml | 55 +++++++ .../checkpoint/config_base_rand.toml | 54 ++++++ .../sync_diff_inspector/checkpoint/run.sh | 155 ++++++++++++++++++ .../config_base_mysql.toml | 47 ++++++ .../sync_diff_inspector/config_base_tidb.toml | 49 ++++++ .../expression/config.toml | 54 ++++++ .../sync_diff_inspector/expression/run.sh | 23 +++ .../sync_diff_inspector/json/config_base.toml | 47 ++++++ .../tests/sync_diff_inspector/json/data.sql | 7 + .../tests/sync_diff_inspector/json/run.sh | 41 +++++ .../tests/sync_diff_inspector/run.sh | 67 ++++++++ .../shard/config_base.toml | 55 +++++++ .../shard/config_router_1.toml | 55 +++++++ .../shard/config_router_2.toml | 53 ++++++ .../shard/config_router_3.toml | 55 +++++++ .../shard/config_router_4.toml | 49 ++++++ .../shard/config_router_5.toml | 55 +++++++ .../tests/sync_diff_inspector/shard/run.sh | 134 +++++++++++++++ .../snapshot/config_base.toml | 49 ++++++ .../tests/sync_diff_inspector/snapshot/run.sh | 49 ++++++ .../table_config/config.toml | 60 +++++++ .../sync_diff_inspector/table_config/run.sh | 42 +++++ .../table_skip/config_base.toml | 49 ++++++ .../table_skip/config_router.toml | 61 +++++++ .../sync_diff_inspector/table_skip/data.sql | 5 + .../sync_diff_inspector/table_skip/run.sh | 65 ++++++++ .../sync_diff_inspector/time_zone/config.toml | 48 ++++++ .../sync_diff_inspector/time_zone/run.sh | 57 +++++++ .../tests/sync_diff_inspector/tls/config.toml | 53 ++++++ .../tests/sync_diff_inspector/tls/run.sh | 27 +++ sync_diff_inspector/utils/pd.go | 4 +- 49 files changed, 2278 insertions(+), 28 deletions(-) create mode 100644 sync_diff_inspector/diff/diff_test.go create mode 100644 sync_diff_inspector/tests/README.md create mode 100755 sync_diff_inspector/tests/_utils/check_contains create mode 100755 sync_diff_inspector/tests/_utils/check_contains_count create mode 
100755 sync_diff_inspector/tests/_utils/check_contains_regex create mode 100755 sync_diff_inspector/tests/_utils/check_db_status create mode 100755 sync_diff_inspector/tests/_utils/check_not_contains create mode 100644 sync_diff_inspector/tests/conf/client.crt create mode 100644 sync_diff_inspector/tests/conf/client.key create mode 100644 sync_diff_inspector/tests/conf/generate_script.sh create mode 100644 sync_diff_inspector/tests/conf/root.crt create mode 100644 sync_diff_inspector/tests/conf/tidb.crt create mode 100644 sync_diff_inspector/tests/conf/tidb.key create mode 100644 sync_diff_inspector/tests/importer/run.sh create mode 100755 sync_diff_inspector/tests/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_continous.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_rand.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/checkpoint/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/config_base_mysql.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/config_base_tidb.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/expression/config.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/json/config_base.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/json/data.sql create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/json/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/config_base.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_1.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_2.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_3.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_4.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_5.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/snapshot/config_base.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/table_config/config.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_base.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_router.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/table_skip/data.sql create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/time_zone/config.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/tls/config.toml create mode 100644 sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh diff --git a/go.mod b/go.mod index 8ce5bf41de4..0545752ad15 100644 --- a/go.mod +++ b/go.mod @@ -215,9 +215,9 @@ require ( github.com/99designs/go-keychain 
v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.1 // indirect github.com/AthenZ/athenz v1.10.39 // indirect - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.12.0 - github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.6.0 // indirect - github.com/Azure/azure-sdk-for-go/sdk/internal v1.9.0 // indirect + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 // indirect + github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect github.com/DataDog/zstd v1.5.5 // indirect github.com/KyleBanks/depth v1.2.1 // indirect github.com/Masterminds/semver v1.5.0 // indirect diff --git a/sync_diff_inspector/diff/diff.go b/sync_diff_inspector/diff/diff.go index 929be851af6..3feeb9e9de1 100644 --- a/sync_diff_inspector/diff/diff.go +++ b/sync_diff_inspector/diff/diff.go @@ -50,6 +50,29 @@ func fileExists(name string) bool { return !os.IsNotExist(err) } +// GetSnapshot gets the snapshot +func GetSnapshot(latestSnap []string, snap string, db *sql.DB) string { + if len(latestSnap) != 1 { + return snap + } + + latestSnapshotVal, err := utils.ParseSnapshotToTSO(db, latestSnap[0]) + if err != nil || latestSnapshotVal == 0 { + return snap + } + + snapshotVal, err := utils.ParseSnapshotToTSO(db, snap) + if err != nil { + return latestSnap[0] + } + + // compare the snapshots and choose the smaller one to lock + if latestSnapshotVal < snapshotVal { + return latestSnap[0] + } + return snap +} + const ( // checkpointFile represents the checkpoints' file name which used for save and loads chunks checkpointFile = "sync_diff_checkpoints.pb" @@ -340,15 +363,7 @@ func (df *Diff) startGCKeeperForTiDB(ctx context.Context, db *sql.DB, snap strin return } - if len(latestSnap) == 1 { - if len(snap) == 0 { - snap = latestSnap[0] - } - // compare the snapshot and choose the small one to lock - if strings.Compare(latestSnap[0], snap) < 0 { - snap = latestSnap[0] - } - } + snap = GetSnapshot(latestSnap, snap, db) err = utils.StartGCSavepointUpdateService(ctx, pdCli, db, snap) if err != nil { @@ -445,7 +460,7 @@ func (df *Diff) consume(ctx context.Context, rangeInfo *splitter.RangeInfo) bool // If an error occurs during the checksum phase, skip the data compare phase.
state = checkpoints.FailedState df.report.SetTableMeetError(schema, table, err) - } else if !isEqual && df.exportFixSQL { + } else if !isEqual { state = checkpoints.FailedState // if the chunk's checksum differ, try to do binary check info := rangeInfo @@ -610,7 +625,13 @@ func (df *Diff) compareChecksumAndGetCount(ctx context.Context, tableRange *spli if upstreamInfo.Count == downstreamInfo.Count && upstreamInfo.Checksum == downstreamInfo.Checksum { return true, upstreamInfo.Count, downstreamInfo.Count, nil } - log.Debug("checksum doesn't match", zap.Any("chunk id", tableRange.ChunkRange.Index), zap.String("table", df.workSource.GetTables()[tableRange.GetTableIndex()].Table), zap.Int64("upstream chunk size", upstreamInfo.Count), zap.Int64("downstream chunk size", downstreamInfo.Count), zap.Uint64("upstream checksum", upstreamInfo.Checksum), zap.Uint64("downstream checksum", downstreamInfo.Checksum)) + log.Debug("checksum doesn't match, need to compare rows", + zap.Any("chunk id", tableRange.ChunkRange.Index), + zap.String("table", df.workSource.GetTables()[tableRange.GetTableIndex()].Table), + zap.Int64("upstream chunk size", upstreamInfo.Count), + zap.Int64("downstream chunk size", downstreamInfo.Count), + zap.Uint64("upstream checksum", upstreamInfo.Checksum), + zap.Uint64("downstream checksum", downstreamInfo.Checksum)) return false, upstreamInfo.Count, downstreamInfo.Count, nil } @@ -650,11 +671,17 @@ func (df *Diff) compareRows(ctx context.Context, rangeInfo *splitter.RangeInfo, if lastUpstreamData == nil { // don't have source data, so all the targetRows's data is redundant, should be deleted for lastDownstreamData != nil { - sql := df.downstream.GenerateFixSQL(source.Delete, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) rowsDelete++ - log.Debug("[delete]", zap.String("sql", sql)) - dml.sqls = append(dml.sqls, sql) + if df.exportFixSQL { + sql := df.downstream.GenerateFixSQL( + source.Delete, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex(), + ) + log.Debug("[delete]", zap.String("sql", sql)) + + dml.sqls = append(dml.sqls, sql) + } + equal = false lastDownstreamData, err = downstreamRowsIterator.Next() if err != nil { @@ -667,11 +694,13 @@ func (df *Diff) compareRows(ctx context.Context, rangeInfo *splitter.RangeInfo, if lastDownstreamData == nil { // target lack some data, should insert the last source datas for lastUpstreamData != nil { - sql := df.downstream.GenerateFixSQL(source.Insert, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) rowsAdd++ - log.Debug("[insert]", zap.String("sql", sql)) + if df.exportFixSQL { + sql := df.downstream.GenerateFixSQL(source.Insert, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) + log.Debug("[insert]", zap.String("sql", sql)) - dml.sqls = append(dml.sqls, sql) + dml.sqls = append(dml.sqls, sql) + } equal = false lastUpstreamData, err = upstreamRowsIterator.Next() @@ -698,22 +727,34 @@ func (df *Diff) compareRows(ctx context.Context, rangeInfo *splitter.RangeInfo, switch cmp { case 1: // delete - sql = df.downstream.GenerateFixSQL(source.Delete, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) rowsDelete++ - log.Debug("[delete]", zap.String("sql", sql)) + if df.exportFixSQL { + sql = df.downstream.GenerateFixSQL( + source.Delete, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex(), + ) + log.Debug("[delete]", zap.String("sql", sql)) + } lastDownstreamData = nil case -1: // insert - sql = df.downstream.GenerateFixSQL(source.Insert, lastUpstreamData, 
lastDownstreamData, rangeInfo.GetTableIndex()) rowsAdd++ - log.Debug("[insert]", zap.String("sql", sql)) + if df.exportFixSQL { + sql = df.downstream.GenerateFixSQL( + source.Insert, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex(), + ) + log.Debug("[insert]", zap.String("sql", sql)) + } lastUpstreamData = nil case 0: // update - sql = df.downstream.GenerateFixSQL(source.Replace, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex()) rowsAdd++ rowsDelete++ - log.Debug("[update]", zap.String("sql", sql)) + if df.exportFixSQL { + sql = df.downstream.GenerateFixSQL( + source.Replace, lastUpstreamData, lastDownstreamData, rangeInfo.GetTableIndex(), + ) + log.Debug("[update]", zap.String("sql", sql)) + } lastUpstreamData = nil lastDownstreamData = nil } @@ -722,6 +763,13 @@ func (df *Diff) compareRows(ctx context.Context, rangeInfo *splitter.RangeInfo, } dml.rowAdd = rowsAdd dml.rowDelete = rowsDelete + + log.Debug("compareRows", + zap.Bool("equal", equal), + zap.Int("rowsAdd", rowsAdd), + zap.Int("rowsDelete", rowsDelete), + zap.Any("chunk id", rangeInfo.ChunkRange.Index), + zap.String("table", df.workSource.GetTables()[rangeInfo.GetTableIndex()].Table)) return equal, nil } diff --git a/sync_diff_inspector/diff/diff_test.go b/sync_diff_inspector/diff/diff_test.go new file mode 100644 index 00000000000..e8251a13343 --- /dev/null +++ b/sync_diff_inspector/diff/diff_test.go @@ -0,0 +1,107 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
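The table-driven TestGetSnapshot below exercises the exported GetSnapshot helper added to diff.go. For orientation, here is a minimal standalone sketch (not part of the patch) of how the helper behaves when both arguments are plain TSO strings; it assumes the package is importable as github.com/pingcap/tiflow/sync_diff_inspector/diff after the move into tiflow, and it uses a sqlmock handle only as a stand-in for a real TiDB connection.

package main

import (
	"fmt"

	"github.com/DATA-DOG/go-sqlmock"
	"github.com/pingcap/tiflow/sync_diff_inspector/diff"
)

func main() {
	// A mocked *sql.DB is enough here: numeric TSO strings appear to be resolved
	// without querying the server (the test cases below only mock
	// SELECT unix_timestamp(...) for the datetime snapshots).
	db, _, err := sqlmock.New()
	if err != nil {
		panic(err)
	}
	defer db.Close()

	fmt.Println(diff.GetSnapshot([]string{"5"}, "6", db)) // "5": the earlier snapshot wins, so the GC safepoint covers both reads
	fmt.Println(diff.GetSnapshot([]string{"7"}, "6", db)) // "6": the configured snapshot is already the older one
	fmt.Println(diff.GetSnapshot(nil, "6", db))           // "6": no recorded snapshot, keep the configured one
}

Pulling this decision out of startGCKeeperForTiDB into a value-based helper is what makes a table-driven test like the one below possible; the replaced inline code compared the two snapshot strings with strings.Compare, which orders numeric TSOs lexicographically rather than numerically.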
+ +package diff + +import ( + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/stretchr/testify/require" +) + +func TestGetSnapshot(t *testing.T) { + cases := []struct { + latestSnapshot []string + snapshot string + expected string + snapshotRows string + }{ + { + latestSnapshot: []string{}, + snapshot: "1", + expected: "1", + }, + { + latestSnapshot: []string{"2"}, + snapshot: "", + expected: "2", + }, + { + latestSnapshot: []string{"0"}, + snapshot: "3", + expected: "3", + }, + { + latestSnapshot: []string{"4"}, + snapshot: "0", + expected: "0", + }, + { + latestSnapshot: []string{"5"}, + snapshot: "6", + expected: "5", + }, + { + latestSnapshot: []string{"7"}, + snapshot: "6", + expected: "6", + }, + { + // 2017-10-07 16:45:26 + latestSnapshot: []string{"395146933305344000"}, + snapshot: "2017-10-08 16:45:26", + expected: "395146933305344000", + snapshotRows: "1507452326", + }, + { + // 2017-10-07 16:45:26 + latestSnapshot: []string{"395146933305344000"}, + snapshot: "2017-10-06 16:45:26", + expected: "2017-10-06 16:45:26", + snapshotRows: "1507279526", + }, + { + latestSnapshot: []string{"1"}, + snapshot: "2017-10-06 16:45:26", + expected: "1", + snapshotRows: "1507279526", + }, + { + latestSnapshot: []string{"395146933305344000"}, + snapshot: "1", + expected: "1", + }, + { + // 2090-11-19 22:07:45 + latestSnapshot: []string{"1000022649077760000"}, + snapshot: "2090-11-18 22:07:45", + expected: "2090-11-18 22:07:45", + snapshotRows: "3814697265", + }, + } + + conn, mock, err := sqlmock.New() + require.NoError(t, err) + defer conn.Close() + + for i, cs := range cases { + if len(cs.snapshotRows) > 0 { + dataRows := sqlmock.NewRows([]string{""}).AddRow(cs.snapshotRows) + mock.ExpectQuery("SELECT unix_timestamp(?)").WillReturnRows(dataRows) + } + val := GetSnapshot(cs.latestSnapshot, cs.snapshot, conn) + require.Equal(t, cs.expected, val, "case %d", i) + } + +} diff --git a/sync_diff_inspector/tests/README.md b/sync_diff_inspector/tests/README.md new file mode 100644 index 00000000000..443b78dd7e8 --- /dev/null +++ b/sync_diff_inspector/tests/README.md @@ -0,0 +1,36 @@ + + +This folder contains all tests that rely on external services such as TiDB. + +## Preparations + +1. The following seven executables must be copied or linked into these locations: + + - `bin/pd-server` + - `bin/tikv-server` + - `bin/tidb-server` + - `bin/sync_diff_inspector` + - `bin/dumpling` + - `bin/loader` + - `bin/importer` + +2. The following programs must be installed: + + - `mysql` (the CLI client) + - `mysqladmin` + +3. The user executing the tests must have permission to create the folder + + `/tmp/tidb_tools_test`. All test artifacts will be written into this folder. + +## Running + +Run `make integration_test` to execute the integration tests. This command will + +1. Build binaries. +2. Check that all executables exist. +3. Execute `tests/run.sh`. + +If the first two steps have already been done, you can also run `tests/run.sh` directly. + +The script will find all `tests/*/run.sh` scripts and run them. diff --git a/sync_diff_inspector/tests/_utils/check_contains b/sync_diff_inspector/tests/_utils/check_contains new file mode 100755 index 00000000000..93e7970b76a --- /dev/null +++ b/sync_diff_inspector/tests/_utils/check_contains @@ -0,0 +1,15 @@ +#!/bin/sh + +# argument 1 is the string need grep +# argument 2 is the filename + +set -eu +OUT_DIR=/tmp/tidb_tools_test + +if !
grep -Fq "$1" "$2"; then + echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1'" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 +fi diff --git a/sync_diff_inspector/tests/_utils/check_contains_count b/sync_diff_inspector/tests/_utils/check_contains_count new file mode 100755 index 00000000000..8308512d789 --- /dev/null +++ b/sync_diff_inspector/tests/_utils/check_contains_count @@ -0,0 +1,18 @@ +#!/bin/sh + +# argument 1 is the string need grep +# argument 2 is the filename +# argument 3 is the match count + +set -eu +OUT_DIR=/tmp/tidb_tools_test + +count=$(grep -F "$1" "$2" | wc -l) + +if [ "$count" -ne "$3" ]; then + echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1' $3 times" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 +fi diff --git a/sync_diff_inspector/tests/_utils/check_contains_regex b/sync_diff_inspector/tests/_utils/check_contains_regex new file mode 100755 index 00000000000..ce498abbec2 --- /dev/null +++ b/sync_diff_inspector/tests/_utils/check_contains_regex @@ -0,0 +1,15 @@ +#!/bin/sh + +# argument 1 is the string need grep +# argument 2 is the filename + +set -eu +OUT_DIR=/tmp/tidb_tools_test + +if ! grep -q "$1" "$2"; then + echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1'" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 +fi diff --git a/sync_diff_inspector/tests/_utils/check_db_status b/sync_diff_inspector/tests/_utils/check_db_status new file mode 100755 index 00000000000..8dc75739c55 --- /dev/null +++ b/sync_diff_inspector/tests/_utils/check_db_status @@ -0,0 +1,21 @@ +#!/bin/bash + +# argument 1 is the host +# argument 2 is the port +# argument 3 is the database service's name + +for i in {1..20} +do + if mysqladmin -h "$1" -P "$2" -u root --default-character-set utf8 ping > /dev/null 2>&1 + then + echo "$3 is alive" + exit 0 + fi + + echo "$3 is not alive, will try again" + sleep 2 +done + +echo "$3 is not alive" +cat "$4" +exit 2 diff --git a/sync_diff_inspector/tests/_utils/check_not_contains b/sync_diff_inspector/tests/_utils/check_not_contains new file mode 100755 index 00000000000..43fd007ad5f --- /dev/null +++ b/sync_diff_inspector/tests/_utils/check_not_contains @@ -0,0 +1,15 @@ +#!/bin/sh + +# argument 1 is the string need grep +# argument 2 is the filename + +set -eu +OUT_DIR=/tmp/tidb_binlog_test + +if grep -Fq "$1" "$2"; then + echo "TEST FAILED: '$2' CONTAIN '$1'" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 +fi \ No newline at end of file diff --git a/sync_diff_inspector/tests/conf/client.crt b/sync_diff_inspector/tests/conf/client.crt new file mode 100644 index 00000000000..0e14073aca2 --- /dev/null +++ b/sync_diff_inspector/tests/conf/client.crt @@ -0,0 +1,25 @@ +-----BEGIN CERTIFICATE----- +MIIEKDCCAhACCQC8wBajSPPO6jANBgkqhkiG9w0BAQsFADBCMQswCQYDVQQGEwJY +WDEVMBMGA1UEBwwMRGVmYXVsdCBDaXR5MRwwGgYDVQQKDBNEZWZhdWx0IENvbXBh +bnkgTHRkMCAXDTIzMDkyMTE1NTEwNVoYDzIwNTEwMjA2MTU1MTA1WjBoMQswCQYD +VQQGEwJzZDEMMAoGA1UECAwDc2RmMQ0wCwYDVQQHDARzZGZ2MQwwCgYDVQQKDANk +dnMxDDAKBgNVBAsMA3ZkczEMMAoGA1UEAwwDdmRzMRIwEAYJKoZIhvcNAQkBFgNz +ZHYwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDsc8jf3AshHroMGGuT +D7PDIb29IL91BbIm+8LYr1/dCio0mZX7tW4CKlw+OdpJo5oLZGvuvq6BkHPrzCa5 +NvwZGGRuJUEqMEoBk/aDsOJ15JTZ8XEPPNNBNgKT0eq8V+KEBcQ4y8llBhEoFFOJ +9pYMn+RdK8n2s2qTqufXO6gNgLKHJCvu5cWsQqGnFfIr3vv6iBmaQHi6krjk75Ti 
+LgqmwF+u49Iexdd7DWBYyL+YjcVoN0Bjjsk0d4xB+VRE2JNVehWNIvK8bOMfgLy4 +pUNm6PXPNxn8CZDGhKYEZBiTfh/LkXgsc7FawPBtddWwJ17DV6sLqa3LkbomNjHt +g2gpAgMBAAEwDQYJKoZIhvcNAQELBQADggIBAKRTOQApEZbMyEcROEjIo2xf5Svb +h/eMsG+9xjh5KvxLrXNogVA/pd3n2tk1TNSgIz9jc+AhixIeFSjzi0KTrtbHoVYT +iZ0pPPBvbuBfSFeQWxGPvosfARRivSiljo4wsTuHPUNEj2EqQ5NXEzpEKXkTKaD8 +T3vLXSK9h+9GREGTAecVGbxfxfGeW94q3Z0QMA9uNu04Y2g16r/uUdQ5ih7nv1zM +84RcHTeMCWZV9i3unf4j7Sn5ShnKs7Q18iKQZPZRN4rh7HACe7JAa38Z2DzThPoA +MaROPpzZ5pUFSVdCYAh5YtwwDEmT6AKyW8LfO5bYLg+LABoGXmDCQr0Zo4zEMVmt +TCwaSNwvGA65llXXMCxivIERoXmoV+PyNbC6oGE//znC32jWNfxwc12Y7sqBZgXn +q8D08+XE3OAncCwP7zPBY5MLvYecIPos97qwJg3GECwkV82iIWmSb4xC0E4XIStA +YXDvjwlRkQ6VS/J9Igl+PdRwIxKPjDsSxnWKH6rshX6zasgJBB31txFQIaX6/DAZ +ZQpMFvjBxZ1L1q4gpdnq60l4Ok+bj3pz6vHyZ5DD1dKu5yMWfh0jqR9k6fAy5Tay +ESgqcCB3d8l4MHL8L4vqV8TkA/L5r4h5jpms3oe2+bHASdbV3uwLlqNBLGP5WvOF +GwsnD7vwZG0TRLPf +-----END CERTIFICATE----- diff --git a/sync_diff_inspector/tests/conf/client.key b/sync_diff_inspector/tests/conf/client.key new file mode 100644 index 00000000000..38b3a840be5 --- /dev/null +++ b/sync_diff_inspector/tests/conf/client.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEogIBAAKCAQEA7HPI39wLIR66DBhrkw+zwyG9vSC/dQWyJvvC2K9f3QoqNJmV ++7VuAipcPjnaSaOaC2Rr7r6ugZBz68wmuTb8GRhkbiVBKjBKAZP2g7DideSU2fFx +DzzTQTYCk9HqvFfihAXEOMvJZQYRKBRTifaWDJ/kXSvJ9rNqk6rn1zuoDYCyhyQr +7uXFrEKhpxXyK977+ogZmkB4upK45O+U4i4KpsBfruPSHsXXew1gWMi/mI3FaDdA +Y47JNHeMQflURNiTVXoVjSLyvGzjH4C8uKVDZuj1zzcZ/AmQxoSmBGQYk34fy5F4 +LHOxWsDwbXXVsCdew1erC6mty5G6JjYx7YNoKQIDAQABAoIBAF5boji7TfGsmmAD +SsKIDJt9FoOn0t93I23tIAdqjN3flZIiDDlDYKAamW73lUW3tNyA+ZVvoKHxrGDX +ukhFSfsVwdY2pbeZR58zlbdd2jFjw4bpk82Z30/xpruolH0OsUUwK+riX/8hma0d +12yB68Uj9XS86b66QHinRhedJeYkxjZG/F94zPXBPh+qqmU0ubf5VOqBFAAQJv7k +736Z5CG9Hp+mnBvWa+oDKj+fNe6kXwA4DNiZumVTqaCifGldHIwrQuP/RZnuO6WM +XlA67eiHsHQZQ7j99biAfWunEqktK/B5rxhICSe/bRwXmT852kQUFBlpdMefGtX8 +DzeFS3UCgYEA+d+7h7hFQbmCIkVUsltKmnljqiLVfc6Ee/K6giQldMsijcCRpiDu +gyLDph/Et9ncFFI1t0zwm8XQyIzVlG69Oq118dxSrRmuRpL53CnsfQ8UCIl1kCxU +NkoOWrK/UElVl0oOX/xyNVhxbaeAE2gitpiTZvBiiRc+BfqR+QIif78CgYEA8j/Q +h+F+iP5jd+d/eDe4bE3ekNjX6Xo+HQFwYNSDmcllxGpKa/IR8hzNXH/Z2xJGjLw8 +FIeZ/O3ub8wVOQKjvalg7y6u1pen4HlNT+L6sIUZEy0DaEDyC5iBFPs0uE/0bXkv +iyoIk6uKBzeUHWKVVjBItpQwwU0Hjd3zZmQxkhcCgYAMa/zsoFKBE/HONlghjbxF +tacovBaU7IFVkBmJgraB+d874Mjc02JIDqBfT9D9uszgDb4x4JdNhyX78lRjzqWF +lz33yhYqGM67H29gbI0fInLCgeLgSfPdxwyzoZM1MJAat0nDp88dq8nnw53wWQrw +vOEHCwg6/HbO4UgEcwC4DQKBgA7EWxbdZRQ+xZt6jieq5eAcQxP7U/YUkJK+Erak +Xb1TLJPzksPPxs5GeTJJTONw0sIje1oZBgcIDf/cpfKKuaaHG7SY2OmV5xLk8hSL +lpKKGoQzu0BwrRCN5Fh+E7GklhbSS1alYk52J3zXI31DFC1j1hrjH7G421wHip3U +P71TAoGAKLpj4ZA4XiyZcejX3C32IuuFP4hUTDmmI/kNuJ8MqolTGGlFdvk2RqKR +SDW2cGRiPx+h+KmTzWJawZNAuWYCX13yj5+WiIdLl11yT+sWrjQl3MWerv6Mb9Y5 +SNfwucXTV6gMwq0djVszS3kBNP1keypQosFhNZHVLVGEpK7TmwM= +-----END RSA PRIVATE KEY----- diff --git a/sync_diff_inspector/tests/conf/generate_script.sh b/sync_diff_inspector/tests/conf/generate_script.sh new file mode 100644 index 00000000000..385f393e738 --- /dev/null +++ b/sync_diff_inspector/tests/conf/generate_script.sh @@ -0,0 +1,16 @@ +# root.key +openssl genrsa -out root.key 4096 +# root.crt +openssl req -new -x509 -key root.key -out root.crt -days 10000 +# tidb.key +openssl genrsa -out tidb.key 2048 +# tidb.csr, DON'T LEAVE WITH EMPTY INFORMATION +openssl req -new -key tidb.key -out tidb.csr +# tidb.crt +openssl x509 -req -days 10000 -CA root.crt -CAkey root.key -CAcreateserial -in tidb.csr -out tidb.crt +# client.key +openssl genrsa -out client.key 2048 +# client.csr, DON'T LEAVE WITH 
EMPTY INFORMATION +openssl req -new -key client.key -out client.csr +# client.crt +openssl x509 -req -days 10000 -CA root.crt -CAkey root.key -CAcreateserial -in client.csr -out client.crt diff --git a/sync_diff_inspector/tests/conf/root.crt b/sync_diff_inspector/tests/conf/root.crt new file mode 100644 index 00000000000..29110b65e6c --- /dev/null +++ b/sync_diff_inspector/tests/conf/root.crt @@ -0,0 +1,31 @@ +-----BEGIN CERTIFICATE----- +MIIFWTCCA0GgAwIBAgIJAKPBAEg3MkuTMA0GCSqGSIb3DQEBCwUAMEIxCzAJBgNV +BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg +Q29tcGFueSBMdGQwIBcNMjMwOTIxMTQ1OTU0WhgPMjA1MTAyMDYxNDU5NTRaMEIx +CzAJBgNVBAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0Rl +ZmF1bHQgQ29tcGFueSBMdGQwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoIC +AQDwrasqh2GJgq+ggHtd78Ocjx2CkoO+5Pn9QS0fupDN3O6Jo1/2dVBpDj0VE50I +3bfJ8ubBQbevCJnq/1J5jBwoN+ZtxJ3md2Vwv4cpFHDssUcaepH5fAA4wWDUDHd3 +ShE+90M7LbTZxpSMYA8/JmNhFIDYFoeSAMl1001m/CVHIRFl16ifeKHryFafx8on +poOyu2Q6MDDCimfbK4p8iaQXeJc/T99Gv6+kbRIJ/HrBD5LU/fR0asSt+9CXm4kH +7B+GzPDEnrV22Qv2LB/P87GHdit0WyuC613qOD7yAQwS4KE6UCahjYG2gPai6gwM +tkTwIHw9jRSw8nBKxBPlbL21eZQL/q+6ekVSV+bRVsAXf9svA4HC1d+5O/iWLXqU +E4TPcCPmfld523UsyBVz62NCIluKZejjdJCZFtvwy8yeZ2IuBcQq+VWoMttAYivk +c2rq0qE/FokuJR7efy7uQMNzxQgQueMQfn6fARJJu7KF0P0lrUAaMy+iZQfc4+ca +ozjqHMgjs1yIiYGCwCvzCfWL/qD/EK4bZhhpfsNF5Nb4AcQFRy9k2gJvXuUGu31/ +r59TwVoE/ItQ5zik8OXP2WVCiGeQ+nk3uEkpclA1aFnfXfcYPXsW0fJJAPLIxU3J +cT6D3ZlDQmhJpri/vVIPCXzJx3yMqOn3hDEqR3ULG00FKQIDAQABo1AwTjAdBgNV +HQ4EFgQU42kTinG330mWu6zhUQSJ+ZdJLkUwHwYDVR0jBBgwFoAU42kTinG330mW +u6zhUQSJ+ZdJLkUwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAgEARHhc +pY8QQVyOpVwrCO/jBNCTfGb6FGjtHLpO9smN6hdtMgjxfDF1Ljuf1SaBM7Up+ina +1NG1VvFB6cOPn4Nvwv/AmznXje+R4QDD7fjuCW3VHZpzWHQENDz/4pwjaZk85DO7 +P9TuX1OMsGvNElb/576gU6eb1mpg7QX5mk+y+xW7TNFzg+x6pLKr4jxenQ2EIsYV +6TzGYN85GS21V2yLi8NlMWsd5mhHYlCl92sRip4pb0Tzzp9ErXMDCoer6hVjQGES +i8rc6r9XNUUka4bU/TyjvAa8BIC75bEvn1FspaifbxBezQVvuojUw5XdC+GJCkpL +Nmys5H9an+gq1s79VNWhXtlCfvE540WI5CgfCQJuY6I8KAw17FvjcTrGkgay8c+t +cKpEDO0ZSXNTIrNjIZJE3AhCIRxViWoGDHWWPX8eGz9OAeuswEVoZ77EIczAgZC+ +CvMw5XakoRdEuYl9Sm0x3P0BuqRGSAwmoFM4bFGTeHK39yOzL8NZbBL940Ed0eae +tM8GPGxtL0x6Cn9yf/Q2jXH/6jElS1v8LXfivAgqiC6h0MNPPZ749XXULIUkSddB +Z7O7AFG35TXnJ2z00FOBmXAv31Wd78KMiY4DKFSyEovj0HkJsDEnn1WSya+zI4Vw +FyOO1ffCdJ+jLCC/gfbqI32iJiHWDz8hG9cIApg= +-----END CERTIFICATE----- diff --git a/sync_diff_inspector/tests/conf/tidb.crt b/sync_diff_inspector/tests/conf/tidb.crt new file mode 100644 index 00000000000..49049259e93 --- /dev/null +++ b/sync_diff_inspector/tests/conf/tidb.crt @@ -0,0 +1,25 @@ +-----BEGIN CERTIFICATE----- +MIIEJzCCAg8CCQC8wBajSPPO6TANBgkqhkiG9w0BAQsFADBCMQswCQYDVQQGEwJY +WDEVMBMGA1UEBwwMRGVmYXVsdCBDaXR5MRwwGgYDVQQKDBNEZWZhdWx0IENvbXBh +bnkgTHRkMCAXDTIzMDkyMTE1NTAyNVoYDzIwNTEwMjA2MTU1MDI1WjBnMQswCQYD +VQQGEwJmZDELMAkGA1UECAwCZXcxDDAKBgNVBAcMA2ZldzENMAsGA1UECgwEZndl +ZjEMMAoGA1UECwwDd2VkMQwwCgYDVQQDDANkZXcxEjAQBgkqhkiG9w0BCQEWA2Rl +dzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAN8dJT6PA+6lxtRntEsZ +gTveN1IvJfvnm1Zdw1Ryv/fEwE3hrGj3M61g+arZq2kgz58NzD6Uiis3sva1G4DK +4eh/9vscXq6xZ+VR89RhTVD+BqJes0l0+hhd7gT2v6Cn9VhgOP740rp4ZpmlQW6w ++/CBZ0k1dC4XD/QTIpETbmPIlavVATbQNqXDiObjxqVEyOu4QoNWC6PrvjBcO65L +jWxBnoGD6EH2ow/J6YJQh+U3mNy4iDkEz/sLJPVRaabplBWGfT887iiJighTCEpN +Be8NeUIKOykHZiefmynhKBz3hv32IuFhMt8u6ZzORC8uC/lID+Mty8Pnn4C2bKQn +0UsCAwEAATANBgkqhkiG9w0BAQsFAAOCAgEAmYd6xg/15JeIbmbpOJVnQAPvfOPa +3eZJjhcP0kjGe3ZQeQaW1XSbn/vsx7+rofu8j/yutmqs6HBxFWKkrlRODIcDwECo +7HZzDUtL3eZr6sPoPadHRV5k8DT6JWsb8rYhbe2VWOh+DxN2YACgD+We4Tdh44Ce 
+LKLwqHcBaXDKeoLGhjNQDIaUF1g/D7f3ad7H9vKUd4Pc4hgmOh3zIKido8/14uil +e/x0dp9uwRdWVBL+BJDZGXGRVQAo+Cg0RRV0xmsjSQ8RJSmB+kb4aQuBHq2J1unR +mTRCZ7Y2bscdCOCvzumzaEvI+7yUnoXceymW30cKAUnpCMMIiTiDXBmMVxV7B0wq +RUCIi5uIt66pw/r/02u4aAJyO4jNAbt+Rhpg6bGa0Ng+4YWt1XHRN1jSpcIhR7Gp +almdVZhG128ZmcEIpJhnTc8bj5DwCMkR5D2gbg6jeFOHz2bFMXavTze30z36B/Uy +UFhku8ZdlciDL+7jhFTAuAgERwxnCBgO9tu4rL3KbZ3S0GY4CzgDQgiUB5e9h4iY +tdix5bPKSirirljwGxPrIxKppaxQhgU2pqJ3ZQPgLGG8NkEA0ycbQErnlb2sRtKE +PPI59DchHn9t7KcuANwg3HsCcL0Ts3/0HWA6hX9uccpNS+HnIjnhr8rdxylHezZw +33VTbDV0ahqXRiY= +-----END CERTIFICATE----- diff --git a/sync_diff_inspector/tests/conf/tidb.key b/sync_diff_inspector/tests/conf/tidb.key new file mode 100644 index 00000000000..6ad3cd2b6c0 --- /dev/null +++ b/sync_diff_inspector/tests/conf/tidb.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEA3x0lPo8D7qXG1Ge0SxmBO943Ui8l++ebVl3DVHK/98TATeGs +aPczrWD5qtmraSDPnw3MPpSKKzey9rUbgMrh6H/2+xxerrFn5VHz1GFNUP4Gol6z +SXT6GF3uBPa/oKf1WGA4/vjSunhmmaVBbrD78IFnSTV0LhcP9BMikRNuY8iVq9UB +NtA2pcOI5uPGpUTI67hCg1YLo+u+MFw7rkuNbEGegYPoQfajD8npglCH5TeY3LiI +OQTP+wsk9VFppumUFYZ9PzzuKImKCFMISk0F7w15Qgo7KQdmJ5+bKeEoHPeG/fYi +4WEy3y7pnM5ELy4L+UgP4y3Lw+efgLZspCfRSwIDAQABAoIBAQDbH7fNeg5FmuDt +CTDkNRdcogE5IP8wKSbBqvLFBLIO+FhZ5RM8P9SsKjrqlj8nz8a5koaOy7nrehe0 +/ugVFKzMeKkrtJA1CB1e9p7/bdTOf74qR9HTiiR6O/4hN+m6MlrewhoYKG5mHwu9 +SLU7rmyxp5W2K95/ybysXQKs8yWOrsMCxnq4N69QvqkThkTdBVNwbBoxIMwe66Yr +DuEtocherwTeDxRQjSzkK0oafxy0lokLrXp1wGy/goacYNlyfpQmztlkiGxRW/Dq +2816W3u2sgH86Oy9WbAgiJcjLGfvAq/f2acRBZ4RKX+KiA1KNvgpHWUw6XxBjsIF +9J+RPNNJAoGBAPD/cHJw2qOSvem/taOCY2RcRk6RPtTdJxrmBjMjlhs0+asrmK5P +/1dtfx0sZ7TPE7oo6rkD6ZeCj/QsS9dWgJSq18abjGIIkZbLIa4DWZkDrdQoG2Bd +11F01DVpgfk8F1dieSiL5p0HvEsNZugjeMRQwPem/NoPivZ9sA3aD/t9AoGBAO0A +tLwGQEEk0xQjGyfsfWjA4IYbb1elZ0AGkgKnQxgkzgwsdJ4HIqNzpT7Jpwr8Iaj1 +wv2VAP3/EpEXpcM82oQG4jX/4W7jc+DWN5mEDvvoQKWgaxuA6OOS3aF71iTs7rcv +G7Ju6kCKfAHMT1138yRAfKITZaqziNyeX1EgvcpnAoGAPNcv6yREfiEQos9MKtBD +CVYmRbVzWEfQlIDXtddZENtJ4IWsEO2PN+Ijwhiwwbu5bjjgMP8k3KQQdYMtTlq/ +MUkEGlawlRs2rgvwH78mwnNkUfgiGSz3q8/DtwxAzMv31I6+qZbQDHqkdYoXnak+ +1sjQPnVAxkhAO8Q1SvnvKP0CgYB/MIjN+0DSdRO+U9TICTeIVzJnZiPL0p1lk+Ea +AW+VbnMRv23aPRQOygpddtTppUPfK/04H5YHubLaIOm5rFfM0PDnb+oom3JdsDjo +byGneQ3wlPXGLdlOAExm1FGpQWoe7u4bRUD74BYK1P2muK/Ivb7lMCm4gV8qnuei +X0LbcwKBgQDnxsFKrxrmsSKXIdGGqj6sn8fZwS6TcpyFNkkgItLIoYez0aiSLHe0 +WDBa4VGfhraUxGs3KsRWHIgFAIaJhVqfiVnRTFE08/U2vbB3GF3SntaSZVkFUNaT +a+LJzaNzrkyikjt42tYQGmX/5W2f/597PfrGxxiWKG6UTT/kHIwDjw== +-----END RSA PRIVATE KEY----- diff --git a/sync_diff_inspector/tests/importer/run.sh b/sync_diff_inspector/tests/importer/run.sh new file mode 100644 index 00000000000..b82a2ef1575 --- /dev/null +++ b/sync_diff_inspector/tests/importer/run.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +TEST_DATABASE_NAME=checker_test +IMPORT_EXEC="../bin/importer -c 1 -h ${MYSQL_HOST} -P ${MYSQL_PORT} -D ${TEST_DATABASE_NAME}" +MYSQL_EXEC="mysql -h ${MYSQL_HOST} -P ${MYSQL_PORT} -u root" + +init(){ + check_db_status "${MYSQL_HOST}" "${MYSQL_PORT}" mysql "." 
+ ${MYSQL_EXEC} -e "drop database if exists ${TEST_DATABASE_NAME};" + ${MYSQL_EXEC} -e "create database ${TEST_DATABASE_NAME};" +} + +destroy(){ + ${MYSQL_EXEC} -e "drop database if exists ${TEST_DATABASE_NAME};" +} + +testImporter(){ + ${IMPORT_EXEC} -c 1 -n 10 -t "$1" -i "$2" + RESULT=`${MYSQL_EXEC} -e "$3" | sed -n '2p'` + if [[ "${RESULT}" != "$4" ]]; then + echo "Test importer failed: $1" + exit 1 + fi +} + +set -e +init +testImporter "create table ta(a int primary key, b double, c varchar(10), d date unique, e time unique, f timestamp unique, g date unique, h datetime unique, i year unique);" "create unique index u_b on ta(b);" "select count(*) as result from ${TEST_DATABASE_NAME}.ta" "10" +testImporter "create table tb(a int comment '[[range=1,10]]');" "" "select count(*) as result from ${TEST_DATABASE_NAME}.tb where a <= 10 and a >= 1" "10" +testImporter "create table tc(a int unique comment '[[step=2]]');" "" "select sum(a) as result from ${TEST_DATABASE_NAME}.tc" "90" +testImporter "create table td(a int comment '[[set=1,2,3]]');" "" "select count(*) as result from ${TEST_DATABASE_NAME}.td where a <= 3 and a >= 1" "10" +destroy diff --git a/sync_diff_inspector/tests/run.sh b/sync_diff_inspector/tests/run.sh new file mode 100755 index 00000000000..7d0e04a1a1f --- /dev/null +++ b/sync_diff_inspector/tests/run.sh @@ -0,0 +1,119 @@ +#!/bin/sh + +set -eu + +OUT_DIR=/tmp/tidb_tools_test + +# assign default value to mysql config +if [[ -z ${MYSQL_HOST+x} ]]; then + echo "set MYSQL_HOST as default value \"127.0.0.1\"" + export MYSQL_HOST="127.0.0.1" +fi +if [[ -z ${MYSQL_PORT+x} ]]; then + echo "set MYSQL_PORT as default value 3306" + export MYSQL_PORT=3306 +fi + +mkdir -p $OUT_DIR || true +# to the dir of this script +cd "$(dirname "$0")" + +pwd=$(pwd) + +export PATH=$PATH:$pwd/_utils +export PATH=$PATH:$(dirname $pwd)/bin + +rm -rf $OUT_DIR || true + +stop_services() { + killall -9 tikv-server || true + killall -9 pd-server || true + killall -9 tidb-server || true +} + +start_services() { + stop_services + + echo "Starting PD..." + pd-server \ + --client-urls http://127.0.0.1:2379 \ + --log-file "$OUT_DIR/pd.log" \ + --data-dir "$OUT_DIR/pd" & + # wait until PD is online... + while ! curl -o /dev/null -sf http://127.0.0.1:2379/pd/api/v1/version; do + sleep 1 + done + + # Tries to limit the max number of open files under the system limit + cat - > "$OUT_DIR/tikv-config.toml" < "$OUT_DIR/tidb-config.toml" <>>\033[0m" diff --git a/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base.toml b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base.toml new file mode 100644 index 00000000000..37629b414fa --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base.toml @@ -0,0 +1,53 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. 
+export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[table-configs] +[table-configs.config1] +target-tables = ["diff_test.test"] +chunk-size = 10 +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["diff_test.test"] + + # extra table config + target-configs= ["config1"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_continous.toml b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_continous.toml new file mode 100644 index 00000000000..b83552c0cd7 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_continous.toml @@ -0,0 +1,55 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[table-configs] +[table-configs.config1] +target-tables = ["diff_test.test"] +chunk-size = 50 +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["diff_test.test", "diff_test.ttt"] + + # extra table config + target-configs= ["config1"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_rand.toml b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_rand.toml new file mode 100644 index 00000000000..50d15de73b7 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/config_base_rand.toml @@ -0,0 +1,54 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. 
+# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.mysql] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[table-configs] +[table-configs.config1] +target-tables = ["diff_test.test"] +chunk-size = 500 +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "mysql" + + # tables need to check. + target-check-tables = ["diff_test.test"] + + # extra table config + target-configs= ["config1"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/run.sh new file mode 100644 index 00000000000..9aa782c7735 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/checkpoint/run.sh @@ -0,0 +1,155 @@ +#!/bin/sh + +set -ex + +cd "$(dirname "$0")" + +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +rm -rf $OUT_DIR +mkdir -p $OUT_DIR + +# create table diff_test.test(`table` int, aa int, b varchar(10), c float, d datetime, primary key(a), key(aa)); + +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml + +echo "================test bucket checkpoint=================" +echo "---------1. chunk is in the last of the bucket---------" +export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/check-one-bucket=return();\ +github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return();\ +main/wait-for-checkpoint=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +# Save the last chunk's info, +# to which we will check whether the first chunk's info is next in the next running. +last_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'upperBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'END {print}') +echo "$last_chunk_info" # e.g. 537 indexCode=0:0-0:3:4 +last_chunk_bound=$(echo $last_chunk_info | awk -F ' ' '{print $1}') +echo "$last_chunk_bound" +last_chunk_index=$(echo $last_chunk_info | awk -F '=' '{print $2}') +echo "$last_chunk_index" +OLD_IFS="$IFS" +IFS=":" +last_chunk_index_array=($last_chunk_index) +IFS="$OLD_IFS" +for s in ${last_chunk_index_array[@]} +do +echo "$s" +done +# chunkIndex should be the last Index +[[ $((${last_chunk_index_array[2]} + 1)) -eq ${last_chunk_index_array[3]} ]] || exit 1 +# Save bucketIndexRight, which should be equal to bucketIndexLeft of the chunk first created in the next running. 
+bucket_index_right=$(($(echo ${last_chunk_index_array[1]} | awk -F '-' '{print $2}') + 1)) +echo $bucket_index_right + +rm -f $OUT_DIR/sync_diff.log +export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +first_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'lowerBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'NR==1') +echo $first_chunk_info | awk -F '=' '{print $1}' > $OUT_DIR/first_chunk_bound +cat $OUT_DIR/first_chunk_bound +echo $first_chunk_info | awk -F '=' '{print $3}' > $OUT_DIR/first_chunk_index +cat $OUT_DIR/first_chunk_index +# Notice: when chunk is created paralleling, the least chunk may not appear in the first line. so we sort it as before. +check_contains "${last_chunk_bound}" $OUT_DIR/first_chunk_bound +check_contains_regex ".:${bucket_index_right}-.:0:." $OUT_DIR/first_chunk_index + +echo "--------2. chunk is in the middle of the bucket--------" +rm -rf $OUT_DIR +mkdir -p $OUT_DIR +export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/check-one-bucket=return();\ +github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/ignore-last-n-chunk-in-bucket=return(1);\ +github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return();\ +main/wait-for-checkpoint=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +# Save the last chunk's info, +# to which we will check whether the first chunk's info is next in the next running. +last_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'upperBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'END {print}') +echo "$last_chunk_info" # e.g. 537 indexCode=0:0-0:3:4 +last_chunk_bound=$(echo $last_chunk_info | awk -F ' ' '{print $1}') +echo "$last_chunk_bound" +last_chunk_index=$(echo $last_chunk_info | awk -F '=' '{print $2}') +echo "$last_chunk_index" +OLD_IFS="$IFS" +IFS=":" +last_chunk_index_array=($last_chunk_index) +IFS="$OLD_IFS" +for s in ${last_chunk_index_array[@]} +do +echo "$s" +done +# chunkIndex should be the last Index +[[ $((${last_chunk_index_array[2]} + 2)) -eq ${last_chunk_index_array[3]} ]] || exit 1 +# Save bucketIndexRight, which should be equal to bucketIndexLeft of the chunk first created in the next running. +bucket_index_left=$(echo ${last_chunk_index_array[1]} | awk -F '-' '{print $1}') +bucket_index_right=$(echo ${last_chunk_index_array[1]} | awk -F '-' '{print $2}') +echo "${bucket_index_left}-${bucket_index_right}" + +rm -f $OUT_DIR/sync_diff.log +export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +first_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'lowerBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'NR==1') +echo $first_chunk_info | awk -F '=' '{print $1}' > $OUT_DIR/first_chunk_bound +cat $OUT_DIR/first_chunk_bound +echo $first_chunk_info | awk -F '=' '{print $3}' > $OUT_DIR/first_chunk_index +cat $OUT_DIR/first_chunk_index +# Notice: when chunk is created paralleling, the least chunk may not appear in the first line. so we sort it as before. 
+check_contains "${last_chunk_bound}" $OUT_DIR/first_chunk_bound +check_contains_regex ".:${bucket_index_left}-${bucket_index_right}:$((${last_chunk_index_array[2]} + 1)):${last_chunk_index_array[3]}" $OUT_DIR/first_chunk_index + + +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_rand.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml + +echo "================test random checkpoint=================" +echo "--------------1. chunk is in the middle----------------" +rm -rf $OUT_DIR +mkdir -p $OUT_DIR +export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/ignore-last-n-chunk-in-bucket=return(1);\ +github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return();\ +main/wait-for-checkpoint=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +# Save the last chunk's info, +# to which we will check whether the first chunk's info is next in the next running. +last_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'upperBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'END {print}') +echo "$last_chunk_info" # e.g. 537 indexCode=0:0-0:3:4 +last_chunk_bound=$(echo $last_chunk_info | awk -F ' ' '{print $1}') +echo "$last_chunk_bound" +last_chunk_index=$(echo $last_chunk_info | awk -F '=' '{print $2}') +echo "$last_chunk_index" +OLD_IFS="$IFS" +IFS=":" +last_chunk_index_array=($last_chunk_index) +IFS="$OLD_IFS" +for s in ${last_chunk_index_array[@]} +do +echo "$s" +done +# chunkIndex should be the last Index +[[ $((${last_chunk_index_array[2]} + 2)) -eq ${last_chunk_index_array[3]} ]] || exit 1 + +rm -f $OUT_DIR/sync_diff.log +export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +first_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'lowerBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'NR==1') +echo $first_chunk_info | awk -F '=' '{print $1}' > $OUT_DIR/first_chunk_bound +cat $OUT_DIR/first_chunk_bound +echo $first_chunk_info | awk -F '=' '{print $3}' > $OUT_DIR/first_chunk_index +cat $OUT_DIR/first_chunk_index +# Notice: when chunk is created paralleling, the least chunk may not appear in the first line. so we sort it as before. 
+check_contains "${last_chunk_bound}" $OUT_DIR/first_chunk_bound +check_contains_regex ".:0-0:$((${last_chunk_index_array[2]} + 1)):${last_chunk_index_array[3]}" $OUT_DIR/first_chunk_index + + +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_continous.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +echo "================test checkpoint continous=================" +# add a table have different table-structs of upstream and downstream +# so data-check will be skipped +mysql -uroot -h 127.0.0.1 -P 4000 -e "create table IF NOT EXISTS diff_test.ttt(a int, aa int, primary key(a), key(aa));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table IF NOT EXISTS diff_test.ttt(a int, b int, primary key(a), key(b));" +export GO_FAILPOINTS="main/wait-for-checkpoint=return()" +sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output || true +grep 'save checkpoint' $OUT_DIR/sync_diff.log | awk 'END {print}' > $OUT_DIR/checkpoint_info +check_not_contains 'has-upper\":true' $OUT_DIR/checkpoint_info + +export GO_FAILPOINTS="" \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/config_base_mysql.toml b/sync_diff_inspector/tests/sync_diff_inspector/config_base_mysql.toml new file mode 100644 index 00000000000..f56695ba72a --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/config_base_mysql.toml @@ -0,0 +1,47 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["diff_test.test"] + diff --git a/sync_diff_inspector/tests/sync_diff_inspector/config_base_tidb.toml b/sync_diff_inspector/tests/sync_diff_inspector/config_base_tidb.toml new file mode 100644 index 00000000000..726db6c1e00 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/config_base_tidb.toml @@ -0,0 +1,49 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. 
+export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.tidb1] + host = "127.0.0.1" + port = 4001 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["tidb1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["diff_test.test"] + + diff --git a/sync_diff_inspector/tests/sync_diff_inspector/expression/config.toml b/sync_diff_inspector/tests/sync_diff_inspector/expression/config.toml new file mode 100644 index 00000000000..782ffa97884 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/expression/config.toml @@ -0,0 +1,54 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.tidb1] + host = "127.0.0.1" + port = 4001 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["tidb1"] + + target-instance = "tidb" + + # tables need to check. 
+ target-check-tables = ["expression_test.diff"] + + target-configs = ["config1"] + +######################### Table config ######################### +[table-configs.config1] +target-tables = ["test1.v"] +range = "TRUE" +chunk-size = 1 diff --git a/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh new file mode 100644 index 00000000000..612fc24cbe8 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +set -ex + +cd "$(dirname "$0")" +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +FIX_DIR=/tmp/tidb_tools_test/sync_diff_inspector/fixsql +rm -rf $OUT_DIR +rm -rf $FIX_DIR +mkdir -p $OUT_DIR +mkdir -p $FIX_DIR + +for port in 4000 4001; do + mysql -uroot -h 127.0.0.1 -P $port -e "create database if not exists expression_test;" + mysql -uroot -h 127.0.0.1 -P $port -e "create table expression_test.diff(\`a\`\`;sad\` int, id int);" + mysql -uroot -h 127.0.0.1 -P $port -e "alter table expression_test.diff add index i1((\`a\`\`;sad\` + 1 + \`a\`\`;sad\`));" + mysql -uroot -h 127.0.0.1 -P $port -e "insert into expression_test.diff values (1,1),(2,2),(3,3);" +done + +echo "check result should be pass" +sync_diff_inspector --config=./config.toml > $OUT_DIR/expression_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* diff --git a/sync_diff_inspector/tests/sync_diff_inspector/json/config_base.toml b/sync_diff_inspector/tests/sync_diff_inspector/json/config_base.toml new file mode 100644 index 00000000000..fc0d8f8e9a9 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/json/config_base.toml @@ -0,0 +1,47 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. 
+ target-check-tables = ["json_test.test"] + diff --git a/sync_diff_inspector/tests/sync_diff_inspector/json/data.sql b/sync_diff_inspector/tests/sync_diff_inspector/json/data.sql new file mode 100644 index 00000000000..4a13f371b9d --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/json/data.sql @@ -0,0 +1,7 @@ +create database if not exists json_test; +create table json_test.test (a int, b json, primary key(a)); + +insert into json_test.test values (1, '{"id": 1, "name":"aaa"}'); +insert into json_test.test values (2, '{"id": 2, "name":"bbb", "sub": {"id": "2-1", "num": 3, "array": ["123", "456", "789"], "num_array": [123, 456, 789]}}'); +insert into json_test.test values (3, '{"name":"ccc", "id": 3}'); +insert into json_test.test values (4, '{"id": 4, "bool": true, "name":"aaa"}'); diff --git a/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh new file mode 100644 index 00000000000..30824a26fdf --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh @@ -0,0 +1,41 @@ +#!/bin/sh + +set -e + +cd "$(dirname "$0")" + +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +rm -rf $OUT_DIR +mkdir -p $OUT_DIR + +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} < ./data.sql + +# tidb +mysql -uroot -h 127.0.0.1 -P 4000 < ./data.sql + +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +cat config.toml | sed 's/export-fix-sql = true/export-fix-sql = false/' > config_nofix.toml +diff config.toml config_nofix.toml || true + +echo "compare json tables, check result should be pass" +sync_diff_inspector --config=./config.toml > $OUT_DIR/json_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "compare json tables without fixsql, check result should be pass" +sync_diff_inspector --config=./config_nofix.toml > $OUT_DIR/json_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "update data to make it different, and data should not be equal" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into json_test.test values (5, '{\"id\": 5, \"bool\": true, \"name\":\"aaa\"}');" +mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into json_test.test values (5, '{\"id\": 5, \"bool\": false, \"name\":\"aaa\"}');" +sync_diff_inspector --config=./config.toml > $OUT_DIR/json_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "update data to make it different, and downstream json data is NULL" +mysql -uroot -h 127.0.0.1 -P 4000 -e "replace into json_test.test values (5, NULL);" +sync_diff_inspector --config=./config.toml > $OUT_DIR/json_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* diff --git a/sync_diff_inspector/tests/sync_diff_inspector/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/run.sh new file mode 100644 index 00000000000..338f57baec0 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/run.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +set -ex + +cd "$(dirname "$0")" + +# check mysql status +check_db_status "${MYSQL_HOST}" "${MYSQL_PORT}" mysql "." 
+ +BASE_DIR=/tmp/tidb_tools_test/sync_diff_inspector +OUT_DIR=$BASE_DIR/output + + +mkdir -p $OUT_DIR || true + +echo "use importer to generate test data" +mysql -uroot -h 127.0.0.1 -P 4000 -e "create database if not exists diff_test" +# TODO: run `importer -t "create table diff_test.test(\`table\` int, b varchar(10), c float, d datetime, primary key(a));" -c 10 -n 10000 -P 4000 -h 127.0.0.1 -D diff_test -b 1000` +# will exit with parser error, need to fix it in importer later, just change column name by mysql client now +importer -t "create table diff_test.test(a int, aa int, b varchar(10), c float, d datetime, primary key(a), key(aa));" -c 10 -n 10000 -P 4000 -h 127.0.0.1 -D diff_test -b 1000 +mysql -uroot -h 127.0.0.1 -P 4000 -e "alter table diff_test.test change column a \`table\` int" + +echo "dump data and then load to tidb and mysql" +dumpling --host 127.0.0.1 --port 4000 --user root -o $BASE_DIR/dump_diff -B diff_test -T "diff_test.test" +loader -h 127.0.0.1 -P 4001 -u root -d $BASE_DIR/dump_diff +mysql -h ${MYSQL_HOST} -P ${MYSQL_PORT} -u root -e "create database if not exists tidb_loader" +loader -h ${MYSQL_HOST} -P ${MYSQL_PORT} -u root -d $BASE_DIR/dump_diff +mysql -h ${MYSQL_HOST} -P ${MYSQL_PORT} -u root -e "select * from diff_test.test limit 10;" + +echo "use sync_diff_inspector to compare data" +# sync diff tidb-tidb +sync_diff_inspector --config=./config_base_tidb.toml > $OUT_DIR/diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log + +echo "analyze table, and will use tidb's statistical information to split chunks" +check_contains "split range by random" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* +mysql -uroot -h 127.0.0.1 -P 4000 -e "analyze table diff_test.test" +# run the explain SQL to load the stats after analyze +mysql -uroot -h 127.0.0.1 -P 4000 -e "explain select * from diff_test.test where aa > 1" +mysql -uroot -h 127.0.0.1 -P 4000 -e "explain select * from diff_test.test where \`table\` > 1" +mysql -uroot -h 127.0.0.1 -P 4000 -e "show stats_buckets" +sync_diff_inspector --config=./config_base_tidb.toml > $OUT_DIR/diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +check_not_contains "split range by random" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "test 'exclude-tables' config" +mysql -uroot -h 127.0.0.1 -P 4000 -e "create table if not exists diff_test.should_not_compare (id int)" +sync_diff_inspector --config=./config_base_tidb.toml > $OUT_DIR/diff.log +# doesn't contain the table's result in check report +check_not_contains "[table=should_not_compare]" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +# sync diff tidb-mysql +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_mysql.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config_base_mysql_.toml +sync_diff_inspector --config=./config_base_mysql_.toml #> $OUT_DIR/diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +for script in ./*/run.sh; do + test_name="$(basename "$(dirname "$script")")" + echo "---------------------------------------" + echo "Running test $script..." + echo "---------------------------------------" + sh "$script" +done diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/config_base.toml b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_base.toml new file mode 100644 index 00000000000..9c34352c958 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_base.toml @@ -0,0 +1,55 @@ +# Diff Configuration. 
+ +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + route-rules = ["rule1"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[routes.rule1] +schema-pattern = "shard_test" # pattern of the source schema name, supports the wildcards "*" and "?" +table-pattern = "test*" # pattern of the source table name, supports the wildcards "*" and "?" +target-schema = "shard_test" # target schema name +target-table = "test" # target table name + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["shard_test.test"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_1.toml b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_1.toml new file mode 100644 index 00000000000..953fd67cd0a --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_1.toml @@ -0,0 +1,55 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + route-rules = ["rule1"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_HOST + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[routes.rule1] +schema-pattern = "router_test_0" +table-pattern = "tbl" +target-schema = "router_test_1" +target-table = "tbl" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check.
+ target-check-tables = ["router_test_1.tbl"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_2.toml b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_2.toml new file mode 100644 index 00000000000..a36d0ab3727 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_2.toml @@ -0,0 +1,53 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + route-rules = ["rule1"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_HOST + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[routes.rule1] +schema-pattern = "router_test_0" +target-schema = "router_test_1" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["router_test_1.tbl"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_3.toml b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_3.toml new file mode 100644 index 00000000000..055afef9997 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_3.toml @@ -0,0 +1,55 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + route-rules = ["rule1"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_HOST + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[routes.rule1] +schema-pattern = "other_schema" +table-pattern = "tbl" +target-schema = "other_schema" +target-table = "tbl" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. 
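+    # Note: rule1 above only matches other_schema.tbl, which the shard test
+    # never creates, so this config is expected to behave like the no-rule
+    # case in config_router_4 (compare test 3 and test 4 in run.sh).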
+ target-check-tables = ["router_test_1.tbl"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_4.toml b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_4.toml new file mode 100644 index 00000000000..3b75fc35312 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_4.toml @@ -0,0 +1,49 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_HOST + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["router_test_1.tbl"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_5.toml b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_5.toml new file mode 100644 index 00000000000..8ef05c96bc5 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/config_router_5.toml @@ -0,0 +1,55 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + + +######################### Databases config ######################### +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + + route-rules = ["rule1"] + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_HOST + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[routes.rule1] +schema-pattern = "router_test_?" +table-pattern = "tbl" +target-schema = "router_test_1" +target-table = "tbl" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. 
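+    # Note: the "?" wildcard in rule1 matches a single character, so the rule
+    # covers both router_test_0 and router_test_1 (and, judging by the checks
+    # in run.sh, the capitalized Router_test_* schemas as well), all routed
+    # to router_test_1.tbl.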
+ target-check-tables = ["router_test_1.tbl"] diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh new file mode 100644 index 00000000000..09fdbfa041a --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh @@ -0,0 +1,134 @@ +#!/bin/sh + +set -e + +cd "$(dirname "$0")" + +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +rm -rf $OUT_DIR +mkdir -p $OUT_DIR + +echo "generate data to sharding tables" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists shard_test;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table shard_test.test1(\`table\` int, aa int, b varchar(10), c float, d datetime, primary key(\`table\`));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table shard_test.test2(\`table\` int, aa int, b varchar(10), c float, d datetime, primary key(\`table\`));" + +# each table only have part of data +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into shard_test.test1 (\`table\`, aa, b, c, d) SELECT \`table\`, aa, b, c, d FROM diff_test.test WHERE \`table\`%2=0" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into shard_test.test2 (\`table\`, aa, b, c, d) SELECT \`table\`, aa, b, c, d FROM diff_test.test WHERE \`table\`%2=1" + +# tidb +mysql -uroot -h 127.0.0.1 -P 4000 -e "create database if not exists shard_test;" +mysql -uroot -h 127.0.0.1 -P 4000 -e "create table shard_test.test(\`table\` int, aa int, b varchar(10), c float, d datetime, primary key(\`table\`));" +mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into shard_test.test (\`table\`, aa, b, c, d) SELECT \`table\`, aa, b, c, d FROM diff_test.test;" + +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml + +echo "compare sharding tables with one table in downstream, check result should be pass" +sync_diff_inspector --config=./config.toml > $OUT_DIR/shard_diff.output +check_contains "check pass!!!" 
$OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "update data in one shard table, and data should not be equal" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "update shard_test.test1 set b = 'abc' limit 1" +sync_diff_inspector --config=./config.toml > $OUT_DIR/shard_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "check the router for shard" +# router_test_0.tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists router_test_0;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table router_test_0.tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into router_test_0.tbl values (1,\"hello1\",1);" +# Router_test_0.tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists Router_test_0;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table Router_test_0.tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into Router_test_0.tbl values (1,\"hello1\",1);" +# router_test_0.Tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists router_test_0;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table router_test_0.Tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into router_test_0.Tbl values (1,\"hello1\",1);" +# router_test_1.tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists router_test_1;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table router_test_1.tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into router_test_1.tbl values (1,\"hello1\",1);" +# Router_test_1.tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists Router_test_1;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table Router_test_1.tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into Router_test_1.tbl values (1,\"hello1\",1);" +# router_test_1.Tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists router_test_1;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table router_test_1.Tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into router_test_1.Tbl values (1,\"hello1\",1);" +# Router_test_1.Tbl +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create database if not exists Router_test_1;" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table Router_test_1.Tbl (id INT(11), name VARCHAR(25), deptId INT(11));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into Router_test_1.Tbl values (1,\"hello1\",1);" + +echo "test router 1: normal rule" +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_1.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true +check_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log +#check_not_contains "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains_count "as CHECKSUM 
FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log 1 +check_not_contains "as CHECKSUM FROM \`Router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "test router 2: only schema rule" +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_2.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true +check_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log +#check_not_contains "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains_count "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log 1 +check_not_contains "as CHECKSUM FROM \`Router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log + +rm -rf $OUT_DIR/* + +echo "test router 3: other rule" +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_3.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true +check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log +#check_contains "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains_count "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log 2 +check_contains "as CHECKSUM FROM \`Router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "test router 4: no rule" +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_4.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true +check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log +#check_contains "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains_count "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log 2 +check_contains "as CHECKSUM FROM \`Router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "test router 5: regex rule" +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_5.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml +sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true +check_contains "as 
CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log +#check_contains "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains_count "as CHECKSUM FROM \`router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log 2 +check_contains "as CHECKSUM FROM \`Router_test_1\`.\`tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +check_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "shard test passed" \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/snapshot/config_base.toml b/sync_diff_inspector/tests/sync_diff_inspector/snapshot/config_base.toml new file mode 100644 index 00000000000..a8921fb6ad4 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/snapshot/config_base.toml @@ -0,0 +1,49 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.tidb1] + host = "127.0.0.1" + port = 4001 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + #snapshot# + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["tidb1"] + + target-instance = "tidb" + + # tables need to check. 
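+    # Note: run.sh rewrites the "#snapshot#" placeholder above into a concrete
+    # snapshot = "<ts>" setting (via sed) before the snapshot comparison runs;
+    # diff_test.test itself is populated by the top-level tests/run.sh.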
+ target-check-tables = ["diff_test.test"] + + diff --git a/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh new file mode 100644 index 00000000000..cbd9e765968 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh @@ -0,0 +1,49 @@ + +#!/bin/sh + +set -e + +cd "$(dirname "$0")" + +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +FIX_DIR=/tmp/tidb_tools_test/sync_diff_inspector/fixsql +rm -rf $OUT_DIR +rm -rf $FIX_DIR +mkdir -p $OUT_DIR +mkdir -p $FIX_DIR + +mysql -uroot -h 127.0.0.1 -P 4000 -e "show master status" > $OUT_DIR/ts.log +#cat $OUT_DIR/sync_diff.log +ts=`grep -oE "[0-9]+" $OUT_DIR/ts.log` +echo "get ts $ts" + +echo "delete one data, diff should not passed" +mysql -uroot -h 127.0.0.1 -P 4000 -e "delete from diff_test.test limit 1" + +sync_diff_inspector --config=./config_base.toml > $OUT_DIR/snapshot_diff.log || true +check_contains "check failed" $OUT_DIR/sync_diff.log +# move the fix sql file to $FIX_DIR +mv $OUT_DIR/fix-on-tidb/ $FIX_DIR/ +rm -rf $OUT_DIR/* + +echo "use snapshot compare data, test sql mode by the way, will auto discover ANSI_QUOTES thus pass" +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET GLOBAL sql_mode = 'ANSI_QUOTES';" +sleep 10 +mysql -uroot -h 127.0.0.1 -P 4000 -e "show variables like '%sql_mode%'" +mysql -uroot -h 127.0.0.1 -P 4000 -e "show create table diff_test.test" +sed "s/#snapshot#/snapshot = \"${ts}\"/g" config_base.toml > config.toml +echo "use snapshot compare data, data should be equal" +sync_diff_inspector --config=./config.toml #> $OUT_DIR/snapshot_diff.log +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "execute fix.sql and use base config, and then compare data, data should be equal" +cat $FIX_DIR/fix-on-tidb/*.sql | mysql -uroot -h127.0.0.1 -P 4000 +sync_diff_inspector --config=./config_base.toml > $OUT_DIR/snapshot_diff.log +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +# reset sql mode +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET GLOBAL sql_mode = 'ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION';" + +echo "snapshot test passed" \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_config/config.toml b/sync_diff_inspector/tests/sync_diff_inspector/table_config/config.toml new file mode 100644 index 00000000000..5f5bc45a182 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/table_config/config.toml @@ -0,0 +1,60 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. 
+export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.tidb1] + host = "127.0.0.1" + port = 4001 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["tidb1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["diff_test.test"] + + # extra table config + target-configs= ["config1"] + +[table-configs] +[table-configs.config1] +# tables need to use this specified config. +# if use this config. target-tables should be a subset of #target-check-tables +target-tables = ["diff_test.test"] + +range = "TRUE"#RANGE"a < 10 OR a > 200" +index-fields = [""] +ignore-columns = [""]#IGNORE \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh new file mode 100644 index 00000000000..ad541dacb6c --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh @@ -0,0 +1,42 @@ + +#!/bin/sh + +set -e + +cd "$(dirname "$0")" + +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +FIX_DIR=/tmp/tidb_tools_test/sync_diff_inspector/fixsql +rm -rf $OUT_DIR +rm -rf $FIX_DIR +mkdir -p $OUT_DIR +mkdir -p $FIX_DIR + +echo "update data in column b (WHERE \`table\` >= 10 AND \`table\` <= 200), data should not be equal" +mysql -uroot -h 127.0.0.1 -P 4000 -e "update diff_test.test set b = 'abc' where \`table\` >= 10 AND \`table\` <= 200" + +sync_diff_inspector --config=./config.toml > $OUT_DIR/ignore_column_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +# move the fix sql file to $FIX_DIR +mv $OUT_DIR/fix-on-tidb/ $FIX_DIR/ +rm -rf $OUT_DIR/* + +echo "ignore check column b, check result should be pass" +sed 's/\[""\]#IGNORE/["b"]/g' config.toml > config_.toml +sync_diff_inspector --config=./config_.toml > $OUT_DIR/ignore_column_diff.output || true +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "set range a < 10 OR a > 200, check result should be pass" +sed 's/"TRUE"#RANGE"a < 10 OR a > 200"/"`table` < 10 OR `table` > 200"/g' config.toml > config_.toml +sync_diff_inspector --config=./config_.toml > $OUT_DIR/ignore_column_diff.output || true +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "execute fix.sql and use base config, and then compare data, data should be equal" +cat $FIX_DIR/fix-on-tidb/*.sql | mysql -uroot -h127.0.0.1 -P 4000 +sync_diff_inspector --config=./config.toml > $OUT_DIR/ignore_column_diff.log || true +check_contains "check pass!!!" 
$OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "table_config test passed" diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_base.toml b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_base.toml new file mode 100644 index 00000000000..ba341007394 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_base.toml @@ -0,0 +1,49 @@ +# Diff Configuration. + +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +# skip validation for tables that don't exist upstream or downstream +skip-non-existing-table = true + +######################### Databases config ######################### +[data-sources] +[data-sources.mysql1] + host = "127.0.0.1"#MYSQL_HOST + port = 3306#MYSQL_PORT + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["skip_test.t*"] \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_router.toml b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_router.toml new file mode 100644 index 00000000000..2fa4ededf3e --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/config_router.toml @@ -0,0 +1,61 @@ +# Diff Configuration. + +######################### Global config ######################### + +# The number of goroutines created to check data. The number of connections between upstream and downstream databases are slightly greater than this value +check-thread-count = 4 + +# If enabled, SQL statements is exported to fix inconsistent tables +export-fix-sql = true + +# Only compares the table structure instead of the data +check-struct-only = false + +# skip validation for tables that don't exist upstream or downstream +skip-non-existing-table = true +######################### Datasource config ######################### +[data-sources.mysql1] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + + route-rules = ["rule1"] + +[data-sources.mysql2] + host = "127.0.0.1" + port = 3306 + user = "root" + password = "" + + route-rules = ["rule2"] + +[data-sources.tidb0] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + +########################### Routes ########################### +[routes.rule1] +schema-pattern = "skip_test" # Matches the schema name of the data source. Supports the wildcards "*" and "?" +table-pattern = "t[1-2]" # Matches the table name of the data source. Supports the wildcards "*" and "?" 
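+# Together with rule2 below, this routes skip_test.t1/t2 (from "mysql1") and
+# skip_test.t0 (from "mysql2") onto the single downstream table skip_test.t5,
+# which the test never actually creates downstream, hence the skipped result.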
+target-schema = "skip_test" # The name of the schema in the target database +target-table = "t5" # The name of the target table + +[routes.rule2] +schema-pattern = "skip_test" +table-pattern = "t0" +target-schema = "skip_test" +target-table = "t5" + +######################### Task config ######################### +[task] + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["mysql1", "mysql2"] + + target-instance = "tidb0" + + # The tables of downstream databases to be compared. Each table needs to contain the schema name and the table name, separated by '.' + target-check-tables = ["skip_test.t5"] \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_skip/data.sql b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/data.sql new file mode 100644 index 00000000000..34a2a745263 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/data.sql @@ -0,0 +1,5 @@ +create database if not exists skip_test; +create table skip_test.t0 (a int, b int, primary key(a)); +create table skip_test.t1 (a int, b int, primary key(a)); +insert into skip_test.t0 values (1,1); +insert into skip_test.t1 values (2,2); \ No newline at end of file diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh new file mode 100644 index 00000000000..441f7045806 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh @@ -0,0 +1,65 @@ +#!/bin/sh + +set -ex + +cd "$(dirname "$0")" + +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +rm -rf $OUT_DIR +mkdir -p $OUT_DIR + +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} < ./data.sql + +# tidb +mysql -uroot -h 127.0.0.1 -P 4000 < ./data.sql + +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml + +echo "compare tables, check result should be pass" +sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true +check_contains "check pass!!!" 
$OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "make some tables exist only upstream or downstream" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table skip_test.t2 (a int, b int, primary key(a));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into skip_test.t2 values (3,3);" +mysql -uroot -h 127.0.0.1 -P 4000 -e "create table skip_test.t3 (a int, b int, primary key(a));" +mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into skip_test.t3 values (1,1);" +sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true +check_contains "check pass" $OUT_DIR/sync_diff.log +check_contains "Comparing the table data of \`skip_test\`.\`t2\` ...skipped" $OUT_DIR/table_skip_diff.output +check_contains "Comparing the table data of \`skip_test\`.\`t3\` ...skipped" $OUT_DIR/table_skip_diff.output +check_contains "The data of \`skip_test\`.\`t2\` does not exist in downstream database" $OUT_DIR/table_skip_diff.output +check_contains "The data of \`skip_test\`.\`t3\` does not exist in upstream database" $OUT_DIR/table_skip_diff.output +check_contains "| TABLE | RESULT | STRUCTURE EQUALITY | DATA DIFF ROWS | UPCOUNT | DOWNCOUNT |" $OUT_DIR/summary.txt +check_contains "| \`skip_test\`.\`t2\` | skipped | false | +1/-0 | 1 | 0 |" $OUT_DIR/summary.txt +check_contains "| \`skip_test\`.\`t3\` | skipped | false | +0/-1 | 0 | 1 |" $OUT_DIR/summary.txt +rm -rf $OUT_DIR/* + +echo "make some table data not equal" +mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into skip_test.t1 values (4,4);" +sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +check_contains "| \`skip_test\`.\`t1\` | succeed | true | +0/-1 | 1 | 2 |" $OUT_DIR/summary.txt +rm -rf $OUT_DIR/* + +echo "make some table structure not equal" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table skip_test.t4 (a int, b int, c int,primary key(a));" +mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into skip_test.t4 values (1,1,1);" +mysql -uroot -h 127.0.0.1 -P 4000 -e "create table skip_test.t4 (a int, b int, primary key(a));" +sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +check_contains "| \`skip_test\`.\`t4\` | succeed | false | +0/-0 | 0 | 0 |" $OUT_DIR/summary.txt +check_contains "A total of 5 tables have been compared, 1 tables finished, 2 tables failed, 2 tables skipped" $OUT_DIR/table_skip_diff.output +cat $OUT_DIR/summary.txt +rm -rf $OUT_DIR/* + +echo "test router case" +sync_diff_inspector --config=./config_router.toml > $OUT_DIR/table_skip_diff.output || true +check_contains "check pass" $OUT_DIR/sync_diff.log +check_contains "| \`skip_test\`.\`t5\` | skipped | false | +3/-0 | 3 | 0 |" $OUT_DIR/summary.txt +check_contains "The data of \`skip_test\`.\`t5\` does not exist in downstream database" $OUT_DIR/table_skip_diff.output +check_contains "A total of 1 tables have been compared, 0 tables finished, 0 tables failed, 1 tables skipped" $OUT_DIR/table_skip_diff.output +rm -rf $OUT_DIR/* + +echo "table_skip test passed" diff --git a/sync_diff_inspector/tests/sync_diff_inspector/time_zone/config.toml b/sync_diff_inspector/tests/sync_diff_inspector/time_zone/config.toml new file mode 100644 index 00000000000..4f3f813eef1 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/time_zone/config.toml @@ -0,0 +1,48 @@ +# Diff Configuration. 
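+# This config backs the time_zone test: both data sources are local TiDB
+# instances (port 4001 as the source, port 4000 as the target) and only
+# tz_test.diff is compared; run.sh below inserts rows under different
+# session and global time_zone settings and checks that the comparison
+# still reports pass/fail correctly.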
+ +######################### Global config ######################### + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.tidb1] + host = "127.0.0.1" + port = 4001 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["tidb1"] + + target-instance = "tidb" + + # tables need to check. + target-check-tables = ["tz_test.diff"] + + diff --git a/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh new file mode 100644 index 00000000000..b96e0d895d8 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh @@ -0,0 +1,57 @@ +#!/bin/sh + +set -ex + +cd "$(dirname "$0")" +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +FIX_DIR=/tmp/tidb_tools_test/sync_diff_inspector/fixsql +rm -rf $OUT_DIR +rm -rf $FIX_DIR +mkdir -p $OUT_DIR +mkdir -p $FIX_DIR + +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@GLOBAL.SQL_MODE='ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION';" +sleep 3 + +for port in 4000 4001; do + mysql -uroot -h 127.0.0.1 -P $port -e "create database if not exists tz_test" + mysql -uroot -h 127.0.0.1 -P $port -e "create table tz_test.diff(id int, dt datetime, ts timestamp);" + mysql -uroot -h 127.0.0.1 -P $port -e "insert into tz_test.diff values (1, '2020-05-17 09:12:13', '2020-05-17 09:12:13');" + mysql -uroot -h 127.0.0.1 -P $port -e "set @@session.time_zone = \"-07:00\"; insert into tz_test.diff values (2, '2020-05-17 09:12:13', '2020-05-17 09:12:13');" +done + +echo "check with the same time_zone, check result should be pass" +sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +# check upstream and downstream time_zone +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@global.time_zone = '+08:00'"; +mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@global.time_zone = '+00:00'"; +sleep 5 + +echo "check with different time_zone, check result should be pass again" +sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output +check_contains "check pass!!!" 
$OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* + +echo "set different rows, check result should be failed" +mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@session.time_zone = '-06:00'; insert into tz_test.diff values (4, '2020-05-17 09:12:13', '2020-05-17 09:12:13');" +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@session.time_zone = '-05:00'; insert into tz_test.diff values (3, '2020-05-17 10:12:13', '2020-05-17 10:12:13');" +sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output || true +check_contains "check failed" $OUT_DIR/sync_diff.log +mv $OUT_DIR/fix-on-tidb/ $FIX_DIR/ +rm -rf $OUT_DIR/* + +echo "fix the rows, check result should be pass" +cat $FIX_DIR/fix-on-tidb/*.sql | mysql -uroot -h127.0.0.1 -P 4000 +sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output +check_contains "check pass!!!" $OUT_DIR/sync_diff.log +rm -rf $OUT_DIR/* +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@session.time_zone = '-06:00'; select ts from tz_test.diff where id = 4 or id = 3;" > $OUT_DIR/tmp_sql_timezone +check_contains "2020-05-17 09:12:13" $OUT_DIR/tmp_sql_timezone +check_not_contains "2020-05-17 10:12:13" $OUT_DIR/tmp_sql_timezone + +# reset time_zone +mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@global.time_zone = 'SYSTEM'"; +mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@global.time_zone = 'SYSTEM'"; diff --git a/sync_diff_inspector/tests/sync_diff_inspector/tls/config.toml b/sync_diff_inspector/tests/sync_diff_inspector/tls/config.toml new file mode 100644 index 00000000000..323134207ef --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/tls/config.toml @@ -0,0 +1,53 @@ +# Diff Configuration. + +######################### Global config ######################### + + +# how many goroutines are created to check data +check-thread-count = 4 + +# set false if just want compare data by checksum, will skip select data when checksum is not equal. +# set true if want compare all different rows, will slow down the total compare time. +export-fix-sql = true + +# ignore check table's data +check-struct-only = false + +######################### Databases config ######################### +[data-sources] +[data-sources.tidb1] + host = "127.0.0.1" + port = 4001 + user = "root" + password = "" + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +[data-sources.tidb] + host = "127.0.0.1" + port = 4000 + user = "root_tls" + password = "" + + security.ca-path = "ca-path"#CAPATH + security.cert-path = "cert-path"#CERTPATH + security.key-path = "key-path"#KEYPATH + # remove comment if use tidb's snapshot data + # snapshot = "2016-10-08 16:45:26" + +######################### Task config ######################### +[task] + # 1 fix sql: fix-target-TIDB1.sql + # 2 log: sync-diff.log + # 3 summary: summary.txt + # 4 checkpoint: a dir + output-dir = "/tmp/tidb_tools_test/sync_diff_inspector/output" + + source-instances = ["tidb1"] + + target-instance = "tidb" + + # tables need to check. 
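+    # Note: run.sh substitutes the "ca-path"/"cert-path"/"key-path" placeholders
+    # above with the real certificate paths (via sed) before running the
+    # TLS comparison as the root_tls user.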
+ target-check-tables = ["diff_test.test"] + + diff --git a/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh new file mode 100644 index 00000000000..03ed6cbf3e1 --- /dev/null +++ b/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +set -ex + +cd "$(dirname "$0")" + +CONF_PATH=`cd ../../conf && pwd` +CA_PATH="$CONF_PATH/root.crt" +CERT_PATH="$CONF_PATH/client.crt" +KEY_PATH="$CONF_PATH/client.key" +OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output +rm -rf $OUT_DIR +mkdir -p $OUT_DIR + +# create user for test tls +mysql -uroot -h 127.0.0.1 -P 4000 -e "create user 'root_tls'@'%' identified by '' require X509;" +mysql -uroot -h 127.0.0.1 -P 4000 -e "grant all privileges on *.* to 'root_tls'@'%';" +mysql -uroot_tls -h 127.0.0.1 -P 4000 --ssl-ca "$CA_PATH" --ssl-cert "$CERT_PATH" --ssl-key "$KEY_PATH" -e "SHOW STATUS LIKE \"%Ssl%\";" + +echo "use sync_diff_inspector to compare data" +# sync diff tidb-tidb +CA_PATH_REG=$(echo ${CA_PATH} | sed 's/\//\\\//g') +CERT_PATH_REG=$(echo ${CERT_PATH} | sed 's/\//\\\//g') +KEY_PATH_REG=$(echo ${KEY_PATH} | sed 's/\//\\\//g') +sed "s/\"ca-path\"#CAPATH/\"${CA_PATH_REG}\"/g" config.toml | sed "s/\"cert-path\"#CERTPATH/\"${CERT_PATH_REG}\"/g" | sed "s/\"key-path\"#KEYPATH/\"${KEY_PATH_REG}\"/g" > config_.toml +sync_diff_inspector --config=./config_.toml > $OUT_DIR/diff.output || (cat $OUT_DIR/diff.output && exit 1) +check_contains "check pass!!!" $OUT_DIR/sync_diff.log diff --git a/sync_diff_inspector/utils/pd.go b/sync_diff_inspector/utils/pd.go index 7aadf2fbc59..af7947f1baf 100644 --- a/sync_diff_inspector/utils/pd.go +++ b/sync_diff_inspector/utils/pd.go @@ -208,7 +208,7 @@ func StartGCSavepointUpdateService(ctx context.Context, pdCli pd.Client, db *sql return nil } // get latest snapshot - snapshotTS, err := parseSnapshotToTSO(db, snapshot) + snapshotTS, err := ParseSnapshotToTSO(db, snapshot) if tidbVersion.Compare(*autoGCSafePointVersion) > 0 { log.Info("tidb support auto gc safepoint", zap.Stringer("version", tidbVersion)) if err != nil { @@ -250,7 +250,7 @@ func updateServiceSafePoint(ctx context.Context, pdClient pd.Client, snapshotTS } } -func parseSnapshotToTSO(pool *sql.DB, snapshot string) (uint64, error) { +func ParseSnapshotToTSO(pool *sql.DB, snapshot string) (uint64, error) { snapshotTS, err := strconv.ParseUint(snapshot, 10, 64) if err == nil { return snapshotTS, nil From a72a251846dce2dc162d0deab4de32b10f0a6e86 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 7 Jan 2025 09:36:59 +0800 Subject: [PATCH 19/22] update go sum --- go.sum | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/go.sum b/go.sum index f08cf324d1b..78b4ac6db93 100644 --- a/go.sum +++ b/go.sum @@ -42,12 +42,12 @@ github.com/99designs/keyring v1.2.1 h1:tYLp1ULvO7i3fI5vE21ReQuj99QFSs7lGm0xWyJo8 github.com/99designs/keyring v1.2.1/go.mod h1:fc+wB5KTk9wQ9sDx0kFXB3A0MaeGHM9AwRStKOQ5vOA= github.com/AthenZ/athenz v1.10.39 h1:mtwHTF/v62ewY2Z5KWhuZgVXftBej1/Tn80zx4DcawY= github.com/AthenZ/athenz v1.10.39/go.mod h1:3Tg8HLsiQZp81BJY58JBeU2BR6B/H4/0MQGfCwhHNEA= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.12.0 h1:1nGuui+4POelzDwI7RG56yfQJHCnKvwfMoU7VsEp+Zg= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.12.0/go.mod h1:99EvauvlcJ1U06amZiksfYz/3aFGyIhWGHVyiZXtBAI= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.6.0 h1:U2rTu3Ef+7w9FHKIAXM6ZyqF3UOWJZ12zIm8zECAFfg= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.6.0/go.mod 
h1:9kIvujWAA58nmPmWB1m23fyWic1kYZMxD9CxaWn4Qpg= -github.com/Azure/azure-sdk-for-go/sdk/internal v1.9.0 h1:H+U3Gk9zY56G3u872L82bk4thcsy2Gghb9ExT4Zvm1o= -github.com/Azure/azure-sdk-for-go/sdk/internal v1.9.0/go.mod h1:mgrmMSgaLp9hmax62XQTd0N4aAqSE5E0DulSpVYK7vc= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 h1:JZg6HRh6W6U4OLl6lk7BZ7BLisIzM9dG1R50zUk9C/M= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0/go.mod h1:YL1xnZ6QejvQHWJrX/AvhFl4WW4rqHVoKspWNVwFk0M= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 h1:tfLQ34V6F7tVSwoTf/4lH5sE0o6eCJuNDTmH09nDpbc= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0/go.mod h1:9kIvujWAA58nmPmWB1m23fyWic1kYZMxD9CxaWn4Qpg= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 h1:ywEEhmNahHBihViHepv3xPBn1663uRv2t2q/ESv9seY= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0/go.mod h1:iZDifYGJTIgIIkYRNWPENUnqx6bJ2xnSDFI2tjwZNuY= github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.0.0 h1:u/LLAOFgsMv7HmNL4Qufg58y+qElGOt5qv0z1mURkRY= github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.0.0/go.mod h1:2e8rMJtl2+2j+HXbTBwnyGpm5Nou7KhvSfxOq8JpTag= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= From 2a793064f310a26f84bbcbba2c6d416ee58581e0 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 7 Jan 2025 09:59:56 +0800 Subject: [PATCH 20/22] fix format --- sync_diff_inspector/diff/diff_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/sync_diff_inspector/diff/diff_test.go b/sync_diff_inspector/diff/diff_test.go index e8251a13343..a85c8a817b4 100644 --- a/sync_diff_inspector/diff/diff_test.go +++ b/sync_diff_inspector/diff/diff_test.go @@ -103,5 +103,4 @@ func TestGetSnapshot(t *testing.T) { val := GetSnapshot(cs.latestSnapshot, cs.snapshot, conn) require.Equal(t, cs.expected, val, "case %d", i) } - } From 4e785cbacd6d8d9260bb2424a66772d59e75a066 Mon Sep 17 00:00:00 2001 From: Ruihao Chen Date: Tue, 7 Jan 2025 10:23:18 +0800 Subject: [PATCH 21/22] fix format --- .../tests/_utils/check_contains | 10 +- .../tests/_utils/check_contains_count | 10 +- .../tests/_utils/check_contains_regex | 10 +- .../tests/_utils/check_db_status | 16 +-- .../tests/_utils/check_not_contains | 12 +- sync_diff_inspector/tests/importer/run.sh | 26 ++-- sync_diff_inspector/tests/run.sh | 128 +++++++++--------- .../sync_diff_inspector/checkpoint/run.sh | 63 ++++----- .../sync_diff_inspector/expression/run.sh | 10 +- .../tests/sync_diff_inspector/json/run.sh | 16 +-- .../tests/sync_diff_inspector/run.sh | 19 ++- .../tests/sync_diff_inspector/shard/run.sh | 28 ++-- .../tests/sync_diff_inspector/snapshot/run.sh | 13 +- .../sync_diff_inspector/table_config/run.sh | 13 +- .../sync_diff_inspector/table_skip/run.sh | 16 +-- .../sync_diff_inspector/time_zone/run.sh | 26 ++-- .../tests/sync_diff_inspector/tls/run.sh | 8 +- 17 files changed, 207 insertions(+), 217 deletions(-) diff --git a/sync_diff_inspector/tests/_utils/check_contains b/sync_diff_inspector/tests/_utils/check_contains index 93e7970b76a..651ee15fb5a 100755 --- a/sync_diff_inspector/tests/_utils/check_contains +++ b/sync_diff_inspector/tests/_utils/check_contains @@ -7,9 +7,9 @@ set -eu OUT_DIR=/tmp/tidb_tools_test if ! 
grep -Fq "$1" "$2"; then - echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1'" - echo "____________________________________" - cat "$2" - echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" - exit 1 + echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1'" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 fi diff --git a/sync_diff_inspector/tests/_utils/check_contains_count b/sync_diff_inspector/tests/_utils/check_contains_count index 8308512d789..79dd3c79e3b 100755 --- a/sync_diff_inspector/tests/_utils/check_contains_count +++ b/sync_diff_inspector/tests/_utils/check_contains_count @@ -10,9 +10,9 @@ OUT_DIR=/tmp/tidb_tools_test count=$(grep -F "$1" "$2" | wc -l) if [ "$count" -ne "$3" ]; then - echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1' $3 times" - echo "____________________________________" - cat "$2" - echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" - exit 1 + echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1' $3 times" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 fi diff --git a/sync_diff_inspector/tests/_utils/check_contains_regex b/sync_diff_inspector/tests/_utils/check_contains_regex index ce498abbec2..c8d780a6cc5 100755 --- a/sync_diff_inspector/tests/_utils/check_contains_regex +++ b/sync_diff_inspector/tests/_utils/check_contains_regex @@ -7,9 +7,9 @@ set -eu OUT_DIR=/tmp/tidb_tools_test if ! grep -q "$1" "$2"; then - echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1'" - echo "____________________________________" - cat "$2" - echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" - exit 1 + echo "TEST FAILED: '$2' DOES NOT CONTAIN '$1'" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 fi diff --git a/sync_diff_inspector/tests/_utils/check_db_status b/sync_diff_inspector/tests/_utils/check_db_status index 8dc75739c55..09945fff4e7 100755 --- a/sync_diff_inspector/tests/_utils/check_db_status +++ b/sync_diff_inspector/tests/_utils/check_db_status @@ -4,16 +4,14 @@ # argument 2 is the port # argument 3 is the database service's name -for i in {1..20} -do - if mysqladmin -h "$1" -P "$2" -u root --default-character-set utf8 ping > /dev/null 2>&1 - then - echo "$3 is alive" - exit 0 - fi +for i in {1..20}; do + if mysqladmin -h "$1" -P "$2" -u root --default-character-set utf8 ping >/dev/null 2>&1; then + echo "$3 is alive" + exit 0 + fi - echo "$3 is not alive, will try again" - sleep 2 + echo "$3 is not alive, will try again" + sleep 2 done echo "$3 is not alive" diff --git a/sync_diff_inspector/tests/_utils/check_not_contains b/sync_diff_inspector/tests/_utils/check_not_contains index 43fd007ad5f..915fcd1d603 100755 --- a/sync_diff_inspector/tests/_utils/check_not_contains +++ b/sync_diff_inspector/tests/_utils/check_not_contains @@ -7,9 +7,9 @@ set -eu OUT_DIR=/tmp/tidb_binlog_test if grep -Fq "$1" "$2"; then - echo "TEST FAILED: '$2' CONTAIN '$1'" - echo "____________________________________" - cat "$2" - echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" - exit 1 -fi \ No newline at end of file + echo "TEST FAILED: '$2' CONTAIN '$1'" + echo "____________________________________" + cat "$2" + echo "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" + exit 1 +fi diff --git a/sync_diff_inspector/tests/importer/run.sh b/sync_diff_inspector/tests/importer/run.sh index b82a2ef1575..ef2b2527a9d 100644 --- a/sync_diff_inspector/tests/importer/run.sh +++ b/sync_diff_inspector/tests/importer/run.sh @@ -4,23 +4,23 @@ TEST_DATABASE_NAME=checker_test 
IMPORT_EXEC="../bin/importer -c 1 -h ${MYSQL_HOST} -P ${MYSQL_PORT} -D ${TEST_DATABASE_NAME}" MYSQL_EXEC="mysql -h ${MYSQL_HOST} -P ${MYSQL_PORT} -u root" -init(){ - check_db_status "${MYSQL_HOST}" "${MYSQL_PORT}" mysql "." - ${MYSQL_EXEC} -e "drop database if exists ${TEST_DATABASE_NAME};" - ${MYSQL_EXEC} -e "create database ${TEST_DATABASE_NAME};" +init() { + check_db_status "${MYSQL_HOST}" "${MYSQL_PORT}" mysql "." + ${MYSQL_EXEC} -e "drop database if exists ${TEST_DATABASE_NAME};" + ${MYSQL_EXEC} -e "create database ${TEST_DATABASE_NAME};" } -destroy(){ - ${MYSQL_EXEC} -e "drop database if exists ${TEST_DATABASE_NAME};" +destroy() { + ${MYSQL_EXEC} -e "drop database if exists ${TEST_DATABASE_NAME};" } -testImporter(){ - ${IMPORT_EXEC} -c 1 -n 10 -t "$1" -i "$2" - RESULT=`${MYSQL_EXEC} -e "$3" | sed -n '2p'` - if [[ "${RESULT}" != "$4" ]]; then - echo "Test importer failed: $1" - exit 1 - fi +testImporter() { + ${IMPORT_EXEC} -c 1 -n 10 -t "$1" -i "$2" + RESULT=$(${MYSQL_EXEC} -e "$3" | sed -n '2p') + if [[ "${RESULT}" != "$4" ]]; then + echo "Test importer failed: $1" + exit 1 + fi } set -e diff --git a/sync_diff_inspector/tests/run.sh b/sync_diff_inspector/tests/run.sh index 7d0e04a1a1f..f5f54536b88 100755 --- a/sync_diff_inspector/tests/run.sh +++ b/sync_diff_inspector/tests/run.sh @@ -6,12 +6,12 @@ OUT_DIR=/tmp/tidb_tools_test # assign default value to mysql config if [[ -z ${MYSQL_HOST+x} ]]; then - echo "set MYSQL_HOST as default value \"127.0.0.1\"" - export MYSQL_HOST="127.0.0.1" + echo "set MYSQL_HOST as default value \"127.0.0.1\"" + export MYSQL_HOST="127.0.0.1" fi if [[ -z ${MYSQL_PORT+x} ]]; then - echo "set MYSQL_PORT as default value 3306" - export MYSQL_PORT=3306 + echo "set MYSQL_PORT as default value 3306" + export MYSQL_PORT=3306 fi mkdir -p $OUT_DIR || true @@ -26,26 +26,26 @@ export PATH=$PATH:$(dirname $pwd)/bin rm -rf $OUT_DIR || true stop_services() { - killall -9 tikv-server || true - killall -9 pd-server || true - killall -9 tidb-server || true + killall -9 tikv-server || true + killall -9 pd-server || true + killall -9 tidb-server || true } start_services() { - stop_services - - echo "Starting PD..." - pd-server \ - --client-urls http://127.0.0.1:2379 \ - --log-file "$OUT_DIR/pd.log" \ - --data-dir "$OUT_DIR/pd" & - # wait until PD is online... - while ! curl -o /dev/null -sf http://127.0.0.1:2379/pd/api/v1/version; do - sleep 1 - done - - # Tries to limit the max number of open files under the system limit - cat - > "$OUT_DIR/tikv-config.toml" <"$OUT_DIR/tikv-config.toml" < "$OUT_DIR/tidb-config.toml" <"$OUT_DIR/tidb-config.toml" < ./config.toml +sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml echo "================test bucket checkpoint=================" echo "---------1. chunk is in the last of the bucket---------" export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/check-one-bucket=return();\ github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return();\ main/wait-for-checkpoint=return()" -sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output check_contains "check pass!!!" $OUT_DIR/sync_diff.log -# Save the last chunk's info, +# Save the last chunk's info, # to which we will check whether the first chunk's info is next in the next running. 
last_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'upperBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'END {print}') echo "$last_chunk_info" # e.g. 537 indexCode=0:0-0:3:4 @@ -31,23 +31,22 @@ OLD_IFS="$IFS" IFS=":" last_chunk_index_array=($last_chunk_index) IFS="$OLD_IFS" -for s in ${last_chunk_index_array[@]} -do -echo "$s" +for s in ${last_chunk_index_array[@]}; do + echo "$s" done # chunkIndex should be the last Index [[ $((${last_chunk_index_array[2]} + 1)) -eq ${last_chunk_index_array[3]} ]] || exit 1 -# Save bucketIndexRight, which should be equal to bucketIndexLeft of the chunk first created in the next running. +# Save bucketIndexRight, which should be equal to bucketIndexLeft of the chunk first created in the next running. bucket_index_right=$(($(echo ${last_chunk_index_array[1]} | awk -F '-' '{print $2}') + 1)) echo $bucket_index_right rm -f $OUT_DIR/sync_diff.log export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return()" -sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output first_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'lowerBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'NR==1') -echo $first_chunk_info | awk -F '=' '{print $1}' > $OUT_DIR/first_chunk_bound +echo $first_chunk_info | awk -F '=' '{print $1}' >$OUT_DIR/first_chunk_bound cat $OUT_DIR/first_chunk_bound -echo $first_chunk_info | awk -F '=' '{print $3}' > $OUT_DIR/first_chunk_index +echo $first_chunk_info | awk -F '=' '{print $3}' >$OUT_DIR/first_chunk_index cat $OUT_DIR/first_chunk_index # Notice: when chunk is created paralleling, the least chunk may not appear in the first line. so we sort it as before. check_contains "${last_chunk_bound}" $OUT_DIR/first_chunk_bound @@ -60,9 +59,9 @@ export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/ignore-last-n-chunk-in-bucket=return(1);\ github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return();\ main/wait-for-checkpoint=return()" -sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output +sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output check_contains "check pass!!!" $OUT_DIR/sync_diff.log -# Save the last chunk's info, +# Save the last chunk's info, # to which we will check whether the first chunk's info is next in the next running. last_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'upperBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'END {print}') echo "$last_chunk_info" # e.g. 537 indexCode=0:0-0:3:4 @@ -74,31 +73,29 @@ OLD_IFS="$IFS" IFS=":" last_chunk_index_array=($last_chunk_index) IFS="$OLD_IFS" -for s in ${last_chunk_index_array[@]} -do -echo "$s" +for s in ${last_chunk_index_array[@]}; do + echo "$s" done # chunkIndex should be the last Index [[ $((${last_chunk_index_array[2]} + 2)) -eq ${last_chunk_index_array[3]} ]] || exit 1 -# Save bucketIndexRight, which should be equal to bucketIndexLeft of the chunk first created in the next running. +# Save bucketIndexRight, which should be equal to bucketIndexLeft of the chunk first created in the next running. 
bucket_index_left=$(echo ${last_chunk_index_array[1]} | awk -F '-' '{print $1}')
bucket_index_right=$(echo ${last_chunk_index_array[1]} | awk -F '-' '{print $2}')
echo "${bucket_index_left}-${bucket_index_right}"
rm -f $OUT_DIR/sync_diff.log
export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return()"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output
first_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'lowerBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'NR==1')
-echo $first_chunk_info | awk -F '=' '{print $1}' > $OUT_DIR/first_chunk_bound
+echo $first_chunk_info | awk -F '=' '{print $1}' >$OUT_DIR/first_chunk_bound
cat $OUT_DIR/first_chunk_bound
-echo $first_chunk_info | awk -F '=' '{print $3}' > $OUT_DIR/first_chunk_index
+echo $first_chunk_info | awk -F '=' '{print $3}' >$OUT_DIR/first_chunk_index
cat $OUT_DIR/first_chunk_index
# Notice: when chunk is created paralleling, the least chunk may not appear in the first line. so we sort it as before.
check_contains "${last_chunk_bound}" $OUT_DIR/first_chunk_bound
check_contains_regex ".:${bucket_index_left}-${bucket_index_right}:$((${last_chunk_index_array[2]} + 1)):${last_chunk_index_array[3]}" $OUT_DIR/first_chunk_index
-
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_rand.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_rand.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
echo "================test random checkpoint================="
echo "--------------1. chunk is in the middle----------------"
@@ -107,9 +104,9 @@ mkdir -p $OUT_DIR
export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/ignore-last-n-chunk-in-bucket=return(1);\
github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return();\
main/wait-for-checkpoint=return()"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
-# Save the last chunk's info, 
+# Save the last chunk's info,
# to which we will check whether the first chunk's info is next in the next running.
last_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'upperBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'END {print}')
echo "$last_chunk_info" # e.g. 537 indexCode=0:0-0:3:4
@@ -121,35 +118,33 @@ OLD_IFS="$IFS"
IFS=":"
last_chunk_index_array=($last_chunk_index)
IFS="$OLD_IFS"
-for s in ${last_chunk_index_array[@]}
-do
-echo "$s"
+for s in ${last_chunk_index_array[@]}; do
+ echo "$s"
done
# chunkIndex should be the last Index
[[ $((${last_chunk_index_array[2]} + 2)) -eq ${last_chunk_index_array[3]} ]] || exit 1
rm -f $OUT_DIR/sync_diff.log
export GO_FAILPOINTS="github.com/pingcap/tidb-tools/sync_diff_inspector/splitter/print-chunk-info=return()"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output
first_chunk_info=$(grep 'print-chunk-info' $OUT_DIR/sync_diff.log | awk -F 'lowerBounds=' '{print $2}' | sed 's/[]["]//g' | sort -n | awk 'NR==1')
-echo $first_chunk_info | awk -F '=' '{print $1}' > $OUT_DIR/first_chunk_bound
+echo $first_chunk_info | awk -F '=' '{print $1}' >$OUT_DIR/first_chunk_bound
cat $OUT_DIR/first_chunk_bound
-echo $first_chunk_info | awk -F '=' '{print $3}' > $OUT_DIR/first_chunk_index
+echo $first_chunk_info | awk -F '=' '{print $3}' >$OUT_DIR/first_chunk_index
cat $OUT_DIR/first_chunk_index
# Notice: when chunk is created paralleling, the least chunk may not appear in the first line. so we sort it as before.
check_contains "${last_chunk_bound}" $OUT_DIR/first_chunk_bound
check_contains_regex ".:0-0:$((${last_chunk_index_array[2]} + 1)):${last_chunk_index_array[3]}" $OUT_DIR/first_chunk_index
-
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_continous.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_continous.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
echo "================test checkpoint continous================="
# add a table have different table-structs of upstream and downstream
# so data-check will be skipped
mysql -uroot -h 127.0.0.1 -P 4000 -e "create table IF NOT EXISTS diff_test.ttt(a int, aa int, primary key(a), key(aa));"
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table IF NOT EXISTS diff_test.ttt(a int, b int, primary key(a), key(b));"
export GO_FAILPOINTS="main/wait-for-checkpoint=return()"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/checkpoint_diff.output || true
-grep 'save checkpoint' $OUT_DIR/sync_diff.log | awk 'END {print}' > $OUT_DIR/checkpoint_info
+sync_diff_inspector --config=./config.toml >$OUT_DIR/checkpoint_diff.output || true
+grep 'save checkpoint' $OUT_DIR/sync_diff.log | awk 'END {print}' >$OUT_DIR/checkpoint_info
check_not_contains 'has-upper\":true' $OUT_DIR/checkpoint_info
-export GO_FAILPOINTS=""
\ No newline at end of file
+export GO_FAILPOINTS=""
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh
index 612fc24cbe8..77336b7461a 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/expression/run.sh
@@ -11,13 +11,13 @@ mkdir -p $OUT_DIR
mkdir -p $FIX_DIR
for port in 4000 4001; do
- mysql -uroot -h 127.0.0.1 -P $port -e "create database if not exists expression_test;"
- mysql -uroot -h 127.0.0.1 -P $port -e "create table expression_test.diff(\`a\`\`;sad\` int, id int);"
- mysql -uroot -h 127.0.0.1 -P $port -e "alter table expression_test.diff add index i1((\`a\`\`;sad\` + 1 + \`a\`\`;sad\`));"
- mysql -uroot -h 127.0.0.1 -P $port -e "insert into expression_test.diff values (1,1),(2,2),(3,3);"
+ mysql -uroot -h 127.0.0.1 -P $port -e "create database if not exists expression_test;"
+ mysql -uroot -h 127.0.0.1 -P $port -e "create table expression_test.diff(\`a\`\`;sad\` int, id int);"
+ mysql -uroot -h 127.0.0.1 -P $port -e "alter table expression_test.diff add index i1((\`a\`\`;sad\` + 1 + \`a\`\`;sad\`));"
+ mysql -uroot -h 127.0.0.1 -P $port -e "insert into expression_test.diff values (1,1),(2,2),(3,3);"
done
echo "check result should be pass"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/expression_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/expression_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh
index 30824a26fdf..16312c44a9a 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/json/run.sh
@@ -8,34 +8,34 @@ OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output
rm -rf $OUT_DIR
mkdir -p $OUT_DIR
-mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} < ./data.sql
+mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} <./data.sql
# tidb
-mysql -uroot -h 127.0.0.1 -P 4000 < ./data.sql
+mysql -uroot -h 127.0.0.1 -P 4000 <./data.sql
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
-cat config.toml | sed 's/export-fix-sql = true/export-fix-sql = false/' > config_nofix.toml
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
+cat config.toml | sed 's/export-fix-sql = true/export-fix-sql = false/' >config_nofix.toml
diff config.toml config_nofix.toml || true
echo "compare json tables, check result should be pass"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/json_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/json_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "compare json tables without fixsql, check result should be pass"
-sync_diff_inspector --config=./config_nofix.toml > $OUT_DIR/json_diff.output
+sync_diff_inspector --config=./config_nofix.toml >$OUT_DIR/json_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "update data to make it different, and data should not be equal"
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into json_test.test values (5, '{\"id\": 5, \"bool\": true, \"name\":\"aaa\"}');"
mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into json_test.test values (5, '{\"id\": 5, \"bool\": false, \"name\":\"aaa\"}');"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/json_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/json_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "update data to make it different, and downstream json data is NULL"
mysql -uroot -h 127.0.0.1 -P 4000 -e "replace into json_test.test values (5, NULL);"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/json_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/json_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/run.sh
index 338f57baec0..cc82282f31b 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/run.sh
@@ -10,7 +10,6 @@ check_db_status "${MYSQL_HOST}" "${MYSQL_PORT}" mysql "."
BASE_DIR=/tmp/tidb_tools_test/sync_diff_inspector
OUT_DIR=$BASE_DIR/output
-
mkdir -p $OUT_DIR || true
echo "use importer to generate test data"
@@ -29,7 +28,7 @@ mysql -h ${MYSQL_HOST} -P ${MYSQL_PORT} -u root -e "select * from diff_test.test
echo "use sync_diff_inspector to compare data"
# sync diff tidb-tidb
-sync_diff_inspector --config=./config_base_tidb.toml > $OUT_DIR/diff.output
+sync_diff_inspector --config=./config_base_tidb.toml >$OUT_DIR/diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
echo "analyze table, and will use tidb's statistical information to split chunks"
@@ -40,28 +39,28 @@ mysql -uroot -h 127.0.0.1 -P 4000 -e "analyze table diff_test.test"
mysql -uroot -h 127.0.0.1 -P 4000 -e "explain select * from diff_test.test where aa > 1"
mysql -uroot -h 127.0.0.1 -P 4000 -e "explain select * from diff_test.test where \`table\` > 1"
mysql -uroot -h 127.0.0.1 -P 4000 -e "show stats_buckets"
-sync_diff_inspector --config=./config_base_tidb.toml > $OUT_DIR/diff.output
+sync_diff_inspector --config=./config_base_tidb.toml >$OUT_DIR/diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
check_not_contains "split range by random" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "test 'exclude-tables' config"
mysql -uroot -h 127.0.0.1 -P 4000 -e "create table if not exists diff_test.should_not_compare (id int)"
-sync_diff_inspector --config=./config_base_tidb.toml > $OUT_DIR/diff.log
+sync_diff_inspector --config=./config_base_tidb.toml >$OUT_DIR/diff.log
# doesn't contain the table's result in check report
check_not_contains "[table=should_not_compare]" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
# sync diff tidb-mysql
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_mysql.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config_base_mysql_.toml
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base_mysql.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config_base_mysql_.toml
sync_diff_inspector --config=./config_base_mysql_.toml #> $OUT_DIR/diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
for script in ./*/run.sh; do
- test_name="$(basename "$(dirname "$script")")"
- echo "---------------------------------------"
- echo "Running test $script..."
- echo "---------------------------------------"
- sh "$script"
+ test_name="$(basename "$(dirname "$script")")"
+ echo "---------------------------------------"
+ echo "Running test $script..."
+ echo "---------------------------------------"
+ sh "$script"
done
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh
index 09fdbfa041a..729059c5cc7 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/shard/run.sh
@@ -22,16 +22,16 @@ mysql -uroot -h 127.0.0.1 -P 4000 -e "create database if not exists shard_test;"
mysql -uroot -h 127.0.0.1 -P 4000 -e "create table shard_test.test(\`table\` int, aa int, b varchar(10), c float, d datetime, primary key(\`table\`));"
mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into shard_test.test (\`table\`, aa, b, c, d) SELECT \`table\`, aa, b, c, d FROM diff_test.test;"
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
echo "compare sharding tables with one table in downstream, check result should be pass"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/shard_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/shard_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "update data in one shard table, and data should not be equal"
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "update shard_test.test1 set b = 'abc' limit 1"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/shard_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/shard_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
@@ -66,8 +66,8 @@ mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table Router_test_1.Tb
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into Router_test_1.Tbl values (1,\"hello1\",1);"
echo "test router 1: normal rule"
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_1.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
-sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_1.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
+sync_diff_inspector --config=./config.toml -L debug >$OUT_DIR/shard_diff.output || true
check_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log
@@ -79,8 +79,8 @@ check_not_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_di
rm -rf $OUT_DIR/*
echo "test router 2: only schema rule"
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_2.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
-sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_2.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
+sync_diff_inspector --config=./config.toml -L debug >$OUT_DIR/shard_diff.output || true
check_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log
@@ -93,8 +93,8 @@ check_not_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_di
rm -rf $OUT_DIR/*
echo "test router 3: other rule"
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_3.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
-sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_3.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
+sync_diff_inspector --config=./config.toml -L debug >$OUT_DIR/shard_diff.output || true
check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_not_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log
@@ -106,8 +106,8 @@ check_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.l
rm -rf $OUT_DIR/*
echo "test router 4: no rule"
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_4.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
-sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_4.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
+sync_diff_inspector --config=./config.toml -L debug >$OUT_DIR/shard_diff.output || true
check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_not_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_not_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log
@@ -119,8 +119,8 @@ check_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.l
rm -rf $OUT_DIR/*
echo "test router 5: regex rule"
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_5.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
-sync_diff_inspector --config=./config.toml -L debug > $OUT_DIR/shard_diff.output || true
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_router_5.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
+sync_diff_inspector --config=./config.toml -L debug >$OUT_DIR/shard_diff.output || true
check_contains "as CHECKSUM FROM \`router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_contains "as CHECKSUM FROM \`Router_test_0\`.\`tbl\`" $OUT_DIR/sync_diff.log
check_contains "as CHECKSUM FROM \`router_test_0\`.\`Tbl\`" $OUT_DIR/sync_diff.log
@@ -131,4 +131,4 @@ check_contains "as CHECKSUM FROM \`router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.l
check_contains "as CHECKSUM FROM \`Router_test_1\`.\`Tbl\`" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
-echo "shard test passed"
\ No newline at end of file
+echo "shard test passed"
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh
index cbd9e765968..c34c78b81ec 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/snapshot/run.sh
@@ -1,4 +1,3 @@
-
#!/bin/sh
set -e
@@ -12,15 +11,15 @@ rm -rf $FIX_DIR
mkdir -p $OUT_DIR
mkdir -p $FIX_DIR
-mysql -uroot -h 127.0.0.1 -P 4000 -e "show master status" > $OUT_DIR/ts.log
+mysql -uroot -h 127.0.0.1 -P 4000 -e "show master status" >$OUT_DIR/ts.log
#cat $OUT_DIR/sync_diff.log
-ts=`grep -oE "[0-9]+" $OUT_DIR/ts.log`
+ts=$(grep -oE "[0-9]+" $OUT_DIR/ts.log)
echo "get ts $ts"
echo "delete one data, diff should not passed"
mysql -uroot -h 127.0.0.1 -P 4000 -e "delete from diff_test.test limit 1"
-sync_diff_inspector --config=./config_base.toml > $OUT_DIR/snapshot_diff.log || true
+sync_diff_inspector --config=./config_base.toml >$OUT_DIR/snapshot_diff.log || true
check_contains "check failed" $OUT_DIR/sync_diff.log
# move the fix sql file to $FIX_DIR
mv $OUT_DIR/fix-on-tidb/ $FIX_DIR/
@@ -31,7 +30,7 @@ mysql -uroot -h 127.0.0.1 -P 4000 -e "SET GLOBAL sql_mode = 'ANSI_QUOTES';"
sleep 10
mysql -uroot -h 127.0.0.1 -P 4000 -e "show variables like '%sql_mode%'"
mysql -uroot -h 127.0.0.1 -P 4000 -e "show create table diff_test.test"
-sed "s/#snapshot#/snapshot = \"${ts}\"/g" config_base.toml > config.toml
+sed "s/#snapshot#/snapshot = \"${ts}\"/g" config_base.toml >config.toml
echo "use snapshot compare data, data should be equal"
sync_diff_inspector --config=./config.toml #> $OUT_DIR/snapshot_diff.log
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
@@ -39,11 +38,11 @@ rm -rf $OUT_DIR/*
echo "execute fix.sql and use base config, and then compare data, data should be equal"
cat $FIX_DIR/fix-on-tidb/*.sql | mysql -uroot -h127.0.0.1 -P 4000
-sync_diff_inspector --config=./config_base.toml > $OUT_DIR/snapshot_diff.log
+sync_diff_inspector --config=./config_base.toml >$OUT_DIR/snapshot_diff.log
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
# reset sql mode
mysql -uroot -h 127.0.0.1 -P 4000 -e "SET GLOBAL sql_mode = 'ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION';"
-echo "snapshot test passed"
\ No newline at end of file
+echo "snapshot test passed"
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh
index ad541dacb6c..fa5b97a702d 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/table_config/run.sh
@@ -1,4 +1,3 @@
-
#!/bin/sh
set -e
@@ -15,27 +14,27 @@ mkdir -p $FIX_DIR
echo "update data in column b (WHERE \`table\` >= 10 AND \`table\` <= 200), data should not be equal"
mysql -uroot -h 127.0.0.1 -P 4000 -e "update diff_test.test set b = 'abc' where \`table\` >= 10 AND \`table\` <= 200"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/ignore_column_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/ignore_column_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
# move the fix sql file to $FIX_DIR
mv $OUT_DIR/fix-on-tidb/ $FIX_DIR/
rm -rf $OUT_DIR/*
echo "ignore check column b, check result should be pass"
-sed 's/\[""\]#IGNORE/["b"]/g' config.toml > config_.toml
-sync_diff_inspector --config=./config_.toml > $OUT_DIR/ignore_column_diff.output || true
+sed 's/\[""\]#IGNORE/["b"]/g' config.toml >config_.toml
+sync_diff_inspector --config=./config_.toml >$OUT_DIR/ignore_column_diff.output || true
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "set range a < 10 OR a > 200, check result should be pass"
-sed 's/"TRUE"#RANGE"a < 10 OR a > 200"/"`table` < 10 OR `table` > 200"/g' config.toml > config_.toml
-sync_diff_inspector --config=./config_.toml > $OUT_DIR/ignore_column_diff.output || true
+sed 's/"TRUE"#RANGE"a < 10 OR a > 200"/"`table` < 10 OR `table` > 200"/g' config.toml >config_.toml
+sync_diff_inspector --config=./config_.toml >$OUT_DIR/ignore_column_diff.output || true
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "execute fix.sql and use base config, and then compare data, data should be equal"
cat $FIX_DIR/fix-on-tidb/*.sql | mysql -uroot -h127.0.0.1 -P 4000
-sync_diff_inspector --config=./config.toml > $OUT_DIR/ignore_column_diff.log || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/ignore_column_diff.log || true
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh
index 441f7045806..4ee4e3b98b1 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/table_skip/run.sh
@@ -8,15 +8,15 @@ OUT_DIR=/tmp/tidb_tools_test/sync_diff_inspector/output
rm -rf $OUT_DIR
mkdir -p $OUT_DIR
-mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} < ./data.sql
+mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} <./data.sql
# tidb
-mysql -uroot -h 127.0.0.1 -P 4000 < ./data.sql
+mysql -uroot -h 127.0.0.1 -P 4000 <./data.sql
-sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" > ./config.toml
+sed "s/\"127.0.0.1\"#MYSQL_HOST/\"${MYSQL_HOST}\"/g" ./config_base.toml | sed "s/3306#MYSQL_PORT/${MYSQL_PORT}/g" >./config.toml
echo "compare tables, check result should be pass"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/table_skip_diff.output || true
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
@@ -25,7 +25,7 @@ mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table skip_test.t2 (a
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into skip_test.t2 values (3,3);"
mysql -uroot -h 127.0.0.1 -P 4000 -e "create table skip_test.t3 (a int, b int, primary key(a));"
mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into skip_test.t3 values (1,1);"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/table_skip_diff.output || true
check_contains "check pass" $OUT_DIR/sync_diff.log
check_contains "Comparing the table data of \`skip_test\`.\`t2\` ...skipped" $OUT_DIR/table_skip_diff.output
check_contains "Comparing the table data of \`skip_test\`.\`t3\` ...skipped" $OUT_DIR/table_skip_diff.output
@@ -38,7 +38,7 @@ rm -rf $OUT_DIR/*
echo "make some table data not equal"
mysql -uroot -h 127.0.0.1 -P 4000 -e "insert into skip_test.t1 values (4,4);"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/table_skip_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
check_contains "| \`skip_test\`.\`t1\` | succeed | true | +0/-1 | 1 | 2 |" $OUT_DIR/summary.txt
rm -rf $OUT_DIR/*
@@ -47,7 +47,7 @@ echo "make some table structure not equal"
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "create table skip_test.t4 (a int, b int, c int,primary key(a));"
mysql -uroot -h ${MYSQL_HOST} -P ${MYSQL_PORT} -e "insert into skip_test.t4 values (1,1,1);"
mysql -uroot -h 127.0.0.1 -P 4000 -e "create table skip_test.t4 (a int, b int, primary key(a));"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/table_skip_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/table_skip_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
check_contains "| \`skip_test\`.\`t4\` | succeed | false | +0/-0 | 0 | 0 |" $OUT_DIR/summary.txt
check_contains "A total of 5 tables have been compared, 1 tables finished, 2 tables failed, 2 tables skipped" $OUT_DIR/table_skip_diff.output
@@ -55,7 +55,7 @@ cat $OUT_DIR/summary.txt
rm -rf $OUT_DIR/*
echo "test router case"
-sync_diff_inspector --config=./config_router.toml > $OUT_DIR/table_skip_diff.output || true
+sync_diff_inspector --config=./config_router.toml >$OUT_DIR/table_skip_diff.output || true
check_contains "check pass" $OUT_DIR/sync_diff.log
check_contains "| \`skip_test\`.\`t5\` | skipped | false | +3/-0 | 3 | 0 |" $OUT_DIR/summary.txt
check_contains "The data of \`skip_test\`.\`t5\` does not exist in downstream database" $OUT_DIR/table_skip_diff.output
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh
index b96e0d895d8..05b015ec8f4 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/time_zone/run.sh
@@ -14,44 +14,44 @@ mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@GLOBAL.SQL_MODE='ONLY_FULL_GROUP_BY,
sleep 3
for port in 4000 4001; do
- mysql -uroot -h 127.0.0.1 -P $port -e "create database if not exists tz_test"
- mysql -uroot -h 127.0.0.1 -P $port -e "create table tz_test.diff(id int, dt datetime, ts timestamp);"
- mysql -uroot -h 127.0.0.1 -P $port -e "insert into tz_test.diff values (1, '2020-05-17 09:12:13', '2020-05-17 09:12:13');"
- mysql -uroot -h 127.0.0.1 -P $port -e "set @@session.time_zone = \"-07:00\"; insert into tz_test.diff values (2, '2020-05-17 09:12:13', '2020-05-17 09:12:13');"
+ mysql -uroot -h 127.0.0.1 -P $port -e "create database if not exists tz_test"
+ mysql -uroot -h 127.0.0.1 -P $port -e "create table tz_test.diff(id int, dt datetime, ts timestamp);"
+ mysql -uroot -h 127.0.0.1 -P $port -e "insert into tz_test.diff values (1, '2020-05-17 09:12:13', '2020-05-17 09:12:13');"
+ mysql -uroot -h 127.0.0.1 -P $port -e "set @@session.time_zone = \"-07:00\"; insert into tz_test.diff values (2, '2020-05-17 09:12:13', '2020-05-17 09:12:13');"
done
echo "check with the same time_zone, check result should be pass"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/time_zone_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
# check upstream and downstream time_zone
-mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@global.time_zone = '+08:00'";
-mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@global.time_zone = '+00:00'";
+mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@global.time_zone = '+08:00'"
+mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@global.time_zone = '+00:00'"
sleep 5
echo "check with different time_zone, check result should be pass again"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/time_zone_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
echo "set different rows, check result should be failed"
mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@session.time_zone = '-06:00'; insert into tz_test.diff values (4, '2020-05-17 09:12:13', '2020-05-17 09:12:13');"
mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@session.time_zone = '-05:00'; insert into tz_test.diff values (3, '2020-05-17 10:12:13', '2020-05-17 10:12:13');"
-sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output || true
+sync_diff_inspector --config=./config.toml >$OUT_DIR/time_zone_diff.output || true
check_contains "check failed" $OUT_DIR/sync_diff.log
mv $OUT_DIR/fix-on-tidb/ $FIX_DIR/
rm -rf $OUT_DIR/*
echo "fix the rows, check result should be pass"
cat $FIX_DIR/fix-on-tidb/*.sql | mysql -uroot -h127.0.0.1 -P 4000
-sync_diff_inspector --config=./config.toml > $OUT_DIR/time_zone_diff.output
+sync_diff_inspector --config=./config.toml >$OUT_DIR/time_zone_diff.output
check_contains "check pass!!!" $OUT_DIR/sync_diff.log
rm -rf $OUT_DIR/*
-mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@session.time_zone = '-06:00'; select ts from tz_test.diff where id = 4 or id = 3;" > $OUT_DIR/tmp_sql_timezone
+mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@session.time_zone = '-06:00'; select ts from tz_test.diff where id = 4 or id = 3;" >$OUT_DIR/tmp_sql_timezone
check_contains "2020-05-17 09:12:13" $OUT_DIR/tmp_sql_timezone
check_not_contains "2020-05-17 10:12:13" $OUT_DIR/tmp_sql_timezone
# reset time_zone
-mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@global.time_zone = 'SYSTEM'";
-mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@global.time_zone = 'SYSTEM'";
+mysql -uroot -h 127.0.0.1 -P 4000 -e "SET @@global.time_zone = 'SYSTEM'"
+mysql -uroot -h 127.0.0.1 -P 4001 -e "SET @@global.time_zone = 'SYSTEM'"
diff --git a/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh b/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh
index 03ed6cbf3e1..cfec9d28126 100644
--- a/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh
+++ b/sync_diff_inspector/tests/sync_diff_inspector/tls/run.sh
@@ -4,7 +4,7 @@ set -ex
cd "$(dirname "$0")"
-CONF_PATH=`cd ../../conf && pwd`
+CONF_PATH=$(cd ../../conf && pwd)
CA_PATH="$CONF_PATH/root.crt"
CERT_PATH="$CONF_PATH/client.crt"
KEY_PATH="$CONF_PATH/client.key"
@@ -15,13 +15,13 @@ mkdir -p $OUT_DIR
# create user for test tls
mysql -uroot -h 127.0.0.1 -P 4000 -e "create user 'root_tls'@'%' identified by '' require X509;"
mysql -uroot -h 127.0.0.1 -P 4000 -e "grant all privileges on *.* to 'root_tls'@'%';"
-mysql -uroot_tls -h 127.0.0.1 -P 4000 --ssl-ca "$CA_PATH" --ssl-cert "$CERT_PATH" --ssl-key "$KEY_PATH" -e "SHOW STATUS LIKE \"%Ssl%\";" 
+mysql -uroot_tls -h 127.0.0.1 -P 4000 --ssl-ca "$CA_PATH" --ssl-cert "$CERT_PATH" --ssl-key "$KEY_PATH" -e "SHOW STATUS LIKE \"%Ssl%\";"
echo "use sync_diff_inspector to compare data"
# sync diff tidb-tidb
CA_PATH_REG=$(echo ${CA_PATH} | sed 's/\//\\\//g')
CERT_PATH_REG=$(echo ${CERT_PATH} | sed 's/\//\\\//g')
KEY_PATH_REG=$(echo ${KEY_PATH} | sed 's/\//\\\//g')
-sed "s/\"ca-path\"#CAPATH/\"${CA_PATH_REG}\"/g" config.toml | sed "s/\"cert-path\"#CERTPATH/\"${CERT_PATH_REG}\"/g" | sed "s/\"key-path\"#KEYPATH/\"${KEY_PATH_REG}\"/g" > config_.toml
-sync_diff_inspector --config=./config_.toml > $OUT_DIR/diff.output || (cat $OUT_DIR/diff.output && exit 1)
+sed "s/\"ca-path\"#CAPATH/\"${CA_PATH_REG}\"/g" config.toml | sed "s/\"cert-path\"#CERTPATH/\"${CERT_PATH_REG}\"/g" | sed "s/\"key-path\"#KEYPATH/\"${KEY_PATH_REG}\"/g" >config_.toml
+sync_diff_inspector --config=./config_.toml >$OUT_DIR/diff.output || (cat $OUT_DIR/diff.output && exit 1)
check_contains "check pass!!!" $OUT_DIR/sync_diff.log

From ed2a5dc4c57dd58c5fe2e3c3baab84e478976dac Mon Sep 17 00:00:00 2001
From: Ruihao Chen
Date: Tue, 7 Jan 2025 10:48:00 +0800
Subject: [PATCH 22/22] fix lint

---
 sync_diff_inspector/diff/diff.go | 2 +-
 sync_diff_inspector/utils/pd.go  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/sync_diff_inspector/diff/diff.go b/sync_diff_inspector/diff/diff.go
index 3feeb9e9de1..2630897351f 100644
--- a/sync_diff_inspector/diff/diff.go
+++ b/sync_diff_inspector/diff/diff.go
@@ -50,7 +50,7 @@ func fileExists(name string) bool {
	return !os.IsNotExist(err)
}

-// GetSnapsnot get the snapshot
+// GetSnapshot get the snapshot
func GetSnapshot(latestSnap []string, snap string, db *sql.DB) string {
	if len(latestSnap) != 1 {
		return snap
diff --git a/sync_diff_inspector/utils/pd.go b/sync_diff_inspector/utils/pd.go
index af7947f1baf..c3a4f3cb845 100644
--- a/sync_diff_inspector/utils/pd.go
+++ b/sync_diff_inspector/utils/pd.go
@@ -250,6 +250,7 @@ func updateServiceSafePoint(ctx context.Context, pdClient pd.Client, snapshotTS
	}
}

+// ParseSnapshotToTSO parse snapshot string to TSO
func ParseSnapshotToTSO(pool *sql.DB, snapshot string) (uint64, error) {
	snapshotTS, err := strconv.ParseUint(snapshot, 10, 64)
	if err == nil {