From 357b67dc5a017ec23fdd69834cc28831e3278481 Mon Sep 17 00:00:00 2001 From: F-ca7 <627955292@qq.com> Date: Tue, 24 May 2022 16:52:37 +0800 Subject: [PATCH 01/12] 1. add perf mode in force-quote 2. optimize Insert Sql building: avoid mem copy 3. fix last char parsing in splitWithQuoteEscape --- batch-tool/src/main/java/cmd/CommandUtil.java | 21 ++++++++++ .../src/main/java/exec/BaseExecutor.java | 6 +-- .../src/main/java/exec/DeleteExecutor.java | 7 ---- .../src/main/java/exec/ImportExecutor.java | 11 ++--- .../src/main/java/exec/UpdateExecutor.java | 11 +---- .../src/main/java/exec/WriteDbExecutor.java | 17 ++++++-- .../java/exec/export/BaseExportExecutor.java | 8 +--- .../exec/export/OrderByExportExecutor.java | 12 +----- .../export/SingleThreadExportExecutor.java | 6 --- .../java/model/ConsumerExecutionContext.java | 30 ++++++++++---- .../java/model/config/ConfigConstant.java | 1 + .../src/main/java/model/config/GlobalVar.java | 2 + batch-tool/src/main/java/util/FileUtil.java | 14 +++++-- .../worker/common/BaseDefaultConsumer.java | 6 ++- .../java/worker/common/BaseWorkHandler.java | 7 ++++ .../java/worker/insert/ImportConsumer.java | 14 ++++++- .../insert/ProcessOnlyImportConsumer.java | 8 +++- .../src/main/java/worker/util/ImportUtil.java | 41 +++++++++++++++---- 18 files changed, 145 insertions(+), 77 deletions(-) diff --git a/batch-tool/src/main/java/cmd/CommandUtil.java b/batch-tool/src/main/java/cmd/CommandUtil.java index 2fa2c08..4243f40 100644 --- a/batch-tool/src/main/java/cmd/CommandUtil.java +++ b/batch-tool/src/main/java/cmd/CommandUtil.java @@ -416,6 +416,7 @@ private static void configureCommonContext(CommandLine result, private static void configureGlobalVar(CommandLine result) { setBatchSize(result); setRingBufferSize(result); + setPerfMode(result); } /** @@ -457,6 +458,17 @@ private static void configureConsumerContext(CommandLine result, consumerExecutionContext.setWhereInEnabled(getWhereInEnabled(result)); 
consumerExecutionContext.setWithLastSep(getWithLastSep(result)); consumerExecutionContext.setTpsLimit(getTpsLimit(result)); + consumerExecutionContext.setUseColumns(getUseColumns(result)); + + consumerExecutionContext.validate(); + } + + private static String getUseColumns(CommandLine result) { + List columnNames = getColumnNames(result); + if (columnNames == null) { + return null; + } + return StringUtils.join(columnNames, ","); } private static boolean getWhereInEnabled(CommandLine result) { @@ -610,6 +622,10 @@ private static void setBatchSize(CommandLine result) { result.getOptionValue(ARG_SHORT_BATCH_SIZE)); } } + + private static void setPerfMode(CommandLine result) { + GlobalVar.IN_PERF_MODE = result.hasOption(ARG_SHORT_PERF_MODE); + } //endregion 全局相关设置 //region 命令行参数校验与帮助 @@ -808,6 +824,11 @@ private static void addBatchOperationOptions(Options options) { .hasArg() .desc("Max error count threshold.") .build()); + // 性能模式 + options.addOption(Option.builder(ARG_SHORT_PERF_MODE) + .longOpt("perf") + .desc("perf mode") + .build()); } private static void addConnectDbOptions(Options options) { diff --git a/batch-tool/src/main/java/exec/BaseExecutor.java b/batch-tool/src/main/java/exec/BaseExecutor.java index 28885e4..49eb7c1 100644 --- a/batch-tool/src/main/java/exec/BaseExecutor.java +++ b/batch-tool/src/main/java/exec/BaseExecutor.java @@ -36,7 +36,6 @@ import model.config.ExportConfig; import model.config.FileLineRecord; import model.config.GlobalVar; -import model.config.QuoteEncloseMode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import util.DbUtil; @@ -66,13 +65,14 @@ public abstract class BaseExecutor { private final DataSourceConfig dataSourceConfig; protected final DataSource dataSource; + protected final BaseOperateCommand command; public BaseExecutor(DataSourceConfig dataSourceConfig, DataSource dataSource, BaseOperateCommand baseCommand) { this.dataSourceConfig = dataSourceConfig; this.dataSource = dataSource; - setCommand(baseCommand); 
+ this.command = baseCommand; } public void preCheck() { @@ -92,8 +92,6 @@ protected void checkTableExists(List tableNames) { } } - protected abstract void setCommand(BaseOperateCommand baseCommand); - public abstract void execute(); public static BaseExecutor getExecutor(BaseOperateCommand command, DataSourceConfig dataSourceConfig, diff --git a/batch-tool/src/main/java/exec/DeleteExecutor.java b/batch-tool/src/main/java/exec/DeleteExecutor.java index 6f2a3c8..fcda2c1 100644 --- a/batch-tool/src/main/java/exec/DeleteExecutor.java +++ b/batch-tool/src/main/java/exec/DeleteExecutor.java @@ -29,19 +29,12 @@ public class DeleteExecutor extends WriteDbExecutor { private static final Logger logger = LoggerFactory.getLogger(DeleteExecutor.class); - private DeleteCommand command; - public DeleteExecutor(DataSourceConfig dataSourceConfig, DruidDataSource druid, BaseOperateCommand baseCommand) { super(dataSourceConfig, druid, baseCommand); } - @Override - protected void setCommand(BaseOperateCommand baseCommand) { - this.command = (DeleteCommand) baseCommand; - } - @Override public void execute() { configurePkList(); diff --git a/batch-tool/src/main/java/exec/ImportExecutor.java b/batch-tool/src/main/java/exec/ImportExecutor.java index f34ed97..257d4d4 100644 --- a/batch-tool/src/main/java/exec/ImportExecutor.java +++ b/batch-tool/src/main/java/exec/ImportExecutor.java @@ -23,6 +23,7 @@ import exception.DatabaseException; import model.config.ConfigConstant; import model.config.DdlMode; +import model.config.GlobalVar; import model.config.QuoteEncloseMode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,19 +41,12 @@ public class ImportExecutor extends WriteDbExecutor { private static final Logger logger = LoggerFactory.getLogger(ImportExecutor.class); - private ImportCommand command; - public ImportExecutor(DataSourceConfig dataSourceConfig, DruidDataSource druid, BaseOperateCommand baseCommand) { super(dataSourceConfig, druid, baseCommand); } - @Override - 
protected void setCommand(BaseOperateCommand baseCommand) { - this.command = (ImportCommand) baseCommand; - } - @Override public void preCheck() { if (producerExecutionContext.getDdlMode() != DdlMode.NO_DDL) { @@ -198,6 +192,9 @@ private void doShardingImport(String tableName) { } private boolean useBlockReader() { + if (GlobalVar.IN_PERF_MODE) { + return true; + } if (producerExecutionContext.getQuoteEncloseMode() == QuoteEncloseMode.FORCE) { return false; } diff --git a/batch-tool/src/main/java/exec/UpdateExecutor.java b/batch-tool/src/main/java/exec/UpdateExecutor.java index 502c600..557d061 100644 --- a/batch-tool/src/main/java/exec/UpdateExecutor.java +++ b/batch-tool/src/main/java/exec/UpdateExecutor.java @@ -34,31 +34,24 @@ public class UpdateExecutor extends WriteDbExecutor { private static final Logger logger = LoggerFactory.getLogger(UpdateExecutor.class); - private UpdateCommand command; - public UpdateExecutor(DataSourceConfig dataSourceConfig, DruidDataSource druid, BaseOperateCommand baseCommand) { super(dataSourceConfig, druid, baseCommand); } - @Override - protected void setCommand(BaseOperateCommand baseCommand) { - this.command = (UpdateCommand) baseCommand; - } - @Override public void execute() { configureFieldMetaInfo(); configurePkList(); - if (command.getConsumerExecutionContext().isFuncSqlForUpdateEnabled()) { + if (consumerExecutionContext.isFuncSqlForUpdateEnabled()) { // 启用函数则优先 doUpdateWithFunc(); logger.info("更新 {} 数据完成", tableNames); return; } - if (!StringUtils.isEmpty(command.getConsumerExecutionContext().getWhereCondition())) { + if (!StringUtils.isEmpty(consumerExecutionContext.getWhereCondition())) { // 有where子句用默认方法 doDefaultUpdate(UpdateConsumer.class); logger.info("更新 {} 数据完成", tableNames); diff --git a/batch-tool/src/main/java/exec/WriteDbExecutor.java b/batch-tool/src/main/java/exec/WriteDbExecutor.java index be49285..45f46ee 100644 --- a/batch-tool/src/main/java/exec/WriteDbExecutor.java +++ 
b/batch-tool/src/main/java/exec/WriteDbExecutor.java @@ -85,15 +85,24 @@ protected void configurePkList() { */ protected void configureFieldMetaInfo() { logger.info("正在获取所有表的元信息..."); - Map tableFieldMetaInfo = null; + Map tableFieldMetaInfoMap = null; try { - tableFieldMetaInfo = DbUtil.getDbFieldMetaInfo(dataSource.getConnection(), - getSchemaName(), tableNames); + if (command.getColumnNames() != null) { + assert tableNames.size() == 1; + tableFieldMetaInfoMap = new HashMap<>(); + TableFieldMetaInfo fieldMetaInfo = DbUtil.getTableFieldMetaInfo(dataSource.getConnection(), getSchemaName(), + tableNames.get(0), command.getColumnNames()); + tableFieldMetaInfoMap.put(tableNames.get(0), fieldMetaInfo); + } else { + tableFieldMetaInfoMap = DbUtil.getDbFieldMetaInfo(dataSource.getConnection(), + getSchemaName(), tableNames); + } } catch (DatabaseException | SQLException e) { logger.error(e.getMessage()); throw new RuntimeException(e); } - consumerExecutionContext.setTableFieldMetaInfo(tableFieldMetaInfo); + + consumerExecutionContext.setTableFieldMetaInfo(tableFieldMetaInfoMap); logger.info("所有表的元信息获取完毕"); } diff --git a/batch-tool/src/main/java/exec/export/BaseExportExecutor.java b/batch-tool/src/main/java/exec/export/BaseExportExecutor.java index d5d6375..cacab5a 100644 --- a/batch-tool/src/main/java/exec/export/BaseExportExecutor.java +++ b/batch-tool/src/main/java/exec/export/BaseExportExecutor.java @@ -26,18 +26,12 @@ public abstract class BaseExportExecutor extends BaseExecutor { - protected ExportCommand command; protected ExportConfig config; public BaseExportExecutor(DataSourceConfig dataSourceConfig, DruidDataSource druid, BaseOperateCommand baseCommand) { super(dataSourceConfig, druid, baseCommand); - } - - @Override - protected void setCommand(BaseOperateCommand baseCommand) { - this.command = (ExportCommand) baseCommand; - this.config = command.getExportConfig(); + this.config = ((ExportCommand) command).getExportConfig(); } /** diff --git 
a/batch-tool/src/main/java/exec/export/OrderByExportExecutor.java b/batch-tool/src/main/java/exec/export/OrderByExportExecutor.java index 7a7587c..549bcc2 100644 --- a/batch-tool/src/main/java/exec/export/OrderByExportExecutor.java +++ b/batch-tool/src/main/java/exec/export/OrderByExportExecutor.java @@ -54,19 +54,13 @@ public class OrderByExportExecutor extends BaseExportExecutor { private static final Logger logger = LoggerFactory.getLogger(OrderByExportExecutor.class); - private ExportCommand command; private ExportConfig config; public OrderByExportExecutor(DataSourceConfig dataSourceConfig, DruidDataSource druid, BaseOperateCommand baseCommand) { super(dataSourceConfig, druid, baseCommand); - } - - @Override - protected void setCommand(BaseOperateCommand baseCommand) { - this.command = (ExportCommand) baseCommand; - this.config = command.getExportConfig(); + this.config = ((ExportCommand) command).getExportConfig(); } @Override @@ -98,7 +92,6 @@ private void handleExportOrderBy() { */ private void doExportWithOrderByLocal() { List topologyList; - ExportConfig config = command.getExportConfig(); List orderByColumnInfoList; for (String tableName : command.getTableNames()) { String filePathPrefix = FileUtil.getFilePathPrefix(config.getPath(), @@ -171,7 +164,7 @@ private void handleExportWithOrderByFromDb() { TableFieldMetaInfo tableFieldMetaInfo = DbUtil.getTableFieldMetaInfo(dataSource.getConnection(), getSchemaName(), tableName); DirectOrderExportWorker directOrderByExportWorker = ExportWorkerFactory - .buildDirectOrderExportWorker(dataSource, tableFieldMetaInfo, command, tableName); + .buildDirectOrderExportWorker(dataSource, tableFieldMetaInfo, (ExportCommand) command, tableName); // 就单线程地写入 directOrderByExportWorker.exportSerially(); logger.info("导出 {} 数据完成", tableName); @@ -188,7 +181,6 @@ private void handleExportWithOrderByFromDb() { private void handleExportWithOrderByParallelMerge() { for (String tableName : command.getTableNames()) { List topologyList; 
- ExportConfig config = command.getExportConfig(); List orderByColumnInfoList; try { String filePathPrefix = FileUtil.getFilePathPrefix(config.getPath(), diff --git a/batch-tool/src/main/java/exec/export/SingleThreadExportExecutor.java b/batch-tool/src/main/java/exec/export/SingleThreadExportExecutor.java index f1227d0..e6ed9b5 100644 --- a/batch-tool/src/main/java/exec/export/SingleThreadExportExecutor.java +++ b/batch-tool/src/main/java/exec/export/SingleThreadExportExecutor.java @@ -48,12 +48,6 @@ public SingleThreadExportExecutor(DataSourceConfig dataSourceConfig, super(dataSourceConfig, druid, baseCommand); } - @Override - protected void setCommand(BaseOperateCommand baseCommand) { - this.command = (ExportCommand) baseCommand; - this.config = command.getExportConfig(); - } - @Override void exportData() { doDefaultExport(); diff --git a/batch-tool/src/main/java/model/ConsumerExecutionContext.java b/batch-tool/src/main/java/model/ConsumerExecutionContext.java index b9d3289..813716d 100644 --- a/batch-tool/src/main/java/model/ConsumerExecutionContext.java +++ b/batch-tool/src/main/java/model/ConsumerExecutionContext.java @@ -137,6 +137,11 @@ public class ConsumerExecutionContext extends BaseConfig { private boolean useMagicSeparator = false; + /** + * 以逗号拼接的指定使用列 + */ + private String useColumns = null; + private volatile Exception exception; public ConsumerExecutionContext() { @@ -294,20 +299,15 @@ public void setBatchTpsLimitPerConsumer(double batchTpsLimitPerConsumer) { @Override public String toString() { return "ConsumerExecutionContext{" + - "tableName='" + tableNames + '\'' + + "tableNames=" + tableNames + + ", partitionKey=" + tablePartitionKey + ", pkList=" + tablePkList + - ", pkIndexSet=" + tablePkIndexSet + - ", tableFieldMetaInfo=" + tableFieldMetaInfo + ", insertIgnoreAndResumeEnabled=" + insertIgnoreAndResumeEnabled + - ", funcSqlForUpdateEnabled=" + funcSqlForUpdateEnabled + ", parallelism=" + parallelism + ", whereCondition='" + whereCondition + 
'\'' + ", toUpdateColumns='" + toUpdateColumns + '\'' + - ", topologyList=" + topologyList + - ", partitionKey=" + tablePartitionKey + ", updateWithFuncPattern='" + updateWithFuncPattern + '\'' + ", sqlEscapeEnabled=" + sqlEscapeEnabled + - ", readProcessFileOnly=" + readProcessFileOnly + '}'; } @@ -410,4 +410,20 @@ public Exception getException() { public void setException(Exception exception) { this.exception = exception; } + + public String getUseColumns() { + return useColumns; + } + + public void setUseColumns(String useColumns) { + this.useColumns = useColumns; + } + + @Override + public void validate() { + super.validate(); + if (tableNames.size() > 1 && useColumns != null) { + throw new UnsupportedOperationException("Do not support multi-table operation with specified columns"); + } + } } diff --git a/batch-tool/src/main/java/model/config/ConfigConstant.java b/batch-tool/src/main/java/model/config/ConfigConstant.java index d602704..c84621a 100644 --- a/batch-tool/src/main/java/model/config/ConfigConstant.java +++ b/batch-tool/src/main/java/model/config/ConfigConstant.java @@ -77,6 +77,7 @@ public class ConfigConstant { public static final String ARG_SHORT_KEY = "key"; public static final String ARG_SHORT_FILE_FORMAT = "format"; public static final String ARG_SHORT_MAX_ERROR = "error"; + public static final String ARG_SHORT_PERF_MODE = "perf"; public static final int CPU_NUM = Runtime.getRuntime().availableProcessors(); /** diff --git a/batch-tool/src/main/java/model/config/GlobalVar.java b/batch-tool/src/main/java/model/config/GlobalVar.java index 31b0d6e..c4269a5 100644 --- a/batch-tool/src/main/java/model/config/GlobalVar.java +++ b/batch-tool/src/main/java/model/config/GlobalVar.java @@ -33,4 +33,6 @@ public class GlobalVar { * 4K */ public static int DEFAULT_DIRECT_BUFFER_SIZE_PER_WORKER = 1024 * 4; + + public static boolean IN_PERF_MODE = false; } diff --git a/batch-tool/src/main/java/util/FileUtil.java b/batch-tool/src/main/java/util/FileUtil.java 
index 42efab8..6474d79 100644 --- a/batch-tool/src/main/java/util/FileUtil.java +++ b/batch-tool/src/main/java/util/FileUtil.java @@ -182,8 +182,10 @@ private static ArrayList splitWithQuoteEscape(String line, String sep, f for (int i = 0; i < len; i++) { if (i == len - 1) { // 最后一个字符 - if (chars[i] == '\"' && hasEscapedQuote) { - stringBuilder.append(chars[i]); + if (chars[i] == '\"') { + if (hasEscapedQuote) { + stringBuilder.append(chars[i]); + } subStrings.add(stringBuilder.toString()); stringBuilder.setLength(0); break; @@ -192,8 +194,12 @@ private static ArrayList splitWithQuoteEscape(String line, String sep, f if (!hasEscapedQuote && enclosingByQuote) { badFormatException("Unclosed quote", line); } else { - // 说明当前为最后一个字段 - stringBuilder.append(chars[i]); + if (sep.length() == 1 && chars[i] == sepStart) { + endsWithSep = true; + } else { + // 说明当前为最后一个字段 + stringBuilder.append(chars[i]); + } subStrings.add(stringBuilder.toString()); stringBuilder.setLength(0); } diff --git a/batch-tool/src/main/java/worker/common/BaseDefaultConsumer.java b/batch-tool/src/main/java/worker/common/BaseDefaultConsumer.java index a8daec7..49f50ec 100644 --- a/batch-tool/src/main/java/worker/common/BaseDefaultConsumer.java +++ b/batch-tool/src/main/java/worker/common/BaseDefaultConsumer.java @@ -47,7 +47,11 @@ public void onProxyEvent(BatchLineEvent event) { initLocalVars(); try { String[] lines = event.getBatchLines(); - StringBuilder stringBuilder = new StringBuilder(lines.length * 10); + int estimateLineSize = 10; + if (lines.length > 0 && lines[0] != null) { + estimateLineSize = Math.min(estimateLineSize, lines[0].length()); + } + StringBuilder stringBuilder = new StringBuilder(lines.length * estimateLineSize); for (String line : lines) { if (StringUtils.isEmpty(line)) { continue; diff --git a/batch-tool/src/main/java/worker/common/BaseWorkHandler.java b/batch-tool/src/main/java/worker/common/BaseWorkHandler.java index 2944787..9d6eb3f 100644 --- 
a/batch-tool/src/main/java/worker/common/BaseWorkHandler.java +++ b/batch-tool/src/main/java/worker/common/BaseWorkHandler.java @@ -18,8 +18,10 @@ import com.google.common.util.concurrent.RateLimiter; import com.lmax.disruptor.WorkHandler; +import jdk.nashorn.internal.objects.Global; import model.ConsumerExecutionContext; import model.config.ConfigConstant; +import model.config.GlobalVar; /** * 限流代理类 @@ -36,6 +38,11 @@ public abstract class BaseWorkHandler implements WorkHandler { protected String tableName; protected void initLocalVars() { + if (GlobalVar.IN_PERF_MODE) { + this.sep = consumerContext.getSeparator(); + hasEscapedQuote = true; + return; + } if (consumerContext.isUseMagicSeparator()) { this.sep = ConfigConstant.MAGIC_CSV_SEP; hasEscapedQuote = true; diff --git a/batch-tool/src/main/java/worker/insert/ImportConsumer.java b/batch-tool/src/main/java/worker/insert/ImportConsumer.java index bfdd765..8316ff6 100644 --- a/batch-tool/src/main/java/worker/insert/ImportConsumer.java +++ b/batch-tool/src/main/java/worker/insert/ImportConsumer.java @@ -31,11 +31,18 @@ public class ImportConsumer extends BaseDefaultConsumer { private static final Logger logger = LoggerFactory.getLogger(ImportConsumer.class); private List fieldMetaInfoList; + /** + * 仅指定了列名才会设置 + */ + private String columns = null; + private StringBuilder insertSqlBuilder; @Override protected void initLocalVars() { super.initLocalVars(); this.fieldMetaInfoList = consumerContext.getTableFieldMetaInfo(tableName).getFieldMetaInfoList(); + this.columns = consumerContext.getUseColumns(); + this.insertSqlBuilder = new StringBuilder(40 + fieldMetaInfoList.size() * 10); } @Override @@ -57,7 +64,10 @@ protected void fillLocalBuffer(StringBuilder stringBuilder, String[] values) { protected String getSql(StringBuilder data) { // 去除最后一个逗号 data.setLength(data.length() - 1); - return ImportUtil.getBatchInsertSql(tableName, - data.toString(), consumerContext.isInsertIgnoreAndResumeEnabled()); + 
ImportUtil.getBatchInsertSql(insertSqlBuilder, tableName, columns, + data, consumerContext.isInsertIgnoreAndResumeEnabled()); + String sql = insertSqlBuilder.toString(); + insertSqlBuilder.setLength(0); + return sql; } } diff --git a/batch-tool/src/main/java/worker/insert/ProcessOnlyImportConsumer.java b/batch-tool/src/main/java/worker/insert/ProcessOnlyImportConsumer.java index fa8c198..87dd8c2 100644 --- a/batch-tool/src/main/java/worker/insert/ProcessOnlyImportConsumer.java +++ b/batch-tool/src/main/java/worker/insert/ProcessOnlyImportConsumer.java @@ -45,9 +45,15 @@ public class ProcessOnlyImportConsumer extends BaseWorkHandler { public void onProxyEvent(BatchLineEvent event) { try { String[] lines = event.getBatchLines(); - StringBuilder stringBuilder = new StringBuilder(); + List fieldMetaInfoList = consumerContext.getTableFieldMetaInfo(tableName) .getFieldMetaInfoList(); + int estimateLineSize = 10; + if (lines.length > 0 && lines[0] != null) { + estimateLineSize = Math.min(estimateLineSize, lines[0].length()); + } + StringBuilder stringBuilder = new StringBuilder(lines.length * estimateLineSize); + for (String line : lines) { if (StringUtils.isEmpty(line)) { continue; diff --git a/batch-tool/src/main/java/worker/util/ImportUtil.java b/batch-tool/src/main/java/worker/util/ImportUtil.java index db66831..c2ff8df 100644 --- a/batch-tool/src/main/java/worker/util/ImportUtil.java +++ b/batch-tool/src/main/java/worker/util/ImportUtil.java @@ -17,6 +17,7 @@ package worker.util; import exception.DatabaseException; +import model.config.GlobalVar; import model.db.FieldMetaInfo; import org.apache.commons.lang.StringUtils; import util.FileUtil; @@ -33,6 +34,12 @@ public class ImportUtil { private static final String BATCH_INSERT_IGNORE_SQL_PATTERN = "INSERT IGNORE INTO `%s` VALUES %s;"; + private static final String BATCH_INSERT_WITH_COL_SQL_PATTERN = + "INSERT INTO `%s` (%s) VALUES %s;"; + + private static final String BATCH_INSERT_IGNORE_SQL_WITH_COL_PATTERN = + 
"INSERT IGNORE INTO `%s` (%s) VALUES %s;"; + private static final String BATCH_INSERT_HINT_SQL_PATTERN = DIRECT_NODE_HINT + "INSERT INTO `%s` VALUES %s;"; @@ -47,20 +54,38 @@ public static String getBatchInsertSql(String tableName, String values, boolean } } + public static void getBatchInsertSql(StringBuilder insertSqlBuilder, + String tableName, String columns, + StringBuilder values, boolean insertIgnoreEnabled) { + insertSqlBuilder.append("INSERT "); + if (insertIgnoreEnabled) { + insertSqlBuilder.append("IGNORE "); + } + insertSqlBuilder.append("INTO `").append(tableName).append("` "); + if (columns != null) { + insertSqlBuilder.append('(').append(columns).append(") "); + } + insertSqlBuilder.append("VALUES ").append(values).append(";"); + } + public static void appendInsertStrValue(StringBuilder sqlStringBuilder, String rawValue, boolean sqlEscapeEnabled, boolean hasEscapedQuote) { if (rawValue.equals(FileUtil.NULL_ESC_STR)) { // NULL字段处理 sqlStringBuilder.append("NULL"); + return; + } + if (GlobalVar.IN_PERF_MODE) { + sqlStringBuilder.append(rawValue); + return; + } + if (sqlEscapeEnabled) { + // 字符串要考虑转义 + sqlStringBuilder.append("'") + .append(escapeSqlSpecialChar(rawValue)) + .append("'"); } else { - if (sqlEscapeEnabled) { - // 字符串要考虑转义 - sqlStringBuilder.append("'") - .append(escapeSqlSpecialChar(rawValue)) - .append("'"); - } else { - sqlStringBuilder.append("'").append(rawValue).append("'"); - } + sqlStringBuilder.append("'").append(rawValue).append("'"); } } From 0e0095f476a7e9e4da2eaba500a7ad1e61fd3a55 Mon Sep 17 00:00:00 2001 From: F-ca7 <627955292@qq.com> Date: Tue, 26 Jul 2022 19:22:35 +0800 Subject: [PATCH 02/12] reduce array copy operations --- .../main/java/datasource/DatasourceConstant.java | 2 +- batch-tool/src/main/java/util/DbUtil.java | 4 ++-- batch-tool/src/main/java/util/FileUtil.java | 16 ++++++++++------ .../java/worker/common/BaseDefaultConsumer.java | 9 ++++++--- .../java/worker/common/BaseShardedConsumer.java | 6 +++--- 
.../main/java/worker/delete/DeleteConsumer.java | 9 +++++---- .../java/worker/delete/DeleteInConsumer.java | 4 ++-- .../worker/delete/ShardedDeleteInConsumer.java | 4 ++-- .../java/worker/insert/DirectImportWorker.java | 7 +++++-- .../main/java/worker/insert/ImportConsumer.java | 5 +++-- .../worker/insert/ProcessOnlyImportConsumer.java | 2 +- .../worker/insert/ShardedImportConsumer.java | 2 +- .../main/java/worker/update/ReplaceConsumer.java | 2 +- .../worker/update/ShardedReplaceConsumer.java | 2 +- .../main/java/worker/update/UpdateConsumer.java | 4 ++-- .../worker/update/UpdateWithFuncConsumer.java | 9 +++++---- .../worker/update/UpdateWithFuncInConsumer.java | 4 ++-- .../src/main/java/worker/util/DeleteUtil.java | 4 ++-- .../src/main/java/worker/util/ImportUtil.java | 16 ++++++++-------- .../src/main/java/worker/util/UpdateUtil.java | 14 +++++++------- 20 files changed, 69 insertions(+), 56 deletions(-) diff --git a/batch-tool/src/main/java/datasource/DatasourceConstant.java b/batch-tool/src/main/java/datasource/DatasourceConstant.java index c9b85ca..0e80f0b 100644 --- a/batch-tool/src/main/java/datasource/DatasourceConstant.java +++ b/batch-tool/src/main/java/datasource/DatasourceConstant.java @@ -18,7 +18,7 @@ public class DatasourceConstant { - public static final int MAX_CONN_NUM = 2048; + public static final int MAX_CONN_NUM = 1024; public static final int MIN_CONN_NUM = 32; /** diff --git a/batch-tool/src/main/java/util/DbUtil.java b/batch-tool/src/main/java/util/DbUtil.java index 8fcce71..89a15f9 100644 --- a/batch-tool/src/main/java/util/DbUtil.java +++ b/batch-tool/src/main/java/util/DbUtil.java @@ -401,10 +401,10 @@ public static int getPartitionIndex(String value, PartitionKey partitionKey) { /** * 拼接出主键的where条件 */ - public static String formatPkConditions(List pkList, String[] values) { + public static String formatPkConditions(List pkList, List values) { String[] pkConditions = new String[pkList.size()]; for (int i = 0; i < pkList.size(); i++) { - 
pkConditions[i] = pkList.get(i).getName() + "='" + values[i] + "'"; + pkConditions[i] = pkList.get(i).getName() + "='" + values.get(i) + "'"; } return StringUtils.join(pkConditions, " AND "); } diff --git a/batch-tool/src/main/java/util/FileUtil.java b/batch-tool/src/main/java/util/FileUtil.java index 6474d79..5575e93 100644 --- a/batch-tool/src/main/java/util/FileUtil.java +++ b/batch-tool/src/main/java/util/FileUtil.java @@ -144,9 +144,13 @@ public static ByteBuffer getNullStrWithCommaByteBuffer() { } - public static String[] split(String line, String sep, boolean withLastSep, boolean hasEscapedQuote) { - ArrayList values = splitWithQuoteEscape(line, sep, withLastSep, 10, hasEscapedQuote); - return values.toArray(new String[values.size()]); + public static List split(String line, String sep, boolean withLastSep, boolean hasEscapedQuote) { + return splitWithEstimateCount(line, sep, withLastSep, 16, hasEscapedQuote); + } + + public static List splitWithEstimateCount(String line, String sep, boolean withLastSep, + int estimateCount, boolean hasEscapedQuote) { + return splitWithQuoteEscape(line, sep, withLastSep, estimateCount, hasEscapedQuote); } /** @@ -166,7 +170,7 @@ public static String[] split(String line, String sep, boolean withLastSep, int e } private static ArrayList splitWithQuoteEscape(String line, String sep, final boolean withLastSep, - int expectedCount, final boolean hasEscapedQuote) { + int estimateCount, final boolean hasEscapedQuote) { char[] chars = line.toCharArray(); int len = chars.length; @@ -174,8 +178,8 @@ private static ArrayList splitWithQuoteEscape(String line, String sep, f // 结尾有分隔符则忽略 len -= sep.length(); } - ArrayList subStrings = new ArrayList<>(expectedCount); - StringBuilder stringBuilder = new StringBuilder(line.length() / expectedCount); + ArrayList subStrings = new ArrayList<>(estimateCount); + StringBuilder stringBuilder = new StringBuilder(line.length() / estimateCount); char sepStart = sep.charAt(0); boolean enclosingByQuote 
= false; boolean endsWithSep = false; diff --git a/batch-tool/src/main/java/worker/common/BaseDefaultConsumer.java b/batch-tool/src/main/java/worker/common/BaseDefaultConsumer.java index 49f50ec..8d4c3b1 100644 --- a/batch-tool/src/main/java/worker/common/BaseDefaultConsumer.java +++ b/batch-tool/src/main/java/worker/common/BaseDefaultConsumer.java @@ -25,6 +25,7 @@ import java.sql.Connection; import java.sql.SQLException; import java.sql.Statement; +import java.util.List; import static model.config.ConfigConstant.END_OF_BATCH_LINES; @@ -34,6 +35,8 @@ public abstract class BaseDefaultConsumer extends BaseWorkHandler { private static final Logger logger = LoggerFactory.getLogger(BaseDefaultConsumer.class); + protected int estimateFieldCount = 16; + protected void initLocalVars() { super.initLocalVars(); } @@ -59,8 +62,8 @@ public void onProxyEvent(BatchLineEvent event) { if (line == END_OF_BATCH_LINES) { break; } - String[] values = FileUtil.split(line, sep, - consumerContext.isWithLastSep(), hasEscapedQuote); + List values = FileUtil.splitWithEstimateCount(line, sep, + consumerContext.isWithLastSep(), estimateFieldCount, hasEscapedQuote); fillLocalBuffer(stringBuilder, values); } @@ -81,7 +84,7 @@ public void onProxyEvent(BatchLineEvent event) { } } - protected abstract void fillLocalBuffer(StringBuilder stringBuilder, String[] values); + protected abstract void fillLocalBuffer(StringBuilder stringBuilder, List values); protected abstract String getSql(StringBuilder data); diff --git a/batch-tool/src/main/java/worker/common/BaseShardedConsumer.java b/batch-tool/src/main/java/worker/common/BaseShardedConsumer.java index ecc5827..bc655d4 100644 --- a/batch-tool/src/main/java/worker/common/BaseShardedConsumer.java +++ b/batch-tool/src/main/java/worker/common/BaseShardedConsumer.java @@ -68,9 +68,9 @@ public void onProxyEvent(BatchLineEvent event) { if (line == END_OF_BATCH_LINES) { break; } - String[] values = FileUtil.split(line, sep, + List values = 
FileUtil.split(line, sep, consumerContext.isWithLastSep(), hasEscapedQuote); - partitionFieldValue = values[partitionKey.getFieldMetaInfo().getIndex()]; + partitionFieldValue = values.get(partitionKey.getFieldMetaInfo().getIndex()); partitionIndex = DbUtil.getPartitionIndex(partitionFieldValue, partitionKey); try { @@ -107,7 +107,7 @@ public void onProxyEvent(BatchLineEvent event) { * 根据切分出的字段值 * 按照格式填充localBuffer */ - protected abstract void fillLocalBuffer(StringBuilder localBuffer, String[] values, + protected abstract void fillLocalBuffer(StringBuilder localBuffer, List values, List fieldMetaInfoList) throws Throwable; /** diff --git a/batch-tool/src/main/java/worker/delete/DeleteConsumer.java b/batch-tool/src/main/java/worker/delete/DeleteConsumer.java index 82389b4..cdcecb1 100644 --- a/batch-tool/src/main/java/worker/delete/DeleteConsumer.java +++ b/batch-tool/src/main/java/worker/delete/DeleteConsumer.java @@ -22,6 +22,7 @@ import worker.common.BaseDefaultConsumer; import worker.util.DeleteUtil; +import java.util.ArrayList; import java.util.List; public class DeleteConsumer extends BaseDefaultConsumer { @@ -35,10 +36,10 @@ protected void initLocalVars() { } @Override - protected void fillLocalBuffer(StringBuilder stringBuilder, String[] values) { - String[] pkValues = new String[pkList.size()]; - for (int i = 0; i < pkList.size(); i++) { - pkValues[i] = values[pkList.get(i).getOrdinalPosition() - 1]; + protected void fillLocalBuffer(StringBuilder stringBuilder, List values) { + List pkValues = new ArrayList<>(pkList.size()); + for (PrimaryKey primaryKey : pkList) { + pkValues.add(values.get(primaryKey.getOrdinalPosition() - 1)); } stringBuilder.append(DeleteUtil.getDeleteSql(tableName, pkList, pkValues, consumerContext.getWhereCondition())); diff --git a/batch-tool/src/main/java/worker/delete/DeleteInConsumer.java b/batch-tool/src/main/java/worker/delete/DeleteInConsumer.java index 734c271..8c11795 100644 --- 
a/batch-tool/src/main/java/worker/delete/DeleteInConsumer.java +++ b/batch-tool/src/main/java/worker/delete/DeleteInConsumer.java @@ -40,9 +40,9 @@ protected void initLocalVars() { } @Override - protected void fillLocalBuffer(StringBuilder stringBuilder, String[] values) { + protected void fillLocalBuffer(StringBuilder stringBuilder, List values) { for (int i = 0; i < pkList.size(); i++) { - pkValues[i] = values[pkList.get(i).getOrdinalPosition() - 1]; + pkValues[i] = values.get(pkList.get(i).getOrdinalPosition() - 1); } stringBuilder.append("("); DeleteUtil.appendPkValuesByFieldMetaInfo(stringBuilder, fieldMetaInfoList, diff --git a/batch-tool/src/main/java/worker/delete/ShardedDeleteInConsumer.java b/batch-tool/src/main/java/worker/delete/ShardedDeleteInConsumer.java index ac02fe8..d7da269 100644 --- a/batch-tool/src/main/java/worker/delete/ShardedDeleteInConsumer.java +++ b/batch-tool/src/main/java/worker/delete/ShardedDeleteInConsumer.java @@ -43,11 +43,11 @@ protected void initLocalVars() { */ @Override protected void fillLocalBuffer(StringBuilder localBuffer, - String[] values, + List values, List fieldMetaInfoList) throws Throwable { for (int i = 0; i < pkList.size(); i++) { - pkValues[i] = values[pkList.get(i).getOrdinalPosition() - 1]; + pkValues[i] = values.get(pkList.get(i).getOrdinalPosition() - 1); } localBuffer.append("("); diff --git a/batch-tool/src/main/java/worker/insert/DirectImportWorker.java b/batch-tool/src/main/java/worker/insert/DirectImportWorker.java index b730ca0..2fa8a88 100644 --- a/batch-tool/src/main/java/worker/insert/DirectImportWorker.java +++ b/batch-tool/src/main/java/worker/insert/DirectImportWorker.java @@ -39,10 +39,13 @@ import java.io.IOException; import java.io.InputStreamReader; import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Paths; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Statement; +import java.util.Arrays; import 
java.util.List; /** @@ -102,13 +105,13 @@ public void run() { curFile = filePath; curLine = startLine; CSVReader reader = new CSVReaderBuilder(new InputStreamReader( - new FileInputStream(filePath), charset)) + Files.newInputStream(Paths.get(filePath)), charset)) .withCSVParser(parser).build(); reader.skip(startLine - 1); for (String[] values; (values = reader.readNext()) != null; ) { try { ImportUtil.getDirectImportSql(insertSqlBuilder, tableName, - fieldMetaInfoList, values, sqlEscapeEnabled, true); + fieldMetaInfoList, Arrays.asList(values), sqlEscapeEnabled, true); stmt.execute(insertSqlBuilder.toString()); importedLines++; diff --git a/batch-tool/src/main/java/worker/insert/ImportConsumer.java b/batch-tool/src/main/java/worker/insert/ImportConsumer.java index 8316ff6..90e5550 100644 --- a/batch-tool/src/main/java/worker/insert/ImportConsumer.java +++ b/batch-tool/src/main/java/worker/insert/ImportConsumer.java @@ -41,12 +41,13 @@ public class ImportConsumer extends BaseDefaultConsumer { protected void initLocalVars() { super.initLocalVars(); this.fieldMetaInfoList = consumerContext.getTableFieldMetaInfo(tableName).getFieldMetaInfoList(); + this.estimateFieldCount = fieldMetaInfoList.size(); this.columns = consumerContext.getUseColumns(); - this.insertSqlBuilder = new StringBuilder(40 + fieldMetaInfoList.size() * 10); + this.insertSqlBuilder = new StringBuilder(64 + fieldMetaInfoList.size() * 16); } @Override - protected void fillLocalBuffer(StringBuilder stringBuilder, String[] values) { + protected void fillLocalBuffer(StringBuilder stringBuilder, List values) { stringBuilder.append("("); try { ImportUtil.appendValuesByFieldMetaInfo(stringBuilder, fieldMetaInfoList, diff --git a/batch-tool/src/main/java/worker/insert/ProcessOnlyImportConsumer.java b/batch-tool/src/main/java/worker/insert/ProcessOnlyImportConsumer.java index 87dd8c2..72ea58d 100644 --- a/batch-tool/src/main/java/worker/insert/ProcessOnlyImportConsumer.java +++ 
b/batch-tool/src/main/java/worker/insert/ProcessOnlyImportConsumer.java @@ -61,7 +61,7 @@ public void onProxyEvent(BatchLineEvent event) { if (line == END_OF_BATCH_LINES) { break; } - String[] values = FileUtil.split(line, consumerContext.getSeparator(), + List values = FileUtil.split(line, consumerContext.getSeparator(), consumerContext.isWithLastSep(), hasEscapedQuote); stringBuilder.append("("); try { diff --git a/batch-tool/src/main/java/worker/insert/ShardedImportConsumer.java b/batch-tool/src/main/java/worker/insert/ShardedImportConsumer.java index c7f4c0e..c8fc5f3 100644 --- a/batch-tool/src/main/java/worker/insert/ShardedImportConsumer.java +++ b/batch-tool/src/main/java/worker/insert/ShardedImportConsumer.java @@ -30,7 +30,7 @@ public class ShardedImportConsumer extends BaseShardedConsumer { @Override protected void fillLocalBuffer(StringBuilder localBuffer, - String[] values, + List values, List fieldMetaInfoList) throws Throwable { localBuffer.append("("); ImportUtil.appendValuesByFieldMetaInfo(localBuffer, fieldMetaInfoList, diff --git a/batch-tool/src/main/java/worker/update/ReplaceConsumer.java b/batch-tool/src/main/java/worker/update/ReplaceConsumer.java index 58f2423..82c0e5f 100644 --- a/batch-tool/src/main/java/worker/update/ReplaceConsumer.java +++ b/batch-tool/src/main/java/worker/update/ReplaceConsumer.java @@ -35,7 +35,7 @@ protected void initLocalVars() { } @Override - protected void fillLocalBuffer(StringBuilder stringBuilder, String[] values) { + protected void fillLocalBuffer(StringBuilder stringBuilder, List values) { stringBuilder.append("("); stringBuilder.append(UpdateUtil.getUpdatedValuesByMetaInfo(consumerContext.getTablePkIndexSet(tableName), values, fieldMetaInfoList)); diff --git a/batch-tool/src/main/java/worker/update/ShardedReplaceConsumer.java b/batch-tool/src/main/java/worker/update/ShardedReplaceConsumer.java index 900360a..85382cc 100644 --- a/batch-tool/src/main/java/worker/update/ShardedReplaceConsumer.java +++ 
b/batch-tool/src/main/java/worker/update/ShardedReplaceConsumer.java @@ -29,7 +29,7 @@ public class ShardedReplaceConsumer extends BaseShardedConsumer { private static final Logger logger = LoggerFactory.getLogger(ShardedReplaceConsumer.class); @Override - protected void fillLocalBuffer(StringBuilder localBuffer, String[] values, List fieldMetaInfoList) { + protected void fillLocalBuffer(StringBuilder localBuffer, List values, List fieldMetaInfoList) { localBuffer.append("("); localBuffer.append(UpdateUtil.getUpdatedValuesByMetaInfo(consumerContext.getTablePkIndexSet(tableName), values, fieldMetaInfoList)); diff --git a/batch-tool/src/main/java/worker/update/UpdateConsumer.java b/batch-tool/src/main/java/worker/update/UpdateConsumer.java index 1fb2c2f..a8a0b28 100644 --- a/batch-tool/src/main/java/worker/update/UpdateConsumer.java +++ b/batch-tool/src/main/java/worker/update/UpdateConsumer.java @@ -40,9 +40,9 @@ protected void initLocalVars() { } @Override - protected void fillLocalBuffer(StringBuilder stringBuilder, String[] values) { + protected void fillLocalBuffer(StringBuilder stringBuilder, List values) { for (int i = 0; i < pkList.size(); i++) { - pkValues[i] = values[pkList.get(i).getOrdinalPosition() - 1]; + pkValues[i] = values.get(pkList.get(i).getOrdinalPosition() - 1); } stringBuilder.append(UpdateUtil.getUpdateSql(tableName, pkList, consumerContext.getTablePkIndexSet(tableName), diff --git a/batch-tool/src/main/java/worker/update/UpdateWithFuncConsumer.java b/batch-tool/src/main/java/worker/update/UpdateWithFuncConsumer.java index 45cef91..ab96740 100644 --- a/batch-tool/src/main/java/worker/update/UpdateWithFuncConsumer.java +++ b/batch-tool/src/main/java/worker/update/UpdateWithFuncConsumer.java @@ -22,6 +22,7 @@ import worker.common.BaseDefaultConsumer; import worker.util.UpdateUtil; +import java.util.ArrayList; import java.util.List; /** @@ -31,18 +32,18 @@ public class UpdateWithFuncConsumer extends BaseDefaultConsumer { private static final 
Logger logger = LoggerFactory.getLogger(UpdateWithFuncConsumer.class); private List pkList; - private String[] pkValues; + private List pkValues; @Override protected void initLocalVars() { this.pkList = consumerContext.getTablePkList(tableName); - this.pkValues = new String[pkList.size()]; + this.pkValues = new ArrayList<>(pkList.size()); } @Override - protected void fillLocalBuffer(StringBuilder stringBuilder, String[] values) { + protected void fillLocalBuffer(StringBuilder stringBuilder, List values) { for (int i = 0; i < pkList.size(); i++) { - pkValues[i] = values[pkList.get(i).getOrdinalPosition() - 1]; + pkValues.add(values.get(pkList.get(i).getOrdinalPosition() - 1)); } stringBuilder.append(UpdateUtil.getUpdateWithFuncSql(consumerContext.getUpdateWithFuncPattern(), diff --git a/batch-tool/src/main/java/worker/update/UpdateWithFuncInConsumer.java b/batch-tool/src/main/java/worker/update/UpdateWithFuncInConsumer.java index 0b2bb2c..268d73e 100644 --- a/batch-tool/src/main/java/worker/update/UpdateWithFuncInConsumer.java +++ b/batch-tool/src/main/java/worker/update/UpdateWithFuncInConsumer.java @@ -45,9 +45,9 @@ protected void initLocalVars() { } @Override - protected void fillLocalBuffer(StringBuilder stringBuilder, String[] values) { + protected void fillLocalBuffer(StringBuilder stringBuilder, List values) { for (int i = 0; i < pkList.size(); i++) { - pkValues[i] = values[pkList.get(i).getOrdinalPosition() - 1]; + pkValues[i] = values.get(pkList.get(i).getOrdinalPosition() - 1); } stringBuilder.append("("); // 此处与删除in的逻辑相同 diff --git a/batch-tool/src/main/java/worker/util/DeleteUtil.java b/batch-tool/src/main/java/worker/util/DeleteUtil.java index 4a09368..f3dbd24 100644 --- a/batch-tool/src/main/java/worker/util/DeleteUtil.java +++ b/batch-tool/src/main/java/worker/util/DeleteUtil.java @@ -99,7 +99,7 @@ public static String getDeleteSqlWithHint(String nodeName, String tableName, Lis /** * 简单的根据主键delete一行 */ - public static String getDeleteSql(String 
tableName, List pkList, String[] values, String where) { + public static String getDeleteSql(String tableName, List pkList, List values, String where) { if (StringUtils.isEmpty(where)) { return getDeleteSql(tableName, pkList, values); } @@ -108,7 +108,7 @@ public static String getDeleteSql(String tableName, List pkList, Str return String.format(sqlPattern, tableName, pkCondition, where); } - public static String getDeleteSql(String tableName, List pkList, String[] values) { + public static String getDeleteSql(String tableName, List pkList, List values) { String sqlPattern = "DELETE FROM `%s` WHERE %s;"; String pkCondition = DbUtil.formatPkConditions(pkList, values); return String.format(sqlPattern, tableName, pkCondition); diff --git a/batch-tool/src/main/java/worker/util/ImportUtil.java b/batch-tool/src/main/java/worker/util/ImportUtil.java index c2ff8df..3acdc63 100644 --- a/batch-tool/src/main/java/worker/util/ImportUtil.java +++ b/batch-tool/src/main/java/worker/util/ImportUtil.java @@ -121,33 +121,33 @@ public static void appendInsertNonStrValue(StringBuilder sqlStringBuilder, Strin public static void appendValuesByFieldMetaInfo(StringBuilder stringBuilder, List fieldMetaInfoList, - String[] values, boolean sqlEscapeEnabled, + List values, boolean sqlEscapeEnabled, boolean hasEscapedQuote) throws DatabaseException { - if (fieldMetaInfoList.size() != values.length) { + if (fieldMetaInfoList.size() != values.size()) { throw new DatabaseException(String.format("required field size %d, " - + "actual size %d", fieldMetaInfoList.size(), values.length)); + + "actual size %d", fieldMetaInfoList.size(), values.size())); } int fieldLen = fieldMetaInfoList.size(); for (int i = 0; i < fieldLen - 1; i++) { if (fieldMetaInfoList.get(i).needQuote()) { // 字符串和日期都需要单引号 - ImportUtil.appendInsertStrValue(stringBuilder, values[i], sqlEscapeEnabled, hasEscapedQuote); + ImportUtil.appendInsertStrValue(stringBuilder, values.get(i), sqlEscapeEnabled, hasEscapedQuote); } else { - 
ImportUtil.appendInsertNonStrValue(stringBuilder, values[i], hasEscapedQuote); + ImportUtil.appendInsertNonStrValue(stringBuilder, values.get(i), hasEscapedQuote); } stringBuilder.append(","); } if (fieldMetaInfoList.get(fieldLen - 1).needQuote()) { - ImportUtil.appendInsertStrValue(stringBuilder, values[fieldLen - 1], sqlEscapeEnabled, hasEscapedQuote); + ImportUtil.appendInsertStrValue(stringBuilder, values.get(fieldLen - 1), sqlEscapeEnabled, hasEscapedQuote); } else { - ImportUtil.appendInsertNonStrValue(stringBuilder, values[fieldLen - 1], hasEscapedQuote); + ImportUtil.appendInsertNonStrValue(stringBuilder, values.get(fieldLen - 1), hasEscapedQuote); } } public static void getDirectImportSql(StringBuilder stringBuilder, String tableName, List fieldMetaInfoList, - String[] values, boolean sqlEscapeEnabled, + List values, boolean sqlEscapeEnabled, boolean hasEscapedQuote) throws DatabaseException { stringBuilder.append("INSERT INTO `").append(tableName).append("` VALUES ("); appendValuesByFieldMetaInfo(stringBuilder, fieldMetaInfoList, values, diff --git a/batch-tool/src/main/java/worker/util/UpdateUtil.java b/batch-tool/src/main/java/worker/util/UpdateUtil.java index ff990f6..67b37b2 100644 --- a/batch-tool/src/main/java/worker/util/UpdateUtil.java +++ b/batch-tool/src/main/java/worker/util/UpdateUtil.java @@ -89,14 +89,14 @@ public static String getBatchReplaceSql(String tableName, /** * 根据字段类型对值进行更新 */ - public static String getUpdatedValuesByMetaInfo(Set pkIndexSet, String[] values, + public static String getUpdatedValuesByMetaInfo(Set pkIndexSet, List values, List fieldMetaInfoList) { List updatedValueList = new ArrayList<>(fieldMetaInfoList.size()); String fieldValue; int fieldIntValue; float fieldFloatValue; for (FieldMetaInfo fieldMetaInfo : fieldMetaInfoList) { - fieldValue = values[fieldMetaInfo.getIndex()]; + fieldValue = values.get(fieldMetaInfo.getIndex()); if (pkIndexSet.contains(fieldMetaInfo.getIndex())) { // 主键不变 if (fieldMetaInfo.getType() == 
FieldMetaInfo.Type.STRING) { @@ -154,7 +154,7 @@ private static String formatTableFieldWithPlaceholder(TableFieldMetaInfo tableFi } public static String getUpdateSql(String tableName, List pkList, Set pkIndexSet, - List fieldMetaInfoList, String[] values, + List fieldMetaInfoList, List values, String where) { if (StringUtils.isEmpty(where)) { return getUpdateSql(tableName, pkList, @@ -168,7 +168,7 @@ public static String getUpdateSql(String tableName, List pkList, Set } public static String getUpdateSql(String tableName, List pkList, Set pkIndexSet, - List fieldMetaInfoList, String[] values) { + List fieldMetaInfoList, List values) { String updateSqlPattern = "UPDATE %s SET %s WHERE %s;"; String pkCondition = DbUtil.formatPkConditions(pkList, values); String setUpdatedValue = formatSetUpdatedValues(pkIndexSet, fieldMetaInfoList, values); @@ -178,13 +178,13 @@ public static String getUpdateSql(String tableName, List pkList, Set public static String formatSetUpdatedValues(Set pkIndexSet, List fieldMetaInfoList, - String[] values) { + List values) { List updatedValueList = new ArrayList<>(fieldMetaInfoList.size() - pkIndexSet.size()); String fieldValue; int fieldIntValue; float fieldFloatValue; for (FieldMetaInfo fieldMetaInfo : fieldMetaInfoList) { - fieldValue = values[fieldMetaInfo.getIndex()]; + fieldValue = values.get(fieldMetaInfo.getIndex()); if (pkIndexSet.contains(fieldMetaInfo.getIndex())) { // 主键不在set的值里面 continue; @@ -222,7 +222,7 @@ public static String formatSetUpdatedValues(Set pkIndexSet, } public static String getUpdateWithFuncSql(String updateWithFuncPattern, List pkList, - String[] pkValues) { + List pkValues) { String pkCondition = DbUtil.formatPkConditions(pkList, pkValues); return String.format(updateWithFuncPattern, pkCondition); } From 64a56cf91c7398026de3cf0d8a19eea2e6609417 Mon Sep 17 00:00:00 2001 From: F-ca7 <627955292@qq.com> Date: Tue, 2 Aug 2022 17:13:54 +0800 Subject: [PATCH 03/12] refactor BaseExportWorker: make produceData a common 
method --- .../java/worker/export/BaseExportWorker.java | 79 ++++++++++++++++- .../worker/export/DirectExportWorker.java | 85 ++++++------------ .../java/worker/export/ExportProducer.java | 87 +++++-------------- 3 files changed, 127 insertions(+), 124 deletions(-) diff --git a/batch-tool/src/main/java/worker/export/BaseExportWorker.java b/batch-tool/src/main/java/worker/export/BaseExportWorker.java index c3f2f6f..dfa7da7 100644 --- a/batch-tool/src/main/java/worker/export/BaseExportWorker.java +++ b/batch-tool/src/main/java/worker/export/BaseExportWorker.java @@ -18,20 +18,33 @@ import model.config.CompressMode; import model.config.FileFormat; +import model.config.GlobalVar; import model.config.QuoteEncloseMode; import model.db.FieldMetaInfo; import model.db.TableFieldMetaInfo; import model.db.TableTopology; +import model.mask.AbstractDataMasker; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import util.DataSourceUtil; import util.FileUtil; +import util.IOUtil; import javax.sql.DataSource; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; public abstract class BaseExportWorker implements Runnable { + private static final Logger logger = LoggerFactory.getLogger(BaseExportWorker.class); + protected final DataSource druid; protected final TableTopology topology; protected final TableFieldMetaInfo tableFieldMetaInfo; @@ -44,6 +57,10 @@ public abstract class BaseExportWorker implements Runnable { protected final List isStringTypeList; + protected Map columnDataMasker; + protected ByteArrayOutputStream os; + protected int bufferedRowNum = 0; // 已经缓存的行数 + protected BaseExportWorker(DataSource druid, TableTopology topology, TableFieldMetaInfo tableFieldMetaInfo, String separator, QuoteEncloseMode quoteEncloseMode) { @@ 
-82,11 +99,66 @@ protected BaseExportWorker(DataSource druid, TableTopology topology, this.fileFormat = fileFormat; } + protected void produceData() { + String sql = getExportSql(); + + try (Connection conn = druid.getConnection(); + Statement stmt = DataSourceUtil.createStreamingStatement(conn); + ResultSet resultSet = stmt.executeQuery(sql)) { + + logger.info("{} 开始执行导出", topology); + + byte[] value; + int colNum = resultSet.getMetaData().getColumnCount(); + this.os = new ByteArrayOutputStream(colNum * 16); + while (resultSet.next()) { + for (int i = 1; i < colNum; i++) { + value = resultSet.getBytes(i); + writeFieldValue(os, value, i - 1); + // 附加分隔符 + os.write(separator); + } + value = resultSet.getBytes(colNum); + writeFieldValue(os, value, colNum - 1); + // 附加换行符 + os.write(FileUtil.SYS_NEW_LINE_BYTE); + bufferedRowNum++; + + if (bufferedRowNum == GlobalVar.EMIT_BATCH_SIZE) { + emitBatchData(); + os.reset(); + bufferedRowNum = 0; + } + } + if (bufferedRowNum != 0) { + // 最后剩余的元组 + dealWithRemainData(); + os.reset(); + } + afterProduceData(); + } catch (SQLException | IOException e) { + e.printStackTrace(); + logger.error("{} 导出发生错误: {}", topology, e.getMessage()); + } finally { + IOUtil.close(os); + } + } + + protected void afterProduceData() { + } + + protected abstract void emitBatchData(); + + protected abstract void dealWithRemainData(); + + protected abstract String getExportSql(); + /** * 根据引号模式来写入字段值 - * todo 字段值本身包含引号 还需要对引号重新转义 + * @param columnIdx 从 0 开始 */ - protected void writeFieldValue(ByteArrayOutputStream os, byte[] value, boolean isStringType) throws IOException { + protected void writeFieldValue(ByteArrayOutputStream os, byte[] value, int columnIdx) throws IOException { + boolean isStringType = isStringTypeList.get(columnIdx); switch (quoteEncloseMode) { case NONE: FileUtil.writeToByteArrayStream(os, value); @@ -115,4 +187,7 @@ public void setCompressMode(CompressMode compressMode) { this.compressMode = compressMode; } + public void 
setColumnDataMasker(Map columnDataMasker) { + this.columnDataMasker = columnDataMasker; + } } diff --git a/batch-tool/src/main/java/worker/export/DirectExportWorker.java b/batch-tool/src/main/java/worker/export/DirectExportWorker.java index 49bbc55..3700dd4 100644 --- a/batch-tool/src/main/java/worker/export/DirectExportWorker.java +++ b/batch-tool/src/main/java/worker/export/DirectExportWorker.java @@ -36,7 +36,6 @@ import javax.sql.DataSource; import java.io.ByteArrayOutputStream; -import java.io.IOException; import java.nio.charset.Charset; import java.sql.Connection; import java.sql.ResultSet; @@ -224,59 +223,37 @@ private void afterRun() { fileWriter.close(); } - private void produceData() { - String sql = getExportSql(); + @Override + protected void emitBatchData() { + if (isLimitLine() && curLineNum + bufferedRowNum > maxLine) { + // 超过了行数 + // 新建文件 + createNewPartFile(); + } + writeToFile(os); + curLineNum += bufferedRowNum; + } - try (Connection conn = druid.getConnection(); - Statement stmt = DataSourceUtil.createStreamingStatement(conn); - ResultSet resultSet = stmt.executeQuery(sql)) { + @Override + protected void dealWithRemainData() { + if (isLimitLine() && curLineNum + bufferedRowNum > maxLine) { + // 超过了行数 + // 新建文件 + createNewPartFile(); + } + writeToFile(os); + bufferedRowNum = 0; + } - logger.info("{} 开始导出", topology); - // 字段数 - int colNum; - // 已经缓存的行数 - int bufferedRowNum = 0; - byte[] value; - ByteArrayOutputStream os = new ByteArrayOutputStream(); - colNum = resultSet.getMetaData().getColumnCount(); - while (resultSet.next()) { - for (int i = 1; i < colNum; i++) { - value = resultSet.getBytes(i); - writeFieldValue(os, value, isStringTypeList.get(i - 1)); - // 附加分隔符 - os.write(separator); - } - value = resultSet.getBytes(colNum); - writeFieldValue(os, value, isStringTypeList.get(colNum - 1)); - // 附加换行符 - os.write(FileUtil.SYS_NEW_LINE_BYTE); + @Override + protected String getExportSql() { + return ExportUtil.getDirectSql(topology, + 
tableFieldMetaInfo.getFieldMetaInfoList(), whereCondition); + } - bufferedRowNum++; - if (bufferedRowNum == GlobalVar.EMIT_BATCH_SIZE) { - if (isLimitLine() && curLineNum + bufferedRowNum > maxLine) { - // 超过了行数 - // 新建文件 - createNewPartFile(); - } - writeToFile(os); - curLineNum += bufferedRowNum; - bufferedRowNum = 0; - } - } - if (bufferedRowNum != 0) { - // 最后剩余的元组 - if (isLimitLine() && curLineNum + bufferedRowNum > maxLine) { - // 超过了行数 - // 新建文件 - createNewPartFile(); - } - writeToFile(os); - bufferedRowNum = 0; - } - logger.info("{} 导出完成", topology); - } catch (SQLException | IOException e) { - e.printStackTrace(); - } + @Override + protected void afterProduceData() { + logger.info("{} 导出完成", topology); } private void writeToFile(ByteArrayOutputStream os) { @@ -290,7 +267,6 @@ private void writeToFile(ByteArrayOutputStream os) { } } fileWriter.write(data); - os.reset(); } /** @@ -320,11 +296,6 @@ private void produceDataByLine() { } } - protected String getExportSql() { - return ExportUtil.getDirectSql(topology, - tableFieldMetaInfo.getFieldMetaInfoList(), whereCondition); - } - private boolean isLimitLine() { return maxLine != 0; } diff --git a/batch-tool/src/main/java/worker/export/ExportProducer.java b/batch-tool/src/main/java/worker/export/ExportProducer.java index 5796863..5adaaaa 100644 --- a/batch-tool/src/main/java/worker/export/ExportProducer.java +++ b/batch-tool/src/main/java/worker/export/ExportProducer.java @@ -16,7 +16,6 @@ package worker.export; -import com.alibaba.druid.util.JdbcUtils; import com.lmax.disruptor.RingBuffer; import model.config.QuoteEncloseMode; import model.db.FieldMetaInfo; @@ -24,25 +23,15 @@ import model.db.TableTopology; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import util.DataSourceUtil; -import util.FileUtil; import worker.util.ExportUtil; import javax.sql.DataSource; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.sql.Connection; -import java.sql.ResultSet; -import 
java.sql.SQLException; -import java.sql.Statement; import java.util.List; import java.util.Queue; import java.util.concurrent.CountDownLatch; import java.util.concurrent.Semaphore; import java.util.concurrent.atomic.AtomicInteger; -import static model.config.GlobalVar.EMIT_BATCH_SIZE; - public class ExportProducer extends BaseExportWorker { private static final Logger logger = LoggerFactory.getLogger(ExportProducer.class); @@ -98,63 +87,31 @@ private void afterRun() { countDownLatch.countDown(); } - public void produceData() { - List metaInfoList = tableFieldMetaInfo.getFieldMetaInfoList(); - String sql = ExportUtil.getDirectSql(topology, metaInfoList, whereCondition); - - // 字段数 - int colNum; - // 已经缓存的行数 - int bufferedRowNum = 0; - byte[] value; - // 字段数过多可考虑增加os的初始长度 - ByteArrayOutputStream os = new ByteArrayOutputStream(metaInfoList.size() * 8); - Connection conn = null; - Statement stmt = null; - ResultSet resultSet = null; - try { - conn = druid.getConnection(); - stmt = DataSourceUtil.createStreamingStatement(conn); - logger.info("{} 开始执行导出", topology); - resultSet = stmt.executeQuery(sql); - colNum = resultSet.getMetaData().getColumnCount(); - while (resultSet.next()) { - for (int i = 1; i < colNum; i++) { - value = resultSet.getBytes(i); - writeFieldValue(os, value, isStringTypeList.get(i - 1)); - // 附加分隔符 - os.write(separator); - } - value = resultSet.getBytes(colNum); - writeFieldValue(os, value, isStringTypeList.get(colNum - 1)); - // 附加换行符 - os.write(FileUtil.SYS_NEW_LINE_BYTE); - bufferedRowNum++; - if (bufferedRowNum == EMIT_BATCH_SIZE) { - emitData(os.toByteArray()); - os.reset(); - bufferedRowNum = 0; - } - } - if (bufferedRowNum != 0) { - // 最后剩余的元组 - if (collectFragmentEnabled) { - emitRemainData(os.toByteArray()); - } else { - emitData(os.toByteArray()); - } - } - logger.info("{} 发送完成", topology); - } catch (SQLException | IOException e) { - e.printStackTrace(); - logger.error(e.getMessage()); - } finally { - JdbcUtils.close(resultSet); - 
JdbcUtils.close(stmt); - JdbcUtils.close(conn); + @Override + protected void emitBatchData() { + emitData(os.toByteArray()); + } + + @Override + protected void dealWithRemainData() { + if (collectFragmentEnabled) { + emitRemainData(os.toByteArray()); + } else { + emitData(os.toByteArray()); } } + @Override + protected String getExportSql() { + List metaInfoList = tableFieldMetaInfo.getFieldMetaInfoList(); + return ExportUtil.getDirectSql(topology, metaInfoList, whereCondition); + } + + @Override + protected void afterProduceData() { + logger.info("{} 发送完成", topology); + } + /** * 发送数据给消费者 * From 28988ca5bfdba23a4a29c7525c262931e2b2192c Mon Sep 17 00:00:00 2001 From: F-ca7 <627955292@qq.com> Date: Thu, 4 Aug 2022 22:25:52 +0800 Subject: [PATCH 04/12] support data masking: 1. hiding 2. hash --- batch-tool/docs/usage-details.md | 12 +- batch-tool/pom.xml | 11 +- batch-tool/src/main/java/cmd/CommandUtil.java | 52 +++++-- .../exec/export/ShardingExportExecutor.java | 6 +- .../java/model/config/ConfigConstant.java | 3 +- .../main/java/model/config/ExportConfig.java | 17 +++ .../java/model/mask/AbstractDataMasker.java | 24 ++++ .../java/model/mask/DataMaskerFactory.java | 86 +++++++++++ .../src/main/java/model/mask/HashMasker.java | 73 ++++++++++ .../main/java/model/mask/HidingMasker.java | 120 ++++++++++++++++ .../src/main/java/model/mask/MaskType.java | 39 +++++ .../java/worker/common/reader/CsvReader.java | 3 + .../java/worker/export/BaseExportWorker.java | 34 ++++- .../java/worker/export/ExportProducer.java | 4 +- .../worker/factory/ExportWorkerFactory.java | 1 + .../worker/insert/DirectImportWorker.java | 4 + .../src/test/java/preprocess/MaskingTest.java | 136 ++++++++++++++++++ 17 files changed, 596 insertions(+), 29 deletions(-) create mode 100644 batch-tool/src/main/java/model/mask/AbstractDataMasker.java create mode 100644 batch-tool/src/main/java/model/mask/DataMaskerFactory.java create mode 100644 batch-tool/src/main/java/model/mask/HashMasker.java create mode 
100644 batch-tool/src/main/java/model/mask/HidingMasker.java create mode 100644 batch-tool/src/main/java/model/mask/MaskType.java create mode 100644 batch-tool/src/test/java/preprocess/MaskingTest.java diff --git a/batch-tool/docs/usage-details.md b/batch-tool/docs/usage-details.md index c03c4b0..f2ff71e 100644 --- a/batch-tool/docs/usage-details.md +++ b/batch-tool/docs/usage-details.md @@ -39,14 +39,10 @@ `-D sbtest -o export -s , -t "sbtest1" -sharding off` ### 进行数据脱敏 -#### 对手机号、邮箱、身份证等信息进行掩码保护 -内置默认规则的支持类型: -- 手机号 -- 邮箱 -- 身份证 - -`-D sbtest -o export -s , -t "customer" -mask ""` - +#### 对手机号进行掩码保护 +`-D sbtest -o export -s , -t "customer" -mask "{ +\"phone\": { \"type\": \"hiding\", \"show_region\" : \"0-2,8-10\" +}"` ## 数据库表导入 ### 单表导入 diff --git a/batch-tool/pom.xml b/batch-tool/pom.xml index 1875d9c..0a47460 100644 --- a/batch-tool/pom.xml +++ b/batch-tool/pom.xml @@ -18,7 +18,7 @@ 1.8 UTF-8 - 1.7.21 + 1.7.36 8.0.16 1.2.8 @@ -28,7 +28,8 @@ 2.6 2.6 1.4 - 4.6 + 2.0.7 + 5.6 27.0.1-jre 2.0.0.Final 1.60 @@ -102,6 +103,12 @@ ${commons-cli.version} + + com.alibaba.fastjson2 + fastjson2 + ${fastjson.version} + + com.google.guava diff --git a/batch-tool/src/main/java/cmd/CommandUtil.java b/batch-tool/src/main/java/cmd/CommandUtil.java index 4243f40..e88a091 100644 --- a/batch-tool/src/main/java/cmd/CommandUtil.java +++ b/batch-tool/src/main/java/cmd/CommandUtil.java @@ -16,12 +16,13 @@ package cmd; +import com.alibaba.fastjson2.JSONException; +import com.alibaba.fastjson2.JSONObject; import com.google.common.collect.Lists; import datasource.DataSourceConfig; import datasource.DatasourceConstant; import model.ConsumerExecutionContext; import model.ProducerExecutionContext; -import model.config.BaseConfig; import model.config.CompressMode; import model.config.ConfigConstant; import model.config.DdlMode; @@ -46,7 +47,9 @@ import java.nio.charset.Charset; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import 
java.util.stream.Collectors; import static model.config.ConfigConstant.*; @@ -229,7 +232,7 @@ private static List getColumnNames(CommandLine result) { } private static BaseOperateCommand parseImportCommand(CommandLine result) { - requireOnlyOneArg(result, ARG_SHORT_FROM, ARG_SHORT_DIRECTORY); + requireOnlyOneArg(result, ARG_SHORT_FROM_FILE, ARG_SHORT_DIRECTORY); ProducerExecutionContext producerExecutionContext = new ProducerExecutionContext(); ConsumerExecutionContext consumerExecutionContext = new ConsumerExecutionContext(); @@ -239,7 +242,7 @@ private static BaseOperateCommand parseImportCommand(CommandLine result) { } private static BaseOperateCommand parseDeleteCommand(CommandLine result) { - requireOnlyOneArg(result, ARG_SHORT_FROM, ARG_SHORT_DIRECTORY); + requireOnlyOneArg(result, ARG_SHORT_FROM_FILE, ARG_SHORT_DIRECTORY); ProducerExecutionContext producerExecutionContext = new ProducerExecutionContext(); ConsumerExecutionContext consumerExecutionContext = new ConsumerExecutionContext(); @@ -250,7 +253,7 @@ private static BaseOperateCommand parseDeleteCommand(CommandLine result) { } private static BaseOperateCommand parseUpdateCommand(CommandLine result) { - requireOnlyOneArg(result, ARG_SHORT_FROM, ARG_SHORT_DIRECTORY); + requireOnlyOneArg(result, ARG_SHORT_FROM_FILE, ARG_SHORT_DIRECTORY); ProducerExecutionContext producerExecutionContext = new ProducerExecutionContext(); ConsumerExecutionContext consumerExecutionContext = new ConsumerExecutionContext(); @@ -342,6 +345,7 @@ private static ExportCommand parseExportCommand(CommandLine result) { setFileNum(result, exportConfig); setFileLine(result, exportConfig); setOrderBy(result, exportConfig); + setColumnMaskerMap(result, exportConfig); exportConfig.validate(); return new ExportCommand(getDbName(result), tableNames, exportConfig); } @@ -371,6 +375,24 @@ private static void setOrderBy(CommandLine result, ExportConfig exportConfig) { } } + private static void setColumnMaskerMap(CommandLine result, 
ExportConfig exportConfig) { + if (result.hasOption(ARG_SHORT_MASK)) { + String maskConfigStr = result.getOptionValue(ARG_SHORT_MASK); + JSONObject maskConfig; + try { + maskConfig = JSONObject.parseObject(maskConfigStr); + } catch (JSONException e) { + throw new IllegalArgumentException("Illegal json format: " + maskConfigStr); + } + Map columnMaskerMap = new HashMap<>(); + for (String column : maskConfig.keySet()) { + JSONObject jsonConfig = maskConfig.getJSONObject(column); + columnMaskerMap.put(column, jsonConfig); + } + exportConfig.setColumnMaskerConfigMap(columnMaskerMap); + } + } + private static void setFileLine(CommandLine result, ExportConfig exportConfig) { if (result.hasOption(ARG_SHORT_LINE)) { if (exportConfig.getExportWay() != ExportConfig.ExportWay.DEFAULT) { @@ -528,8 +550,8 @@ private static boolean getForceParallelism(CommandLine result) { * 并检测文件是否存在 */ private static List getFileRecordList(CommandLine result) { - if (result.hasOption(ARG_SHORT_FROM)) { - String filePathListStr = result.getOptionValue(ARG_SHORT_FROM); + if (result.hasOption(ARG_SHORT_FROM_FILE)) { + String filePathListStr = result.getOptionValue(ARG_SHORT_FROM_FILE); return Arrays.stream(StringUtils.split(filePathListStr, CMD_SEPARATOR)) .filter(StringUtils::isNotBlank) .map(s -> { @@ -687,11 +709,11 @@ private static void addBatchOperationOptions(Options options) { .argName("prefix") .desc("Export file name prefix.") .build()); - // 添加文件源选项 -f --from - options.addOption(Option.builder(ARG_SHORT_FROM) - .longOpt("from") + // 添加文件源选项 -f --file + options.addOption(Option.builder(ARG_SHORT_FROM_FILE) + .longOpt("file") .hasArg() - .argName("from") + .argName("file path") .desc("Source file(s), separated by ; .") .build()); // 添加导出文件行数限制选项 -L --line @@ -822,12 +844,20 @@ private static void addBatchOperationOptions(Options options) { options.addOption(Option.builder(ARG_SHORT_MAX_ERROR) .longOpt("max-error") .hasArg() + .argName("max error count") .desc("Max error count 
threshold.") .build()); // 性能模式 options.addOption(Option.builder(ARG_SHORT_PERF_MODE) .longOpt("perf") - .desc("perf mode") + .desc("Performance mode.") + .build()); + // 数据脱敏 + options.addOption(Option.builder(ARG_SHORT_MASK) + .longOpt("mask") + .hasArg() + .argName("json config") + .desc("Masking sensitive columns while exporting data.") .build()); } diff --git a/batch-tool/src/main/java/exec/export/ShardingExportExecutor.java b/batch-tool/src/main/java/exec/export/ShardingExportExecutor.java index 93d84ae..8a66e8b 100644 --- a/batch-tool/src/main/java/exec/export/ShardingExportExecutor.java +++ b/batch-tool/src/main/java/exec/export/ShardingExportExecutor.java @@ -88,8 +88,8 @@ private void doExportWithSharding(String tableName) { try { topologyList = DbUtil.getTopology(dataSource.getConnection(), tableName); } catch (DatabaseException e) { - logger.error("{}. Try export with -sharding off", e.getMessage()); - throw new RuntimeException(e); + logger.error("Try export with '-sharding off'"); + throw new RuntimeException(e.getMessage()); } catch (SQLException e) { throw new RuntimeException(e); } @@ -188,6 +188,7 @@ private void shardingExportWithFixedFile(List topologyList, countDownLatch, emittedDataCounter, false, config.getQuoteEncloseMode()); producer.setPermitted(permitted); producer.setWhereCondition(config.getWhereCondition()); + producer.putDataMaskerMap(config.getColumnMaskerConfigMap()); producerExecutor.submit(producer); } waitForFinish(countDownLatch, emittedDataCounter); @@ -201,6 +202,7 @@ private void shardingExportWithFixedFile(List topologyList, countDownLatch, emittedDataCounter, true, config.getQuoteEncloseMode()); producer.setWhereCondition(config.getWhereCondition()); + producer.putDataMaskerMap(config.getColumnMaskerConfigMap()); producer.setFragmentQueue(fragmentQueue); producer.setPermitted(permitted); producerExecutor.submit(producer); diff --git a/batch-tool/src/main/java/model/config/ConfigConstant.java 
b/batch-tool/src/main/java/model/config/ConfigConstant.java index c84621a..00deb7a 100644 --- a/batch-tool/src/main/java/model/config/ConfigConstant.java +++ b/batch-tool/src/main/java/model/config/ConfigConstant.java @@ -41,7 +41,7 @@ public class ConfigConstant { public static final String ARG_SHORT_TABLE = "t"; public static final String ARG_SHORT_SEP = "s"; public static final String ARG_SHORT_PREFIX = "pre"; - public static final String ARG_SHORT_FROM = "f"; + public static final String ARG_SHORT_FROM_FILE = "f"; public static final String ARG_SHORT_LINE = "L"; public static final String ARG_SHORT_FILE_NUM = "F"; public static final String ARG_SHORT_HISTORY_FILE = "H"; @@ -78,6 +78,7 @@ public class ConfigConstant { public static final String ARG_SHORT_FILE_FORMAT = "format"; public static final String ARG_SHORT_MAX_ERROR = "error"; public static final String ARG_SHORT_PERF_MODE = "perf"; + public static final String ARG_SHORT_MASK = "mask"; public static final int CPU_NUM = Runtime.getRuntime().availableProcessors(); /** diff --git a/batch-tool/src/main/java/model/config/ExportConfig.java b/batch-tool/src/main/java/model/config/ExportConfig.java index a768596..78fb468 100644 --- a/batch-tool/src/main/java/model/config/ExportConfig.java +++ b/batch-tool/src/main/java/model/config/ExportConfig.java @@ -16,7 +16,11 @@ package model.config; +import com.alibaba.fastjson2.JSONObject; +import model.mask.AbstractDataMasker; + import java.util.List; +import java.util.Map; /** * 导出的设置项 @@ -59,6 +63,11 @@ public class ExportConfig extends BaseConfig { */ private int parallelism = 0; + /** + * 字段脱敏配置 + */ + private Map columnMaskerConfigMap; + private boolean isAscending = true; private boolean isLocalMerge = false; private boolean isParallelMerge = false; @@ -204,6 +213,14 @@ private String getParallelismConfig() { } } + public Map getColumnMaskerConfigMap() { + return columnMaskerConfigMap; + } + + public void setColumnMaskerConfigMap(Map columnMaskerConfigMap) { + 
this.columnMaskerConfigMap = columnMaskerConfigMap; + } + @Override public void validate() { super.validate(); diff --git a/batch-tool/src/main/java/model/mask/AbstractDataMasker.java b/batch-tool/src/main/java/model/mask/AbstractDataMasker.java new file mode 100644 index 0000000..bf53004 --- /dev/null +++ b/batch-tool/src/main/java/model/mask/AbstractDataMasker.java @@ -0,0 +1,24 @@ +/* + * Copyright [2013-2021], Alibaba Group Holding Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package model.mask; + +public abstract class AbstractDataMasker { + + public abstract MaskType getType(); + + public abstract byte[] doMask(byte[] input); +} diff --git a/batch-tool/src/main/java/model/mask/DataMaskerFactory.java b/batch-tool/src/main/java/model/mask/DataMaskerFactory.java new file mode 100644 index 0000000..93e3ab3 --- /dev/null +++ b/batch-tool/src/main/java/model/mask/DataMaskerFactory.java @@ -0,0 +1,86 @@ +/* + * Copyright [2013-2021], Alibaba Group Holding Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package model.mask; + +import com.alibaba.fastjson2.JSONObject; + +public class DataMaskerFactory { + + private static final String MASK_TYPE_KEY = "type"; + + /** + * 掩码脱敏 + */ + private static final String HIDING_SHOW_END_KEY = "show_end"; + private static final String HIDING_SHOW_REGION_KEY = "show_region"; + + + /** + * 哈希脱敏 + */ + private static final String HASH_SALT_KEY = "salt"; + + public static AbstractDataMasker getDataMasker(JSONObject jsonConfig) { + MaskType maskType = MaskType.fromString(jsonConfig.getString(MASK_TYPE_KEY)); + switch (maskType) { + case HIDING: + return buildHidingMasker(jsonConfig); + case ENCRYPT: + return buildEncryptMasker(jsonConfig); + case HASH: + return buildHashMasker(jsonConfig); + case FLOOR: + return buildFloorMasker(jsonConfig); + default: + throw new UnsupportedOperationException("Unsupported mask type: " + maskType); + } + } + + private static AbstractDataMasker buildHidingMasker(JSONObject jsonConfig) { + HidingMasker hidingMasker = new HidingMasker(); + boolean hasShowOption = false; + if (jsonConfig.containsKey(HIDING_SHOW_END_KEY)) { + hidingMasker.setShowEndRegion(jsonConfig.getIntValue(HIDING_SHOW_END_KEY)); + hasShowOption = true; + } + if (jsonConfig.containsKey(HIDING_SHOW_REGION_KEY)) { + hidingMasker.setShowRegions(jsonConfig.getString(HIDING_SHOW_REGION_KEY)); + hasShowOption = true; + } + if (!hasShowOption) { + throw new IllegalArgumentException("Hiding masker requires at least one show region"); + } + return hidingMasker; + } + + private static AbstractDataMasker buildEncryptMasker(JSONObject jsonConfig) { + throw new UnsupportedOperationException("Encrypt masker is not implemented yet"); + } + + private static AbstractDataMasker buildHashMasker(JSONObject jsonConfig) { + HashMasker hashMasker = new HashMasker(); + if (jsonConfig.containsKey(HASH_SALT_KEY)) { + 
hashMasker.setSalt(jsonConfig.getString(HASH_SALT_KEY)); + } + + return hashMasker; + } + + private static AbstractDataMasker buildFloorMasker(JSONObject jsonConfig) { + throw new UnsupportedOperationException("Floor masker is not implemented yet"); + } +} diff --git a/batch-tool/src/main/java/model/mask/HashMasker.java b/batch-tool/src/main/java/model/mask/HashMasker.java new file mode 100644 index 0000000..c3aad07 --- /dev/null +++ b/batch-tool/src/main/java/model/mask/HashMasker.java @@ -0,0 +1,73 @@ +/* + * Copyright [2013-2021], Alibaba Group Holding Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package model.mask; + +import com.google.common.base.Preconditions; +import model.config.ConfigConstant; + +import javax.annotation.concurrent.NotThreadSafe; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Base64; + +@NotThreadSafe +public class HashMasker extends AbstractDataMasker { + + private static final int MAX_SALT_LENGTH = 16; + private byte[] salt = null; + + private final MessageDigest MD5_DIGEST; + private final Base64.Encoder BASE64_ENCODER = Base64.getEncoder(); + + + public HashMasker() { + try { + MD5_DIGEST = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + } + + @Override + public MaskType getType() { + return MaskType.HASH; + } + + @Override + public byte[] doMask(byte[] input) { + if (salt == null) { + return BASE64_ENCODER.encode(MD5_DIGEST.digest(input)); + } + MD5_DIGEST.update(input); + MD5_DIGEST.update(salt); + return BASE64_ENCODER.encode(MD5_DIGEST.digest()); + } + + public void setSalt(byte[] salt) { + if (this.salt != null) { + throw new IllegalArgumentException("salt can only be initialized once"); + } + Preconditions.checkNotNull(salt); + Preconditions.checkArgument(salt.length <= MAX_SALT_LENGTH, + "Hash salt max length is " + MAX_SALT_LENGTH); + this.salt = salt; + } + + public void setSalt(String salt) { + setSalt(salt.getBytes(ConfigConstant.DEFAULT_CHARSET)); + } +} diff --git a/batch-tool/src/main/java/model/mask/HidingMasker.java b/batch-tool/src/main/java/model/mask/HidingMasker.java new file mode 100644 index 0000000..41cc3ce --- /dev/null +++ b/batch-tool/src/main/java/model/mask/HidingMasker.java @@ -0,0 +1,120 @@ +/* + * Copyright [2013-2021], Alibaba Group Holding Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package model.mask; + +import com.google.common.base.Preconditions; +import model.config.ConfigConstant; +import model.config.GlobalVar; +import org.apache.commons.lang3.StringUtils; + +import javax.annotation.concurrent.NotThreadSafe; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.util.Arrays; + +@NotThreadSafe +public class HidingMasker extends AbstractDataMasker { + + private Charset charset = ConfigConstant.DEFAULT_CHARSET; + private final static char HIDING_CHAR = '*'; + + /** + * 展示末尾几位 + */ + private int showEndRegion = 0; + /** + * 展示区间 + */ + private int[] showRegions = null; + + @Override + public MaskType getType() { + return MaskType.HIDING; + } + + @Override + public byte[] doMask(byte[] input) { + if (GlobalVar.IN_PERF_MODE) { + doSimpleHiding(input); + return input; + } + + return doStringHiding(input); + } + + private byte[] doStringHiding(byte[] input) { + String inputStr = new String(input, charset); + char[] chars = inputStr.toCharArray(); + for (int i = 0; i < chars.length; i++) { + if (isHidingChar(i, chars.length)) { + chars[i] = HIDING_CHAR; + } + } + ByteBuffer byteBuffer = charset.encode(CharBuffer.wrap(chars)); + return Arrays.copyOfRange(byteBuffer.array(), byteBuffer.position(), + byteBuffer.limit()); + } + + private void doSimpleHiding(byte[] input) { + for (int i = 0; i < input.length; i++) { + if (isHidingChar(i, input.length)) { + input[i] = HIDING_CHAR; + } + } + } + + private boolean isHidingChar(int index, int totalLength) { + if (index >= 
totalLength - showEndRegion) { + return false; + } + if (showRegions != null) { + for (int j = 0; j < showRegions.length; j += 2) { + if (index >= showRegions[j] && index <= showRegions[j + 1]) { + return false; + } + } + } + return true; + } + + public void setCharset(Charset charset) { + this.charset = charset; + } + + public void setShowEndRegion(int showEndRegion) { + this.showEndRegion = showEndRegion; + } + + /** + * 区间用,分隔 + * @param showRegions 0-2,4-5 + */ + public void setShowRegions(String showRegions) { + Preconditions.checkArgument(!StringUtils.isBlank(showRegions), "Empty show region"); + String[] regionStrs = StringUtils.split(showRegions, ","); + int[] regions = new int[regionStrs.length * 2]; + for (int i = 0; i < regionStrs.length; i++) { + String[] pair = StringUtils.split(regionStrs[i], "-"); + Preconditions.checkArgument(pair.length == 2, + "Illegal region format: " + regionStrs[i]); + regions[i * 2] = Integer.parseInt(pair[0]); + regions[i * 2 + 1] = Integer.parseInt(pair[1]); + } + this.showRegions = regions; + } +} diff --git a/batch-tool/src/main/java/model/mask/MaskType.java b/batch-tool/src/main/java/model/mask/MaskType.java new file mode 100644 index 0000000..254a8de --- /dev/null +++ b/batch-tool/src/main/java/model/mask/MaskType.java @@ -0,0 +1,39 @@ +/* + * Copyright [2013-2021], Alibaba Group Holding Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package model.mask; + +public enum MaskType { + HIDING, // 掩码 + ENCRYPT, // 加密 + HASH, // 摘要 + FLOOR; // 取整 + + public static MaskType fromString(String type) { + switch (type.toUpperCase()) { + case "HIDING": + return HIDING; + case "ENCRYPT": + return ENCRYPT; + case "HASH": + return HASH; + case "FLOOR": + return FLOOR; + default: + throw new UnsupportedOperationException("Unsupported mask type: " + type); + } + } +} diff --git a/batch-tool/src/main/java/worker/common/reader/CsvReader.java b/batch-tool/src/main/java/worker/common/reader/CsvReader.java index b057e66..b202445 100644 --- a/batch-tool/src/main/java/worker/common/reader/CsvReader.java +++ b/batch-tool/src/main/java/worker/common/reader/CsvReader.java @@ -21,6 +21,7 @@ import com.opencsv.CSVParserBuilder; import com.opencsv.CSVReader; import com.opencsv.CSVReaderBuilder; +import com.opencsv.exceptions.CsvValidationException; import model.ProducerExecutionContext; import model.config.ConfigConstant; import org.slf4j.Logger; @@ -72,6 +73,8 @@ protected void readData() { logger.info("{} 读取完毕", fileList.get(localProcessingFileIndex).getPath()); } catch (IOException e) { logger.error(e.getMessage()); + } catch (CsvValidationException e) { + throw new RuntimeException(e); } } diff --git a/batch-tool/src/main/java/worker/export/BaseExportWorker.java b/batch-tool/src/main/java/worker/export/BaseExportWorker.java index dfa7da7..1f1462c 100644 --- a/batch-tool/src/main/java/worker/export/BaseExportWorker.java +++ b/batch-tool/src/main/java/worker/export/BaseExportWorker.java @@ -16,6 +16,7 @@ package worker.export; +import com.alibaba.fastjson2.JSONObject; import model.config.CompressMode; import model.config.FileFormat; import model.config.GlobalVar; @@ -24,6 +25,8 @@ import model.db.TableFieldMetaInfo; import model.db.TableTopology; import model.mask.AbstractDataMasker; +import model.mask.DataMaskerFactory; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; import util.DataSourceUtil; @@ -38,6 +41,7 @@ import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -57,7 +61,7 @@ public abstract class BaseExportWorker implements Runnable { protected final List isStringTypeList; - protected Map columnDataMasker; + protected List columnDataMaskerList; protected ByteArrayOutputStream os; protected int bufferedRowNum = 0; // 已经缓存的行数 @@ -158,6 +162,9 @@ protected void afterProduceData() { * @param columnIdx 从 0 开始 */ protected void writeFieldValue(ByteArrayOutputStream os, byte[] value, int columnIdx) throws IOException { + if (columnDataMaskerList != null && columnDataMaskerList.get(columnIdx) != null) { + value = columnDataMaskerList.get(columnIdx).doMask(value); + } boolean isStringType = isStringTypeList.get(columnIdx); switch (quoteEncloseMode) { case NONE: @@ -187,7 +194,28 @@ public void setCompressMode(CompressMode compressMode) { this.compressMode = compressMode; } - public void setColumnDataMasker(Map columnDataMasker) { - this.columnDataMasker = columnDataMasker; + public void putDataMaskerMap(Map columnMaskerMap) { + if (columnMaskerMap == null || columnMaskerMap.isEmpty()) { + return; + } + if (this.columnDataMaskerList == null) { + this.columnDataMaskerList = new ArrayList<>(Collections.nCopies( + tableFieldMetaInfo.getFieldMetaInfoList().size(), null)); + } + for (Map.Entry columnMasker : columnMaskerMap.entrySet()) { + AbstractDataMasker masker = DataMaskerFactory.getDataMasker(columnMasker.getValue()); + this.putDataMasker(columnMasker.getKey(), masker); + } + } + + private void putDataMasker(String columnName, AbstractDataMasker dataMasker) { + List fieldMetaInfoList = tableFieldMetaInfo.getFieldMetaInfoList(); + for (int i = 0; i < fieldMetaInfoList.size(); i++) { + if (StringUtils.equalsIgnoreCase(columnName, fieldMetaInfoList.get(i).getName())) 
{ + this.columnDataMaskerList.set(i, dataMasker); + return; + } + } + throw new IllegalArgumentException("Unknown mask column: " + columnName); } } diff --git a/batch-tool/src/main/java/worker/export/ExportProducer.java b/batch-tool/src/main/java/worker/export/ExportProducer.java index 5adaaaa..ad5563a 100644 --- a/batch-tool/src/main/java/worker/export/ExportProducer.java +++ b/batch-tool/src/main/java/worker/export/ExportProducer.java @@ -103,8 +103,8 @@ protected void dealWithRemainData() { @Override protected String getExportSql() { - List metaInfoList = tableFieldMetaInfo.getFieldMetaInfoList(); - return ExportUtil.getDirectSql(topology, metaInfoList, whereCondition); + return ExportUtil.getDirectSql(topology, tableFieldMetaInfo.getFieldMetaInfoList(), + whereCondition); } @Override diff --git a/batch-tool/src/main/java/worker/factory/ExportWorkerFactory.java b/batch-tool/src/main/java/worker/factory/ExportWorkerFactory.java index 44ea7ef..6dc2932 100644 --- a/batch-tool/src/main/java/worker/factory/ExportWorkerFactory.java +++ b/batch-tool/src/main/java/worker/factory/ExportWorkerFactory.java @@ -63,6 +63,7 @@ public static DirectExportWorker buildDefaultDirectExportWorker(DataSource druid throw new UnsupportedOperationException("Do not support direct export when fixed file num"); } directExportWorker.setWhereCondition(config.getWhereCondition()); + directExportWorker.putDataMaskerMap(config.getColumnMaskerConfigMap()); return directExportWorker; } diff --git a/batch-tool/src/main/java/worker/insert/DirectImportWorker.java b/batch-tool/src/main/java/worker/insert/DirectImportWorker.java index 2fa8a88..cc457fd 100644 --- a/batch-tool/src/main/java/worker/insert/DirectImportWorker.java +++ b/batch-tool/src/main/java/worker/insert/DirectImportWorker.java @@ -20,6 +20,7 @@ import com.opencsv.CSVParserBuilder; import com.opencsv.CSVReader; import com.opencsv.CSVReaderBuilder; +import com.opencsv.exceptions.CsvValidationException; import 
exception.DatabaseException; import model.ConsumerExecutionContext; import model.ProducerExecutionContext; @@ -139,6 +140,9 @@ public void run() { } catch (IOException e) { e.printStackTrace(); throw new RuntimeException(e); + } catch (CsvValidationException e) { + logger.error("CSV format invalid {} at line: {}", e.getMessage(), curLine); + throw new RuntimeException(e); } } } diff --git a/batch-tool/src/test/java/preprocess/MaskingTest.java b/batch-tool/src/test/java/preprocess/MaskingTest.java new file mode 100644 index 0000000..86ff7cf --- /dev/null +++ b/batch-tool/src/test/java/preprocess/MaskingTest.java @@ -0,0 +1,136 @@ +/* + * Copyright [2013-2021], Alibaba Group Holding Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package preprocess; + +import model.mask.AbstractDataMasker; +import model.mask.HashMasker; +import model.mask.HidingMasker; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.runners.Enclosed; +import org.junit.runner.RunWith; + +import java.util.Arrays; + +@RunWith(Enclosed.class) +public class MaskingTest { + + public static class HidingMaskTest { + private HidingMasker masker; + + @Before + public void before() { + this.masker = new HidingMasker(); + } + + @Test + public void testRegionMarking() { + String[] input = { + "", + "zxcv", + "zxcv1234", + "中文zxc你好4", + }; + String[] output = { + "", + "zx**", + "zx******", + "中文******", + }; + masker.setShowRegions("0-1"); + checkMaskingResult(input, output, masker); + } + + @Test + public void testRegionMarkingWithEnd() { + String[] input = { + "", + "zxcv", + "zxcv1234", + "中文zxc你好", + }; + String[] output = { + "", + "*xcv", + "*****234", + "****c你好", + }; + masker.setShowEndRegion(3); + checkMaskingResult(input, output, masker); + } + + @Test + public void testMultiRegionMarking() { + String[] input = { + "", + "zxcv", + "zxcv1234", + "中文zxc你好4123", + "中文zxc你好4123中文", + }; + String[] output = { + "", + "zxcv", + "zxc*1234", + "中文z*c你***23", + "中文z*c你*****中文", + }; + masker.setShowRegions("0-2,4-5"); + masker.setShowEndRegion(2); + checkMaskingResult(input, output, masker); + } + } + + public static class HashMaskTest { + private HashMasker masker; + + @Before + public void before() { + this.masker = new HashMasker(); + } + + @Test + public void testHashWithSalt() { + masker.setSalt("abc"); + byte[] input1 = "zxcvbn中文".getBytes(); + byte[] input2 = "zxcvb中文".getBytes(); + + byte[] output1 = masker.doMask(input1); + byte[] output2 = masker.doMask(input2); + Assert.assertFalse(Arrays.equals(output1, output2)); + Assert.assertArrayEquals(masker.doMask(input1), output1); + + HashMasker masker2 = new HashMasker(); + masker2.setSalt("def"); + 
Assert.assertFalse(Arrays.equals( + masker.doMask(input1), masker2.doMask(input1))); + } + + } + + private static void checkMaskingResult(String[] input, String[] output, AbstractDataMasker masker) { + Assert.assertEquals("output count does not match", input.length, + output.length); + for (int i = 0; i < input.length; i++) { + byte[] inputBytes = input[i].getBytes(); + byte[] outPutBytes = masker.doMask(inputBytes); + String outputStr = new String(outPutBytes); + Assert.assertEquals(output[i], outputStr); + } + } +} From 817f9dd185c2666d75e3b5acdb81b546a27206c0 Mon Sep 17 00:00:00 2001 From: F-ca7 <627955292@qq.com> Date: Fri, 26 Aug 2022 10:31:11 +0800 Subject: [PATCH 05/12] 1. refactor command line arg parsing 2. fix export with option -lastSep 3. WIP: support yaml config file --- batch-tool/docs/usage-details.md | 3 + .../src/main/java/BatchToolLauncher.java | 6 +- .../java/cmd/CommandLineConfigResult.java | 38 ++ batch-tool/src/main/java/cmd/CommandUtil.java | 520 ++++-------------- .../src/main/java/cmd/ConfigArgOption.java | 160 ++++++ .../src/main/java/cmd/ConfigResult.java | 24 + .../src/main/java/cmd/YamlConfigResult.java | 38 ++ .../main/java/model/config/BaseConfig.java | 10 + .../java/model/config/ConfigConstant.java | 55 -- .../java/model/config/EncryptionMode.java | 2 +- .../java/worker/export/BaseExportWorker.java | 10 + .../worker/factory/ExportWorkerFactory.java | 17 +- .../src/main/java/worker/util/ExportUtil.java | 5 +- 13 files changed, 410 insertions(+), 478 deletions(-) create mode 100644 batch-tool/src/main/java/cmd/CommandLineConfigResult.java create mode 100644 batch-tool/src/main/java/cmd/ConfigArgOption.java create mode 100644 batch-tool/src/main/java/cmd/ConfigResult.java create mode 100644 batch-tool/src/main/java/cmd/YamlConfigResult.java diff --git a/batch-tool/docs/usage-details.md b/batch-tool/docs/usage-details.md index f2ff71e..81c332b 100644 --- a/batch-tool/docs/usage-details.md +++ b/batch-tool/docs/usage-details.md @@ -71,3 
+71,6 @@ ### 导入Excel文件 `-D sbtest_auto -o import -s , -t "sbtest1" -format XLSX -f "sbtest1_0.xlsx"` + +# 报错排查 +1. \ No newline at end of file diff --git a/batch-tool/src/main/java/BatchToolLauncher.java b/batch-tool/src/main/java/BatchToolLauncher.java index dfa9e6c..583af2c 100644 --- a/batch-tool/src/main/java/BatchToolLauncher.java +++ b/batch-tool/src/main/java/BatchToolLauncher.java @@ -16,8 +16,8 @@ import cmd.BaseOperateCommand; import cmd.CommandUtil; +import cmd.ConfigResult; import datasource.DataSourceConfig; -import org.apache.commons.cli.CommandLine; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,7 +34,7 @@ public static void main(String[] args) { return; } - CommandLine commandLine = CommandUtil.parseStartUpCommand(args); + ConfigResult commandLine = CommandUtil.parseStartUpCommand(args); if (commandLine == null || CommandUtil.doHelpCmd(commandLine)) { return; } @@ -49,7 +49,7 @@ public static void main(String[] args) { } - private static void handleCmd(CommandLine commandLine) throws SQLException { + private static void handleCmd(ConfigResult commandLine) throws SQLException { DataSourceConfig dataSourceConfig; try { dataSourceConfig = CommandUtil.getDataSourceConfigFromCmd(commandLine); diff --git a/batch-tool/src/main/java/cmd/CommandLineConfigResult.java b/batch-tool/src/main/java/cmd/CommandLineConfigResult.java new file mode 100644 index 0000000..022b2fe --- /dev/null +++ b/batch-tool/src/main/java/cmd/CommandLineConfigResult.java @@ -0,0 +1,38 @@ +/* + * Copyright [2013-2021], Alibaba Group Holding Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd; + +import org.apache.commons.cli.CommandLine; + +public class CommandLineConfigResult implements ConfigResult { + + private final CommandLine commandLine; + + public CommandLineConfigResult(CommandLine commandLine) { + this.commandLine = commandLine; + } + + @Override + public boolean hasOption(ConfigArgOption option) { + return commandLine.hasOption(option.argShort); + } + + @Override + public String getOptionValue(ConfigArgOption option) { + return commandLine.getOptionValue(option.argShort); + } +} diff --git a/batch-tool/src/main/java/cmd/CommandUtil.java b/batch-tool/src/main/java/cmd/CommandUtil.java index e88a091..0900ade 100644 --- a/batch-tool/src/main/java/cmd/CommandUtil.java +++ b/batch-tool/src/main/java/cmd/CommandUtil.java @@ -45,6 +45,8 @@ import util.FileUtil; import util.Version; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; import java.nio.charset.Charset; import java.util.Arrays; import java.util.HashMap; @@ -52,6 +54,7 @@ import java.util.Map; import java.util.stream.Collectors; +import static cmd.ConfigArgOption.*; import static model.config.ConfigConstant.*; /** @@ -64,45 +67,67 @@ public class CommandUtil { private static final Options options = new Options(); static { - addConnectDbOptions(options); - addBatchOperationOptions(options); - // 添加帮助选项 -? 
--help - options.addOption(Option.builder(ARG_SHORT_HELP) - .longOpt("help") - .desc("Help message.") - .build()); - // 添加版本信息 -v --version - options.addOption(Option.builder(ARG_SHORT_VERSION) - .longOpt("version") - .desc("Show version") - .build()); + formatter.setWidth(110); + addCommandOptions(); + } + + private static void addCommandOptions() { + Field[] fields = ConfigArgOption.class.getFields(); + try { + for (Field field : fields) { + if (Modifier.isStatic(field.getModifiers()) + && field.getType() == ConfigArgOption.class) { + ConfigArgOption option = (ConfigArgOption) field.get(ConfigArgOption.class); + addConfigOption(option); + } + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private static void addConfigOption(ConfigArgOption option) { + Option.Builder builder = Option.builder(option.argShort) + .longOpt(option.argLong) + .desc(option.desc); + if (option.hasArg()) { + builder.hasArg().argName(option.argName); + } + + options.addOption(builder.build()); } /** * 解析程序启动参数 */ - public static CommandLine parseStartUpCommand(String[] args) { + public static ConfigResult parseStartUpCommand(String[] args) { CommandLineParser parser = new DefaultParser(); - CommandLine result = null; + CommandLine commandLine = null; try { // 开始解析命令行参数 - result = parser.parse(options, args); + commandLine = parser.parse(options, args); } catch (ParseException e) { logger.error(e.getMessage()); printHelp(); + return null; + } + + if (commandLine.hasOption(ARG_SHORT_CONFIG_FILE.argShort)) { + return new YamlConfigResult(commandLine.getOptionValue(ARG_SHORT_CONFIG_FILE.argShort), commandLine); + } else { + return new CommandLineConfigResult(commandLine); } - return result; } //region 数据源设置 - private static void validateDataSourceArgs(CommandLine result) { + private static void validateDataSourceArgs(ConfigResult result) { requireArg(result, ARG_SHORT_HOST); requireArg(result, ARG_SHORT_USERNAME); requireArg(result, ARG_SHORT_PASSWORD); 
requireArg(result, ARG_SHORT_DBNAME); } - public static DataSourceConfig getDataSourceConfigFromCmd(CommandLine result) { + public static DataSourceConfig getDataSourceConfigFromCmd(ConfigResult result) { validateDataSourceArgs(result); // 判断是否使用负载均衡方式访问 @@ -127,7 +152,7 @@ public static DataSourceConfig getDataSourceConfigFromCmd(CommandLine result) { return configBuilder.build(); } - private static int getMaxWait(CommandLine result) { + private static int getMaxWait(ConfigResult result) { if (result.hasOption(ARG_SHORT_MAX_WAIT)) { return Integer.parseInt(result.getOptionValue(ARG_SHORT_MAX_WAIT)); } else { @@ -135,7 +160,7 @@ private static int getMaxWait(CommandLine result) { } } - private static int getMaxConnNum(CommandLine result) { + private static int getMaxConnNum(ConfigResult result) { if (result.hasOption(ARG_SHORT_MAX_CONN_NUM)) { return Integer.parseInt(result.getOptionValue(ARG_SHORT_MAX_CONN_NUM)); } else { @@ -143,7 +168,7 @@ private static int getMaxConnNum(CommandLine result) { } } - private static int getMinConnNum(CommandLine result) { + private static int getMinConnNum(ConfigResult result) { if (result.hasOption(ARG_SHORT_MIN_CONN_NUM)) { return Integer.parseInt(result.getOptionValue(ARG_SHORT_MIN_CONN_NUM)); } else { @@ -151,14 +176,14 @@ private static int getMinConnNum(CommandLine result) { } } - private static String getConnParam(CommandLine result) { + private static String getConnParam(ConfigResult result) { if (result.hasOption(ARG_SHORT_CONN_PARAM)) { return result.getOptionValue(ARG_SHORT_CONN_PARAM); } return null; } - private static String getInitSqls(CommandLine result) { + private static String getInitSqls(ConfigResult result) { if (result.hasOption(ARG_SHORT_CONN_INIT_SQL)) { return result.getOptionValue(ARG_SHORT_CONN_INIT_SQL); } @@ -167,20 +192,20 @@ private static String getInitSqls(CommandLine result) { //endregion 数据源设置 //region 批处理命令解析 - public static BaseOperateCommand getOperateCommandFromCmd(CommandLine result) { + 
public static BaseOperateCommand getOperateCommandFromCmd(ConfigResult result) { validateOperateArgs(result); BaseOperateCommand command = initCommand(result); afterInitCommand(command, result); return command; } - private static void validateOperateArgs(CommandLine result) { + private static void validateOperateArgs(ConfigResult result) { requireArg(result, ARG_SHORT_OPERATION); requireArg(result, ARG_SHORT_SEP); requireArg(result, ARG_SHORT_DBNAME); } - private static BaseOperateCommand initCommand(CommandLine result) { + private static BaseOperateCommand initCommand(ConfigResult result) { // 获取命令类型 String commandTypeStr = result.getOptionValue(ARG_SHORT_OPERATION); CommandType commandType = CommandUtil.lookup(commandTypeStr); @@ -206,32 +231,32 @@ private static BaseOperateCommand initCommand(CommandLine result) { return command; } - private static void afterInitCommand(BaseOperateCommand command, CommandLine result) { + private static void afterInitCommand(BaseOperateCommand command, ConfigResult result) { if (result.hasOption(ARG_SHORT_ENABLE_SHARDING)) { boolean shardingEnabled = parseFlag(result.getOptionValue(ARG_SHORT_ENABLE_SHARDING)); command.setShardingEnabled(shardingEnabled); } } - private static List getTableNames(CommandLine result) { + private static List getTableNames(ConfigResult result) { if (!result.hasOption(ARG_SHORT_TABLE)) { return null; } String tableNameStr = result.getOptionValue(ARG_SHORT_TABLE); return Lists.newArrayList( - StringUtils.split(tableNameStr, ConfigConstant.CMD_SEPARATOR)); + StringUtils.split(tableNameStr, CMD_SEPARATOR)); } - private static List getColumnNames(CommandLine result) { + private static List getColumnNames(ConfigResult result) { if (!result.hasOption(ARG_SHORT_COLUMNS)) { return null; } String columnNameStr = result.getOptionValue(ARG_SHORT_COLUMNS); return Lists.newArrayList( - StringUtils.split(columnNameStr, ConfigConstant.CMD_SEPARATOR)); + StringUtils.split(columnNameStr, CMD_SEPARATOR)); } - private 
static BaseOperateCommand parseImportCommand(CommandLine result) { + private static BaseOperateCommand parseImportCommand(ConfigResult result) { requireOnlyOneArg(result, ARG_SHORT_FROM_FILE, ARG_SHORT_DIRECTORY); ProducerExecutionContext producerExecutionContext = new ProducerExecutionContext(); @@ -241,7 +266,7 @@ private static BaseOperateCommand parseImportCommand(CommandLine result) { return new ImportCommand(getDbName(result), producerExecutionContext, consumerExecutionContext); } - private static BaseOperateCommand parseDeleteCommand(CommandLine result) { + private static BaseOperateCommand parseDeleteCommand(ConfigResult result) { requireOnlyOneArg(result, ARG_SHORT_FROM_FILE, ARG_SHORT_DIRECTORY); ProducerExecutionContext producerExecutionContext = new ProducerExecutionContext(); @@ -252,7 +277,7 @@ private static BaseOperateCommand parseDeleteCommand(CommandLine result) { return new DeleteCommand(getDbName(result), producerExecutionContext, consumerExecutionContext); } - private static BaseOperateCommand parseUpdateCommand(CommandLine result) { + private static BaseOperateCommand parseUpdateCommand(ConfigResult result) { requireOnlyOneArg(result, ARG_SHORT_FROM_FILE, ARG_SHORT_DIRECTORY); ProducerExecutionContext producerExecutionContext = new ProducerExecutionContext(); @@ -266,11 +291,11 @@ private static BaseOperateCommand parseUpdateCommand(CommandLine result) { //endregion 批处理命令解析 //region 读写文件相关配置 - private static String getSep(CommandLine result) { + private static String getSep(ConfigResult result) { return result.getOptionValue(ARG_SHORT_SEP); } - private static Charset getCharset(CommandLine result) { + private static Charset getCharset(ConfigResult result) { if (result.hasOption(ARG_SHORT_CHARSET)) { String charset = result.getOptionValue(ARG_SHORT_CHARSET); return Charset.forName(charset); @@ -279,11 +304,11 @@ private static Charset getCharset(CommandLine result) { } } - private static boolean getWithHeader(CommandLine result) { + private 
static boolean getWithHeader(ConfigResult result) { return result.hasOption(ARG_SHORT_WITH_HEADER); } - private static CompressMode getCompressMode(CommandLine result) { + private static CompressMode getCompressMode(ConfigResult result) { if (result.hasOption(ARG_SHORT_COMPRESS)) { return CompressMode.fromString(result.getOptionValue(ARG_SHORT_COMPRESS)); } else { @@ -291,7 +316,7 @@ private static CompressMode getCompressMode(CommandLine result) { } } - private static EncryptionConfig getEncryptionConfig(CommandLine result) { + private static EncryptionConfig getEncryptionConfig(ConfigResult result) { if (result.hasOption(ARG_SHORT_ENCRYPTION)) { String encryptionMode = result.getOptionValue(ARG_SHORT_ENCRYPTION); String key = result.getOptionValue(ARG_SHORT_KEY); @@ -301,7 +326,7 @@ private static EncryptionConfig getEncryptionConfig(CommandLine result) { } } - private static int getReadBlockSizeInMb(CommandLine result) { + private static int getReadBlockSizeInMb(ConfigResult result) { if (result.hasOption(ARG_SHORT_READ_BLOCK_SIZE)) { return Integer.parseInt( result.getOptionValue(ARG_SHORT_READ_BLOCK_SIZE)); @@ -310,14 +335,11 @@ private static int getReadBlockSizeInMb(CommandLine result) { } } - private static boolean getWithLastSep(CommandLine result) { + private static boolean getWithLastSep(ConfigResult result) { return result.hasOption(ARG_SHORT_WITH_LAST_SEP); } - /** - * TODO 文件格式、压缩格式、加密模式三者的设置冲突解决 - */ - private static FileFormat getFileFormat(CommandLine result) { + private static FileFormat getFileFormat(ConfigResult result) { if (result.hasOption(ARG_SHORT_FILE_FORMAT)) { String fileFormat = result.getOptionValue(ARG_SHORT_FILE_FORMAT); return FileFormat.fromString(fileFormat); @@ -328,7 +350,7 @@ private static FileFormat getFileFormat(CommandLine result) { //endregion 读写文件相关配置 //region 导出相关设置 - private static ExportCommand parseExportCommand(CommandLine result) { + private static ExportCommand parseExportCommand(ConfigResult result) { List 
tableNames = getTableNames(result); ExportConfig exportConfig = new ExportConfig(); exportConfig.setCharset(getCharset(result)); @@ -341,6 +363,7 @@ private static ExportCommand parseExportCommand(CommandLine result) { exportConfig.setCompressMode(getCompressMode(result)); exportConfig.setParallelism(getProducerParallelism(result)); exportConfig.setQuoteEncloseMode(getQuoteEncloseMode(result)); + exportConfig.setWithLastSep(getWithLastSep(result)); setFilenamePrefix(result, exportConfig); setFileNum(result, exportConfig); setFileLine(result, exportConfig); @@ -350,7 +373,7 @@ private static ExportCommand parseExportCommand(CommandLine result) { return new ExportCommand(getDbName(result), tableNames, exportConfig); } - private static void setFilenamePrefix(CommandLine result, ExportConfig exportConfig) { + private static void setFilenamePrefix(ConfigResult result, ExportConfig exportConfig) { if (result.hasOption(ARG_SHORT_PREFIX)) { exportConfig.setFilenamePrefix(result.getOptionValue(ARG_SHORT_PREFIX)); } else { @@ -358,7 +381,7 @@ private static void setFilenamePrefix(CommandLine result, ExportConfig exportCon } } - private static void setOrderBy(CommandLine result, ExportConfig exportConfig) { + private static void setOrderBy(ConfigResult result, ExportConfig exportConfig) { if (result.hasOption(ARG_SHORT_ORDER)) { if (!result.hasOption(ARG_SHORT_ORDER_COLUMN)) { throw new IllegalArgumentException("Order column name cannot be empty"); @@ -375,7 +398,7 @@ private static void setOrderBy(CommandLine result, ExportConfig exportConfig) { } } - private static void setColumnMaskerMap(CommandLine result, ExportConfig exportConfig) { + private static void setColumnMaskerMap(ConfigResult result, ExportConfig exportConfig) { if (result.hasOption(ARG_SHORT_MASK)) { String maskConfigStr = result.getOptionValue(ARG_SHORT_MASK); JSONObject maskConfig; @@ -393,7 +416,7 @@ private static void setColumnMaskerMap(CommandLine result, ExportConfig exportCo } } - private static void 
setFileLine(CommandLine result, ExportConfig exportConfig) { + private static void setFileLine(ConfigResult result, ExportConfig exportConfig) { if (result.hasOption(ARG_SHORT_LINE)) { if (exportConfig.getExportWay() != ExportConfig.ExportWay.DEFAULT) { // 只能指定一个导出方式 @@ -404,7 +427,7 @@ private static void setFileLine(CommandLine result, ExportConfig exportConfig) { } } - private static void setFileNum(CommandLine result, ExportConfig exportConfig) { + private static void setFileNum(ConfigResult result, ExportConfig exportConfig) { if (result.hasOption(ARG_SHORT_FILE_NUM)) { exportConfig.setExportWay(ExportConfig.ExportWay.FIXED_FILE_NUM); exportConfig.setFixedFileNum(Integer.parseInt(result.getOptionValue(ARG_SHORT_FILE_NUM))); @@ -414,7 +437,7 @@ private static void setFileNum(CommandLine result, ExportConfig exportConfig) { } } - private static boolean getParaMerge(CommandLine result) { + private static boolean getParaMerge(ConfigResult result) { return result.hasOption(ARG_SHORT_PARALLEL_MERGE); } //endregion 导出相关设置 @@ -424,7 +447,7 @@ private static boolean getParaMerge(CommandLine result) { * 主要针对插入/更新/删除 * 配置公共的上下文执行环境 */ - private static void configureCommonContext(CommandLine result, + private static void configureCommonContext(ConfigResult result, ProducerExecutionContext producerExecutionContext, ConsumerExecutionContext consumerExecutionContext) { configureGlobalVar(result); @@ -435,7 +458,7 @@ private static void configureCommonContext(CommandLine result, /** * 设置全局可调参数 */ - private static void configureGlobalVar(CommandLine result) { + private static void configureGlobalVar(ConfigResult result) { setBatchSize(result); setRingBufferSize(result); setPerfMode(result); @@ -444,7 +467,7 @@ private static void configureGlobalVar(CommandLine result) { /** * 配置生产者 */ - private static void configureProducerContext(CommandLine result, + private static void configureProducerContext(ConfigResult result, ProducerExecutionContext producerExecutionContext) { 
producerExecutionContext.setCharset(getCharset(result)); producerExecutionContext.setSeparator(getSep(result)); @@ -467,7 +490,7 @@ private static void configureProducerContext(CommandLine result, /** * 配置消费者 */ - private static void configureConsumerContext(CommandLine result, + private static void configureConsumerContext(ConfigResult result, ConsumerExecutionContext consumerExecutionContext) { consumerExecutionContext.setCharset(getCharset(result)); consumerExecutionContext.setSeparator(getSep(result)); @@ -485,7 +508,7 @@ private static void configureConsumerContext(CommandLine result, consumerExecutionContext.validate(); } - private static String getUseColumns(CommandLine result) { + private static String getUseColumns(ConfigResult result) { List columnNames = getColumnNames(result); if (columnNames == null) { return null; @@ -493,19 +516,19 @@ private static String getUseColumns(CommandLine result) { return StringUtils.join(columnNames, ","); } - private static boolean getWhereInEnabled(CommandLine result) { + private static boolean getWhereInEnabled(ConfigResult result) { return result.hasOption(ARG_SHORT_USING_IN); } - private static boolean getReadAndProcessFileOnly(CommandLine result) { + private static boolean getReadAndProcessFileOnly(ConfigResult result) { return result.hasOption(ARG_SHORT_READ_FILE_ONLY); } - private static String getDbName(CommandLine result) { + private static String getDbName(ConfigResult result) { return result.getOptionValue(ARG_SHORT_DBNAME); } - private static int getConsumerParallelism(CommandLine result) { + private static int getConsumerParallelism(ConfigResult result) { if (result.hasOption(ARG_SHORT_CONSUMER)) { int parallelism = Integer.parseInt(result.getOptionValue(ARG_SHORT_CONSUMER)); if (parallelism <= 0) { @@ -517,7 +540,7 @@ private static int getConsumerParallelism(CommandLine result) { } } - private static int getProducerParallelism(CommandLine result) { + private static int getProducerParallelism(ConfigResult 
result) { if (result.hasOption(ARG_SHORT_PRODUCER)) { int parallelism = Integer.parseInt(result.getOptionValue(ARG_SHORT_PRODUCER)); if (parallelism <= 0) { @@ -529,7 +552,7 @@ private static int getProducerParallelism(CommandLine result) { } } - private static QuoteEncloseMode getQuoteEncloseMode(CommandLine result) { + private static QuoteEncloseMode getQuoteEncloseMode(ConfigResult result) { if (result.hasOption(ARG_SHORT_QUOTE_ENCLOSE_MODE)) { return QuoteEncloseMode.parseMode(result.getOptionValue(ARG_SHORT_QUOTE_ENCLOSE_MODE)); } else { @@ -537,7 +560,7 @@ private static QuoteEncloseMode getQuoteEncloseMode(CommandLine result) { } } - private static boolean getForceParallelism(CommandLine result) { + private static boolean getForceParallelism(ConfigResult result) { if (result.hasOption(ARG_SHORT_FORCE_CONSUMER)) { return Boolean.parseBoolean(result.getOptionValue(ARG_SHORT_FORCE_CONSUMER)); } else { @@ -549,7 +572,7 @@ private static boolean getForceParallelism(CommandLine result) { * 解析文件路径与行号 * 并检测文件是否存在 */ - private static List getFileRecordList(CommandLine result) { + private static List getFileRecordList(ConfigResult result) { if (result.hasOption(ARG_SHORT_FROM_FILE)) { String filePathListStr = result.getOptionValue(ARG_SHORT_FROM_FILE); return Arrays.stream(StringUtils.split(filePathListStr, CMD_SEPARATOR)) @@ -575,7 +598,7 @@ private static List getFileRecordList(CommandLine result) { throw new IllegalStateException("cannot get file path list"); } - private static int getTpsLimit(CommandLine result) { + private static int getTpsLimit(ConfigResult result) { if (result.hasOption(ARG_SHORT_TPS_LIMIT)) { int tpsLimit = Integer.parseInt(result.getOptionValue(ARG_SHORT_TPS_LIMIT)); if (tpsLimit <= 0) { @@ -587,18 +610,18 @@ private static int getTpsLimit(CommandLine result) { } } - private static boolean getInsertIgnoreAndResumeEnabled(CommandLine result) { + private static boolean getInsertIgnoreAndResumeEnabled(ConfigResult result) { return 
result.hasOption(ARG_SHORT_IGNORE_AND_RESUME); } - private static DdlMode getDdlMode(CommandLine result) { + private static DdlMode getDdlMode(ConfigResult result) { if (!result.hasOption(ARG_SHORT_WITH_DDL)) { return DdlMode.NO_DDL; } return DdlMode.fromString(result.getOptionValue(ARG_SHORT_WITH_DDL)); } - private static int getMaxErrorCount(CommandLine result) { + private static int getMaxErrorCount(ConfigResult result) { if (result.hasOption(ARG_SHORT_MAX_ERROR)) { return Integer.parseInt(result.getOptionValue(ARG_SHORT_MAX_ERROR)); } else { @@ -606,7 +629,7 @@ private static int getMaxErrorCount(CommandLine result) { } } - private static String getHistoryFile(CommandLine result) { + private static String getHistoryFile(ConfigResult result) { if (result.hasOption(ARG_SHORT_HISTORY_FILE)) { return result.getOptionValue(ARG_SHORT_HISTORY_FILE); } else { @@ -614,21 +637,21 @@ private static String getHistoryFile(CommandLine result) { } } - private static String getWhereCondition(CommandLine result) { + private static String getWhereCondition(ConfigResult result) { return result.getOptionValue(ARG_SHORT_WHERE); } - private static boolean getSqlEscapeEnabled(CommandLine result) { + private static boolean getSqlEscapeEnabled(ConfigResult result) { return !result.hasOption(ARG_SHORT_NO_ESCAPE); } - private static boolean getFuncEnabled(CommandLine result) { + private static boolean getFuncEnabled(ConfigResult result) { return result.hasOption(ARG_SHORT_SQL_FUNC); } //endregion 写入数据库操作的设置 //region 全局相关设置 - private static void setRingBufferSize(CommandLine result) { + private static void setRingBufferSize(ConfigResult result) { if (result.hasOption(ARG_SHORT_RING_BUFFER_SIZE)) { int size = Integer.parseInt(result.getOptionValue(ARG_SHORT_RING_BUFFER_SIZE)); if (Integer.bitCount(size) != 1) { @@ -638,14 +661,14 @@ private static void setRingBufferSize(CommandLine result) { } } - private static void setBatchSize(CommandLine result) { + private static void 
setBatchSize(ConfigResult result) { if (result.hasOption(ARG_SHORT_BATCH_SIZE)) { GlobalVar.EMIT_BATCH_SIZE = Integer.parseInt( result.getOptionValue(ARG_SHORT_BATCH_SIZE)); } } - private static void setPerfMode(CommandLine result) { + private static void setPerfMode(ConfigResult result) { GlobalVar.IN_PERF_MODE = result.hasOption(ARG_SHORT_PERF_MODE); } //endregion 全局相关设置 @@ -655,7 +678,7 @@ private static void setPerfMode(CommandLine result) { * 保证命令有参数 argShort * 否则抛出异常 */ - private static void requireArg(CommandLine result, String argShort) { + private static void requireArg(ConfigResult result, ConfigArgOption argShort) { if (!result.hasOption(argShort)) { throw new IllegalArgumentException("Missing required argument: " + argShort); } @@ -664,9 +687,9 @@ private static void requireArg(CommandLine result, String argShort) { /** * 有且仅有其中一个参数 */ - private static void requireOnlyOneArg(CommandLine result, String ... argsShort) { + private static void requireOnlyOneArg(ConfigResult result, ConfigArgOption ... argsShort) { boolean contains = false; - for (String arg : argsShort) { + for (ConfigArgOption arg : argsShort) { if (result.hasOption(arg)) { if (contains) { throw new IllegalArgumentException("can only exists one of these arguments: " + StringUtils.join(argsShort, ", ")); @@ -680,336 +703,11 @@ private static void requireOnlyOneArg(CommandLine result, String ... 
argsShort) } } - private static void addBatchOperationOptions(Options options) { - // 添加批量操作选项 -o --operation - options.addOption(Option.builder(ARG_SHORT_OPERATION) - .longOpt("operation") - .hasArg() - .argName("operation") - .desc("Batch operation type: export / import / delete / update.") - .build()); - // 添加待操作数据表选项 -t --table - options.addOption(Option.builder(ARG_SHORT_TABLE) - .longOpt("table") - .hasArg() - .argName("table") - .desc("Target table.") - .build()); - // 添加分隔符选项 -s --sep - options.addOption(Option.builder(ARG_SHORT_SEP) - .longOpt("sep") - .hasArg() - .argName("sep") - .desc("Separator between fields (delimiter).") - .build()); - // 添加文件名前缀选项 -pre --prefix - options.addOption(Option.builder(ARG_SHORT_PREFIX) - .longOpt("prefix") - .hasArg() - .argName("prefix") - .desc("Export file name prefix.") - .build()); - // 添加文件源选项 -f --file - options.addOption(Option.builder(ARG_SHORT_FROM_FILE) - .longOpt("file") - .hasArg() - .argName("file path") - .desc("Source file(s), separated by ; .") - .build()); - // 添加导出文件行数限制选项 -L --line - options.addOption(Option.builder(ARG_SHORT_LINE) - .longOpt("line") - .hasArg() - .argName("line") - .desc("Max line limit of exported files.") - .build()); - // 添加导出文件个数限制选项 -F --filenum - options.addOption(Option.builder(ARG_SHORT_FILE_NUM) - .longOpt("filenum") - .hasArg() - .argName("filenum") - .desc("Fixed number of exported files.") - .build()); - // 添加导出where条件选项 -w --where - options.addOption(Option.builder(ARG_SHORT_WHERE) - .longOpt("where") - .hasArg() - .argName("where") - .desc("Where condition: col1>99 AND col2<100 ...") - .build()); - // 添加insert ignore开关选项 -i --ignore - options.addOption(Option.builder(ARG_SHORT_IGNORE_AND_RESUME) - .longOpt("ignoreandresume") - .argName("ignore") - .desc("Flag of insert ignore and resume breakpoint.") - .build()); - // 添加historyfile文件名配置 -H --historyfile - options.addOption(Option.builder(ARG_SHORT_HISTORY_FILE) - .longOpt("historyFile") - .hasArg() - .argName("history 
file name") - .desc("Configure of historyfile name.") - .build()); - // 添加限流配置 - options.addOption(Option.builder(ARG_SHORT_TPS_LIMIT) - .longOpt("tpsLimit") - .hasArg() - .argName("tps limit") - .desc("Configure of tps limit, default -1: no limit.") - .build()); - // 添加生产者线程数选项 - options.addOption(Option.builder(ARG_SHORT_PRODUCER) - .longOpt("producer") - .hasArg() - .argName("producer count") - .desc("Configure number of producer threads (export / import).") - .build()); - // 添加消费者者线程数选项 - options.addOption(Option.builder(ARG_SHORT_CONSUMER) - .longOpt("consumer") - .hasArg() - .argName("consumer count") - .desc("Configure number of consumer threads.") - .build()); - options.addOption(Option.builder(ARG_SHORT_FORCE_CONSUMER) - .longOpt("force consumer") - .hasArg() - .argName("use force consumer") - .desc("Configure if allow force consumer parallelism.") - .build()); - // 添加只读取文件并处理选项 - options.addOption(Option.builder(ARG_SHORT_READ_FILE_ONLY) - .longOpt("rfonly") - .desc("Only read and process file, no sql execution.") - .build()); - // 添加只读取文件并处理选项 - options.addOption(Option.builder(ARG_SHORT_USING_IN) - .longOpt("wherein") - .desc("Using where ... in (...)") - .build()); - // 添加每行最后以分隔符结尾开关选项 - options.addOption(Option.builder(ARG_SHORT_WITH_LAST_SEP) - .longOpt("withLastSep") - .desc("Whether line ends with separator.") - .build()); - // 添加并行归并选项 - options.addOption(Option.builder(ARG_SHORT_PARALLEL_MERGE) - .longOpt("paraMerge") - .desc("Using parallel merge when doing order by export.") - .build()); - // 添加header是否为字段名选项 - options.addOption(Option.builder(ARG_SHORT_WITH_HEADER) - .longOpt("header") - .desc("Whether the header line is column names.") - .build()); - // 添加引号转义模式 - options.addOption(Option.builder(ARG_SHORT_QUOTE_ENCLOSE_MODE) - .longOpt("quoteMode") - .hasArg() - .argName("auto/force/none") - .desc("The mode of how field values are enclosed by double-quotes when exporting table." 
- + " Default value is auto.") - .build()); - // 添加导出/导入DDL建表语句模式 - options.addOption(Option.builder(ARG_SHORT_WITH_DDL) - .longOpt("DDL") - .hasArg() - .desc("Export or import with table definition DDL mode: NONE / ONLY / WITH") - .build()); - // 添加导出/导入使用的压缩模式 - options.addOption(Option.builder(ARG_SHORT_COMPRESS) - .longOpt("compress") - .hasArg() - .desc("Export or import compressed file: NONE / GZIP") - .build()); - // 加解密算法 - options.addOption(Option.builder(ARG_SHORT_ENCRYPTION) - .longOpt("encrypt") - .hasArg() - .desc("Export or import with encrypted file: NONE / AES-CBC") - .build()); - // 对称加解密密钥 - options.addOption(Option.builder(ARG_SHORT_KEY) - .longOpt("key") - .hasArg() - .desc("Encryption key (string).") - .build()); - // 文件格式 - options.addOption(Option.builder(ARG_SHORT_FILE_FORMAT) - .longOpt("fileformat") - .hasArg() - .desc("File format: NONE / TXT / CSV") - .build()); - // 最大错误阈值 - options.addOption(Option.builder(ARG_SHORT_MAX_ERROR) - .longOpt("max-error") - .hasArg() - .argName("max error count") - .desc("Max error count threshold.") - .build()); - // 性能模式 - options.addOption(Option.builder(ARG_SHORT_PERF_MODE) - .longOpt("perf") - .desc("Performance mode.") - .build()); - // 数据脱敏 - options.addOption(Option.builder(ARG_SHORT_MASK) - .longOpt("mask") - .hasArg() - .argName("json config") - .desc("Masking sensitive columns while exporting data.") - .build()); - } - - private static void addConnectDbOptions(Options options) { - // 添加主机选项 -h --host - options.addOption(Option.builder(ARG_SHORT_HOST) - .longOpt("host") - .hasArg() - .argName("host") - .desc("Connect to host.") - .build()); - // 添加用户名选项 -u --user - options.addOption(Option.builder(ARG_SHORT_USERNAME) - .longOpt("user") - .hasArg() - .argName("user") - .desc("User for login.") - .build()); - // 添加密码选项 -p --password - options.addOption(Option.builder(ARG_SHORT_PASSWORD) - .longOpt("password") - .hasArg() - .argName("password") - .desc("Password to use when connecting to server.") - 
.build()); - // 添加端口选项 -P --port - options.addOption(Option.builder(ARG_SHORT_PORT) - .longOpt("port") - .hasArg() - .argName("port") - .desc("Port number to use for connection.") - .build()); - // 添加数据库选项 -D --database - options.addOption(Option.builder(ARG_SHORT_DBNAME) - .longOpt("database") - .hasArg() - .argName("database") - .desc("Database to use.") - .build()); - // 添加负载均衡开关选项 -lb --loadbalance - options.addOption(Option.builder(ARG_SHORT_LOAD_BALANCE) - .longOpt("loadbalance") - .argName("loadbalance") - .desc("If using load balance.") - .build()); - // 添加连接参数选项 -param --connParam - options.addOption(Option.builder(ARG_SHORT_CONN_PARAM) - .longOpt("connParam") - .hasArg() - .argName("params") - .desc("Connection params") - .build()); - // 添加导出时选项 -O --orderby - options.addOption(Option.builder(ARG_SHORT_ORDER) - .longOpt("orderby") - .hasArg() - .argName("order by type") - .desc("asc or desc.") - .build()); - // 添加导入时选择文件夹选项 -dir --dir - options.addOption(Option.builder(ARG_SHORT_DIRECTORY) - .longOpt("dir") - .hasArg() - .argName("directory") - .desc("Directory path including files to import.") - .build()); - // 添加指定字符集选项 -cs --charset - options.addOption(Option.builder(ARG_SHORT_CHARSET) - .longOpt("charset") - .hasArg() - .argName("charset") - .desc("Define charset of files.") - .build()); - // 添加显示开启分库分表操作模式选项 -sharding --sharding - options.addOption(Option.builder(ARG_SHORT_ENABLE_SHARDING) - .longOpt("sharding") - .hasArg() - .argName("on | off") - .desc("Enable sharding mode [on | off].") - .build()); - // 添加排序列选项 -OC --orderCol - options.addOption(Option.builder(ARG_SHORT_ORDER_COLUMN) - .longOpt("orderCol") - .hasArg() - .argName("ordered column") - .desc("col1;col2;col3") - .build()); - // 添加指定列与顺序选项 -col --columns - options.addOption(Option.builder(ARG_SHORT_COLUMNS) - .longOpt("columns") - .hasArg() - .argName("export columns") - .desc("col1;col2;col3") - .build()); - // 添加在本地做归并选项 -local --local - 
options.addOption(Option.builder(ARG_SHORT_LOCAL_MERGE) - .longOpt("localmerge") - .desc("Use local merge sort.") - .build()); - // 添加使用sql函数更新选项 -func - options.addOption(Option.builder(ARG_SHORT_SQL_FUNC) - .longOpt("sqlfunc") - .desc("Use sql function to update.") - .build()); - // 不开启转义 -noesc - options.addOption(Option.builder(ARG_SHORT_NO_ESCAPE) - .longOpt("noescape") - .desc("Don't escape values.") - .build()); - // 连接池配置选项 - options.addOption(Option.builder(ARG_SHORT_MAX_CONN_NUM) - .longOpt("maxConnection") - .hasArg() - .desc("Max connection number limit.") - .build()); - options.addOption(Option.builder(ARG_SHORT_MIN_CONN_NUM) - .longOpt("minConnection") - .hasArg() - .desc("Mim connection number limit.") - .build()); - options.addOption(Option.builder(ARG_SHORT_MAX_WAIT) - .longOpt("connMaxWait") - .hasArg() - .desc("Max wait time(ms) when getting a connection.") - .build()); - options.addOption(Option.builder(ARG_SHORT_CONN_INIT_SQL) - .longOpt("initSqls") - .hasArg() - .desc("Connection init sqls.") - .build()); - options.addOption(Option.builder(ARG_SHORT_BATCH_SIZE) - .longOpt("batchSize") - .hasArg() - .desc("Batch size of emitted tuples.") - .build()); - options.addOption(Option.builder(ARG_SHORT_READ_BLOCK_SIZE) - .longOpt("readSize") - .hasArg() - .desc("Read block size in MB.") - .build()); - options.addOption(Option.builder(ARG_SHORT_RING_BUFFER_SIZE) - .longOpt("ringBufferSize") - .hasArg() - .desc("Ring buffer size.") - .build()); - } - /** * 打印帮助信息 */ public static void printHelp() { - formatter.printHelp(APP_NAME, options, true); + formatter.printHelp(ConfigConstant.APP_NAME, options, true); } private static CommandType lookup(String commandType) { @@ -1038,24 +736,24 @@ private static boolean parseFlag(String flag) { throw new IllegalArgumentException("Illegal flag string: " + flag + ". 
Should be ON or OFF"); } - public static boolean doHelpCmd(CommandLine commandLine) { - if (CommandUtil.isShowHelp(commandLine)) { + public static boolean doHelpCmd(ConfigResult ConfigResult) { + if (CommandUtil.isShowHelp(ConfigResult)) { printHelp(); return true; } - if (CommandUtil.isShowVersion(commandLine)) { - System.out.printf("%s: %s%n", ConfigConstant.APP_NAME, Version.getVersion()); + if (CommandUtil.isShowVersion(ConfigResult)) { + System.out.printf("%s: %s%n", APP_NAME, Version.getVersion()); return true; } return false; } - private static boolean isShowHelp(CommandLine result) { + private static boolean isShowHelp(ConfigResult result) { return result.hasOption(ARG_SHORT_HELP); } - private static boolean isShowVersion(CommandLine result) { + private static boolean isShowVersion(ConfigResult result) { return result.hasOption(ARG_SHORT_VERSION); } //endregion 命令行参数校验与帮助 diff --git a/batch-tool/src/main/java/cmd/ConfigArgOption.java b/batch-tool/src/main/java/cmd/ConfigArgOption.java new file mode 100644 index 0000000..bbf40e0 --- /dev/null +++ b/batch-tool/src/main/java/cmd/ConfigArgOption.java @@ -0,0 +1,160 @@ +/* + * Copyright [2013-2021], Alibaba Group Holding Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package cmd; + +public class ConfigArgOption { + protected final String argShort; + protected final String argLong; + protected final String desc; + protected final String argName; + + private ConfigArgOption(String argShort, String argLong, String desc) { + this(argShort, argLong, desc, null); + } + + private ConfigArgOption(String argShort, String argLong, String desc, String argName) { + this.argShort = argShort; + this.argLong = argLong; + this.desc = desc; + this.argName = argName; + } + + private static ConfigArgOption of(String argShort, String argLong, String desc) { + return new ConfigArgOption(argShort, argLong, desc); + } + + private static ConfigArgOption of(String argShort, String argLong, String desc, String argName) { + return new ConfigArgOption(argShort, argLong, desc, argName); + } + + + public static final ConfigArgOption ARG_SHORT_HELP = + of("help", "help", "Help message."); + public static final ConfigArgOption ARG_SHORT_VERSION = + of("v", "version", "Show batch-tool version."); + public static final ConfigArgOption ARG_SHORT_CONFIG_FILE = + of("config", "configFile", "Use yaml config file.", "filepath"); + public static final ConfigArgOption ARG_SHORT_USERNAME = + of("u", "user", "User for login.", "username"); + public static final ConfigArgOption ARG_SHORT_PASSWORD = + of("p", "password", "Password of user.", "password"); + public static final ConfigArgOption ARG_SHORT_HOST = + of("h", "host", "Host of database.", "host"); + public static final ConfigArgOption ARG_SHORT_PORT = + of("P", "port", "Port number of database.", "port"); + public static final ConfigArgOption ARG_SHORT_DBNAME = + of("D", "database", "Database name.", "database"); + public static final ConfigArgOption ARG_SHORT_LOAD_BALANCE = + of("lb", "loadbalance", + "Use jdbc load balance, filling the arg in $host like 'host1:port1,host2:port2'."); + public static final ConfigArgOption ARG_SHORT_OPERATION = + of("o", "operation", "Batch operation type: export / import / 
delete / update.", "operation"); + public static final ConfigArgOption ARG_SHORT_ORDER = + of("O", "orderby", "Order by type: asc / desc.", "order"); + public static final ConfigArgOption ARG_SHORT_ORDER_COLUMN = + of("OC", "orderCol", "Ordered column names.", "col1;col2;col3"); + public static final ConfigArgOption ARG_SHORT_COLUMNS = + of("col", "columns", "Target columns for export.", "col1;col2;col3"); + public static final ConfigArgOption ARG_SHORT_TABLE = + of("t", "table", "Target table.", "tableName"); + public static final ConfigArgOption ARG_SHORT_SEP = + of("s", "sep", "Separator between fields (delimiter).", "separator char or string"); + public static final ConfigArgOption ARG_SHORT_PREFIX = + of("pre", "prefix", "Export file name prefix.", "prefix"); + public static final ConfigArgOption ARG_SHORT_FROM_FILE = + of("f", "file", "Source file(s).", "filepath1;filepath2"); + public static final ConfigArgOption ARG_SHORT_LINE = + of("L", "line", "Max line limit of one single export file.", "line count"); + public static final ConfigArgOption ARG_SHORT_FILE_NUM = + of("F", "filenum", "Fixed number of exported files.", "file count"); + public static final ConfigArgOption ARG_SHORT_HISTORY_FILE = + of("H", "historyFile", "History file name.", "filepath"); + public static final ConfigArgOption ARG_SHORT_WHERE = + of("w", "where", "Where condition: col1>99 AND col2<100 ...", "where condition"); + public static final ConfigArgOption ARG_SHORT_ENABLE_SHARDING = + of("sharding", "sharding", "Whether enable sharding mode.", "ON / OFF"); + public static final ConfigArgOption ARG_SHORT_WITH_HEADER = + of("header", "header", "Whether the header line is column names (default no)."); + public static final ConfigArgOption ARG_SHORT_DIRECTORY = + of("dir", "directory", "Directory path including files to import.", "directory path"); + public static final ConfigArgOption ARG_SHORT_CHARSET = + of("cs", "charset", "The charset of files.", "charset"); + public static final 
ConfigArgOption ARG_SHORT_IGNORE_AND_RESUME = + of("i", "ignore", "Flag of insert ignore and resume breakpoint."); + public static final ConfigArgOption ARG_SHORT_PRODUCER = + of("pro", "producer", "Configure number of producer threads (export / import).", "producer count"); + public static final ConfigArgOption ARG_SHORT_CONSUMER = + of("con", "consumer", "Configure number of consumer threads.", "consumer count"); + public static final ConfigArgOption ARG_SHORT_FORCE_CONSUMER = + of("fcon", "forceConsumer", "Configure if allow force consumer parallelism.", "parallelism"); + public static final ConfigArgOption ARG_SHORT_LOCAL_MERGE = + of("local", "localMerge", "Use local merge sort."); + public static final ConfigArgOption ARG_SHORT_SQL_FUNC = + of("func", "sqlFunc", "Use sql function to update."); + public static final ConfigArgOption ARG_SHORT_NO_ESCAPE = + of("noEsc", "noEscape", "Do not escape value for sql."); + public static final ConfigArgOption ARG_SHORT_MAX_CONN_NUM = + of("maxConn", "maxConnection", "Max connection count (druid).", "max connection"); + public static final ConfigArgOption ARG_SHORT_MAX_WAIT = + of("maxWait", "connMaxWait", "Max wait time when getting a connection.", "wait time(ms)"); + public static final ConfigArgOption ARG_SHORT_MIN_CONN_NUM = + of("minConn", "minConnection", "Min connection count (druid).", "min connection"); + public static final ConfigArgOption ARG_SHORT_CONN_PARAM = + of("param", "connParam", "Jdbc connection params.", "key1=val1&key2=val2"); + public static final ConfigArgOption ARG_SHORT_CONN_INIT_SQL = + of("initSqls", "initSqls", "Connection init sqls (druid).", "sqls"); + public static final ConfigArgOption ARG_SHORT_BATCH_SIZE = + of("batchsize", "batchSize", "Batch size of insert.", "size"); + public static final ConfigArgOption ARG_SHORT_READ_BLOCK_SIZE = + of("readsize", "readSize", "Read block size.", "size(MB)"); + public static final ConfigArgOption ARG_SHORT_RING_BUFFER_SIZE = + of("ringsize", 
"ringSize", "Ring buffer size.", "size (power of 2)"); + public static final ConfigArgOption ARG_SHORT_READ_FILE_ONLY = + of("rfonly", "readFileOnly", "Only read and process file, no sql execution."); + public static final ConfigArgOption ARG_SHORT_USING_IN = + of("in", "whereIn", "Using where cols in (values)."); + public static final ConfigArgOption ARG_SHORT_WITH_LAST_SEP = + of("lastSep", "withLastSep", "Whether line ends with separator."); + public static final ConfigArgOption ARG_SHORT_PARALLEL_MERGE = + of("para", "paraMerge", "Use parallel merge when doing order by export."); + public static final ConfigArgOption ARG_SHORT_QUOTE_ENCLOSE_MODE = + of("quote", "quoteMode", + "The mode of how field values are enclosed by double-quotes when exporting table.", + "AUTO (default) / FORCE / NONE"); + public static final ConfigArgOption ARG_SHORT_TPS_LIMIT = + of("tps", "tpsLimit", "Configure of tps limit (default -1: no limit).", "tps limit"); + public static final ConfigArgOption ARG_SHORT_WITH_DDL = + of("DDL", "DDL", "Export or import with DDL sql mode.", "NONE (default) / ONLY / WITH"); + public static final ConfigArgOption ARG_SHORT_COMPRESS = + of("comp", "compress", "Export or import compressed file.", "NONE (default) / GZIP"); + public static final ConfigArgOption ARG_SHORT_ENCRYPTION = + of("encrypt", "encrypt", "Export or import with encrypted file.", "NONE (default) / AES / SM4"); + public static final ConfigArgOption ARG_SHORT_KEY = + of("key", "secretKey", "Secret key used during encryption.", "string-type key"); + public static final ConfigArgOption ARG_SHORT_FILE_FORMAT = + of("format", "fileFormat", "File format.", "NONE (default) / TXT / CSV / XLS / XLSX"); + public static final ConfigArgOption ARG_SHORT_MAX_ERROR = + of("error", "maxError", "Max error count threshold, program exits when the limit is exceeded.", "max error count"); + public static final ConfigArgOption ARG_SHORT_PERF_MODE = + of("perf", "perfMode", "Use performance mode (at the 
sacrifice of compatibility.)"); + public static final ConfigArgOption ARG_SHORT_MASK = + of("mask", "mask", "Masking sensitive columns while exporting data.", "Json format config"); + + public boolean hasArg() { + return argName != null; + } +} diff --git a/batch-tool/src/main/java/cmd/ConfigResult.java b/batch-tool/src/main/java/cmd/ConfigResult.java new file mode 100644 index 0000000..de16111 --- /dev/null +++ b/batch-tool/src/main/java/cmd/ConfigResult.java @@ -0,0 +1,24 @@ +/* + * Copyright [2013-2021], Alibaba Group Holding Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd; + +public interface ConfigResult { + + boolean hasOption(ConfigArgOption option); + + String getOptionValue(ConfigArgOption option); +} diff --git a/batch-tool/src/main/java/cmd/YamlConfigResult.java b/batch-tool/src/main/java/cmd/YamlConfigResult.java new file mode 100644 index 0000000..da1024b --- /dev/null +++ b/batch-tool/src/main/java/cmd/YamlConfigResult.java @@ -0,0 +1,38 @@ +/* + * Copyright [2013-2021], Alibaba Group Holding Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd; + +import org.apache.commons.cli.CommandLine; + +public class YamlConfigResult implements ConfigResult { + + private final CommandLine commandLine; + + public YamlConfigResult(String yamlFilepath, CommandLine commandLine) { + this.commandLine = commandLine; + } + + @Override + public boolean hasOption(ConfigArgOption option) { + return commandLine.hasOption(option.argShort); + } + + @Override + public String getOptionValue(ConfigArgOption option) { + return commandLine.getOptionValue(option.argShort); + } +} diff --git a/batch-tool/src/main/java/model/config/BaseConfig.java b/batch-tool/src/main/java/model/config/BaseConfig.java index 108b7c0..51f811f 100644 --- a/batch-tool/src/main/java/model/config/BaseConfig.java +++ b/batch-tool/src/main/java/model/config/BaseConfig.java @@ -85,6 +85,8 @@ int bitCount() { private FileMode fileMode = new FileMode(); + private boolean isWithLastSep = false; + public BaseConfig(boolean shardingEnabled) { this.shardingEnabled = shardingEnabled; } @@ -172,6 +174,14 @@ public void setFileFormat(FileFormat fileFormat) { } } + public boolean isWithLastSep() { + return isWithLastSep; + } + + public void setWithLastSep(boolean withLastSep) { + isWithLastSep = withLastSep; + } + /** * 目前 压缩模式、加密、特殊文件格式三者配置互不兼容 */ diff --git a/batch-tool/src/main/java/model/config/ConfigConstant.java b/batch-tool/src/main/java/model/config/ConfigConstant.java index 00deb7a..52df86e 100644 --- a/batch-tool/src/main/java/model/config/ConfigConstant.java +++ b/batch-tool/src/main/java/model/config/ConfigConstant.java @@ 
-25,61 +25,6 @@ public class ConfigConstant { public static final String APP_NAME = "BatchTool"; - public static final String ARG_SHORT_HELP = "help"; - public static final String ARG_SHORT_VERSION = "v"; - public static final String ARG_SHORT_PORT = "P"; - public static final String ARG_SHORT_USERNAME = "u"; - public static final String ARG_SHORT_PASSWORD = "p"; - public static final String ARG_SHORT_HOST = "h"; - public static final String ARG_SHORT_DBNAME = "D"; - public static final String ARG_SHORT_LOAD_BALANCE = "lb"; - - public static final String ARG_SHORT_OPERATION = "o"; - public static final String ARG_SHORT_ORDER = "O"; - public static final String ARG_SHORT_ORDER_COLUMN = "OC"; - public static final String ARG_SHORT_COLUMNS = "col"; - public static final String ARG_SHORT_TABLE = "t"; - public static final String ARG_SHORT_SEP = "s"; - public static final String ARG_SHORT_PREFIX = "pre"; - public static final String ARG_SHORT_FROM_FILE = "f"; - public static final String ARG_SHORT_LINE = "L"; - public static final String ARG_SHORT_FILE_NUM = "F"; - public static final String ARG_SHORT_HISTORY_FILE = "H"; - public static final String ARG_SHORT_WHERE = "w"; - public static final String ARG_SHORT_ENABLE_SHARDING = "sharding"; - public static final String ARG_SHORT_WITH_HEADER = "header"; - public static final String ARG_SHORT_DIRECTORY = "dir"; - public static final String ARG_SHORT_CHARSET = "cs"; - public static final String ARG_SHORT_IGNORE_AND_RESUME = "i"; - public static final String ARG_SHORT_PRODUCER = "pro"; - public static final String ARG_SHORT_CONSUMER = "con"; - public static final String ARG_SHORT_FORCE_CONSUMER = "fcon"; - public static final String ARG_SHORT_LOCAL_MERGE = "local"; - public static final String ARG_SHORT_SQL_FUNC = "func"; - public static final String ARG_SHORT_NO_ESCAPE = "noesc"; - public static final String ARG_SHORT_MAX_CONN_NUM = "maxConn"; - public static final String ARG_SHORT_MAX_WAIT = "maxWait"; - public static 
final String ARG_SHORT_MIN_CONN_NUM = "minConn"; - public static final String ARG_SHORT_CONN_PARAM = "param"; - public static final String ARG_SHORT_CONN_INIT_SQL = "initSqls"; - public static final String ARG_SHORT_BATCH_SIZE = "batchsize"; - public static final String ARG_SHORT_READ_BLOCK_SIZE = "readsize"; - public static final String ARG_SHORT_RING_BUFFER_SIZE = "ringsize"; - public static final String ARG_SHORT_READ_FILE_ONLY = "rfonly"; - public static final String ARG_SHORT_USING_IN = "in"; - public static final String ARG_SHORT_WITH_LAST_SEP = "lastSep"; - public static final String ARG_SHORT_PARALLEL_MERGE = "para"; - public static final String ARG_SHORT_QUOTE_ENCLOSE_MODE = "quote"; - public static final String ARG_SHORT_TPS_LIMIT = "tps"; - public static final String ARG_SHORT_WITH_DDL = "DDL"; - public static final String ARG_SHORT_COMPRESS = "comp"; - public static final String ARG_SHORT_ENCRYPTION = "enc"; - public static final String ARG_SHORT_KEY = "key"; - public static final String ARG_SHORT_FILE_FORMAT = "format"; - public static final String ARG_SHORT_MAX_ERROR = "error"; - public static final String ARG_SHORT_PERF_MODE = "perf"; - public static final String ARG_SHORT_MASK = "mask"; - public static final int CPU_NUM = Runtime.getRuntime().availableProcessors(); /** * 默认分隔符 diff --git a/batch-tool/src/main/java/model/config/EncryptionMode.java b/batch-tool/src/main/java/model/config/EncryptionMode.java index 8064cfe..475060c 100644 --- a/batch-tool/src/main/java/model/config/EncryptionMode.java +++ b/batch-tool/src/main/java/model/config/EncryptionMode.java @@ -33,9 +33,9 @@ static EncryptionMode fromString(String encryptionMode) { switch (encryptionMode.toUpperCase()) { case "NONE": return NONE; - case "DEFAULT": // TODO fix default option case "CAESAR": return CAESAR; + case "DEFAULT": case "AES": case "AES-CBC": return AES_CBC; diff --git a/batch-tool/src/main/java/worker/export/BaseExportWorker.java 
b/batch-tool/src/main/java/worker/export/BaseExportWorker.java index 1f1462c..27af13d 100644 --- a/batch-tool/src/main/java/worker/export/BaseExportWorker.java +++ b/batch-tool/src/main/java/worker/export/BaseExportWorker.java @@ -65,6 +65,8 @@ public abstract class BaseExportWorker implements Runnable { protected ByteArrayOutputStream os; protected int bufferedRowNum = 0; // 已经缓存的行数 + protected boolean isWithLastSep = false; + protected BaseExportWorker(DataSource druid, TableTopology topology, TableFieldMetaInfo tableFieldMetaInfo, String separator, QuoteEncloseMode quoteEncloseMode) { @@ -124,6 +126,10 @@ protected void produceData() { } value = resultSet.getBytes(colNum); writeFieldValue(os, value, colNum - 1); + if (isWithLastSep) { + // 附加分隔符 + os.write(separator); + } // 附加换行符 os.write(FileUtil.SYS_NEW_LINE_BYTE); bufferedRowNum++; @@ -218,4 +224,8 @@ private void putDataMasker(String columnName, AbstractDataMasker dataMasker) { } throw new IllegalArgumentException("Unknown mask column: " + columnName); } + + public void setWithLastSep(boolean withLastSep) { + isWithLastSep = withLastSep; + } } diff --git a/batch-tool/src/main/java/worker/factory/ExportWorkerFactory.java b/batch-tool/src/main/java/worker/factory/ExportWorkerFactory.java index 6dc2932..3a26d06 100644 --- a/batch-tool/src/main/java/worker/factory/ExportWorkerFactory.java +++ b/batch-tool/src/main/java/worker/factory/ExportWorkerFactory.java @@ -64,6 +64,7 @@ public static DirectExportWorker buildDefaultDirectExportWorker(DataSource druid } directExportWorker.setWhereCondition(config.getWhereCondition()); directExportWorker.putDataMaskerMap(config.getColumnMaskerConfigMap()); + directExportWorker.setWithLastSep(config.isWithLastSep()); return directExportWorker; } @@ -97,12 +98,14 @@ public static DirectOrderExportWorker buildDirectOrderExportWorker(DataSource dr } BaseCipher cipher = BaseCipher.getCipher(config.getEncryptionConfig(), true); - return new DirectOrderExportWorker(druid, 
filePathPrefix, - tableFieldMetaInfo, - tableName, config.getOrderByColumnNameList(), maxLine, - config.getCharset(), - config.getSeparator(), - config.isAscending(), config.isWithHeader(), config.getQuoteEncloseMode(), - config.getCompressMode(), config.getFileFormat(), cipher); + DirectOrderExportWorker directOrderExportWorker = new DirectOrderExportWorker(druid, filePathPrefix, + tableFieldMetaInfo, + tableName, config.getOrderByColumnNameList(), maxLine, + config.getCharset(), + config.getSeparator(), + config.isAscending(), config.isWithHeader(), config.getQuoteEncloseMode(), + config.getCompressMode(), config.getFileFormat(), cipher); + directOrderExportWorker.setWithLastSep(config.isWithLastSep()); + return directOrderExportWorker; } } diff --git a/batch-tool/src/main/java/worker/util/ExportUtil.java b/batch-tool/src/main/java/worker/util/ExportUtil.java index 0acc426..f825957 100644 --- a/batch-tool/src/main/java/worker/util/ExportUtil.java +++ b/batch-tool/src/main/java/worker/util/ExportUtil.java @@ -46,7 +46,10 @@ public static String getDirectSql(TableTopology topology, private static String getDirectSql(TableTopology topology, List fieldMetaInfoList) { - + if (topology.getGroupName().isEmpty()) { + return String.format("select %s from %s;", formatFieldWithDateType(fieldMetaInfoList), + topology.getTableName()); + } return String.format(DIRECT_NODE_HINT + "select %s from %s;", topology.getGroupName(), formatFieldWithDateType(fieldMetaInfoList), topology.getTableName()); From fe8db3f1660f82ca2ab0f2d1bac6594cf5e6d411 Mon Sep 17 00:00:00 2001 From: F-ca7 <627955292@qq.com> Date: Fri, 26 Aug 2022 10:52:38 +0800 Subject: [PATCH 06/12] update Q&A --- batch-tool/README.md | 2 +- batch-tool/docs/usage-details.md | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/batch-tool/README.md b/batch-tool/README.md index 68dfcad..555fe65 100644 --- a/batch-tool/README.md +++ b/batch-tool/README.md @@ -8,7 +8,7 @@ Batch Tool工具是专为 
PolarDB-X数据库提供数据导入导出服务的工 在此基础上,还支持基于文本文件批量更新、删除等功能 (实验特性)。 ## 快速上手 -常见场景可参考文档 [usage-details](docs/usage-details.md)。 +常见场景与问题排查可参考文档 [usage-details](docs/usage-details.md)。 ### 参数介绍 命令行用法: diff --git a/batch-tool/docs/usage-details.md b/batch-tool/docs/usage-details.md index 81c332b..908792f 100644 --- a/batch-tool/docs/usage-details.md +++ b/batch-tool/docs/usage-details.md @@ -40,6 +40,7 @@ ### 进行数据脱敏 #### 对手机号进行掩码保护 +只展示前三位与末三位 `-D sbtest -o export -s , -t "customer" -mask "{ \"phone\": { \"type\": \"hiding\", \"show_region\" : \"0-2,8-10\" }"` @@ -72,5 +73,16 @@ ### 导入Excel文件 `-D sbtest_auto -o import -s , -t "sbtest1" -format XLSX -f "sbtest1_0.xlsx"` -# 报错排查 -1. \ No newline at end of file +# 常见问题排查 +1. 报错 **the server time zone value '' is unrecognized** + + **原因**:由于数据库时区与系统时区有差异导致的报错,需要在jdbc url中手动指定时区 + + **解决**:加入参数:`-param "serverTimezone=Asia/Shanghai"` +2. 报错 **Unable to get topology of table** + + **原因**:批量导出时默认以 PolarDB-X 的物理表拓扑进行分布式导出, +如果对普通 MySQL数据库进行导出,需要关闭 sharding 参数 + + **解决**:加入参数:`-sharding off` +3. From 42e3aa1d6a2def8405c655a5fd2736c5255e4dda Mon Sep 17 00:00:00 2001 From: F-ca7 <627955292@qq.com> Date: Tue, 6 Sep 2022 16:52:59 +0800 Subject: [PATCH 07/12] support null separator and update Q&A --- batch-tool/docs/usage-details.md | 15 ++++++++++++++- .../src/main/java/model/config/BaseConfig.java | 3 +++ .../main/java/model/config/ConfigConstant.java | 5 +++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/batch-tool/docs/usage-details.md b/batch-tool/docs/usage-details.md index 908792f..101b1fb 100644 --- a/batch-tool/docs/usage-details.md +++ b/batch-tool/docs/usage-details.md @@ -85,4 +85,17 @@ 如果对普通 MySQL数据库进行导出,需要关闭 sharding 参数 **解决**:加入参数:`-sharding off` -3. +3. 数据文件使用的分隔符是tab缩进,需要怎么输入`-s` 参数? + + **解决**:直接在shell中输入tab键,即`-s " "` + +5. 数据文件使用的分隔符是ascii控制字符(如`\x01`等),需要怎么输入`-s` 参数? 
+ + ```text + 1^A123^A1123^A12321312 + 2^A123^A1123^A12321312 + 3^A123^A1123^A12321312 + ``` + > ^A 为 \x01 的Caret notation + + **解决**:输入`-s $'\x01'` 即可 \ No newline at end of file diff --git a/batch-tool/src/main/java/model/config/BaseConfig.java b/batch-tool/src/main/java/model/config/BaseConfig.java index 51f811f..ee6a623 100644 --- a/batch-tool/src/main/java/model/config/BaseConfig.java +++ b/batch-tool/src/main/java/model/config/BaseConfig.java @@ -96,6 +96,9 @@ public String getSeparator() { } public void setSeparator(String separator) { + if (separator.isEmpty()) { + separator = ConfigConstant.NULL_SEPARATOR; + } // 分隔符不能包含特殊字符 for (String illegalStr : ConfigConstant.ILLEGAL_SEPARATORS) { if (separator.contains(illegalStr)) { diff --git a/batch-tool/src/main/java/model/config/ConfigConstant.java b/batch-tool/src/main/java/model/config/ConfigConstant.java index 52df86e..620f2e4 100644 --- a/batch-tool/src/main/java/model/config/ConfigConstant.java +++ b/batch-tool/src/main/java/model/config/ConfigConstant.java @@ -31,6 +31,11 @@ public class ConfigConstant { */ public static final String DEFAULT_SEPARATOR = ","; + /** + * \x00 + */ + public static final String NULL_SEPARATOR = "\u0000"; + /** * 文件名/表名分隔符 */ From d1114cba1b0dcde88b04facb8e8e0812d1717acd Mon Sep 17 00:00:00 2001 From: F-ca7 <627955292@qq.com> Date: Tue, 6 Sep 2022 20:57:33 +0800 Subject: [PATCH 08/12] support reading configuration from yaml file --- batch-tool/docs/usage-details.md | 9 +++++ batch-tool/pom.xml | 8 ++++ .../src/main/java/cmd/YamlConfigResult.java | 40 ++++++++++++++++++- batch-tool/src/main/java/util/FileUtil.java | 8 ++++ 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/batch-tool/docs/usage-details.md b/batch-tool/docs/usage-details.md index 101b1fb..b9b6ad4 100644 --- a/batch-tool/docs/usage-details.md +++ b/batch-tool/docs/usage-details.md @@ -45,6 +45,8 @@ \"phone\": { \"type\": \"hiding\", \"show_region\" : \"0-2,8-10\" }"` +编写复杂格式的json文件建议参考 使用yaml配置。 + ## 
数据库表导入 ### 单表导入 `-D sbtest_auto -o import -s , -t sbtest2 -dir data-backup` @@ -73,6 +75,13 @@ ### 导入Excel文件 `-D sbtest_auto -o import -s , -t "sbtest1" -format XLSX -f "sbtest1_0.xlsx"` +## 使用yaml配置 +当有很多配置项需要设置时,使用命令行参数会很不方便编辑,此时建议使用yaml格式的配置文件,示例如下: +```yaml + + +``` + # 常见问题排查 1. 报错 **the server time zone value '' is unrecognized** diff --git a/batch-tool/pom.xml b/batch-tool/pom.xml index 0a47460..c1f65ff 100644 --- a/batch-tool/pom.xml +++ b/batch-tool/pom.xml @@ -34,6 +34,8 @@ 2.0.0.Final 1.60 3.0.5 + 1.30 + @@ -147,6 +149,12 @@ easyexcel ${easy-excel.version} + + + org.yaml + snakeyaml + ${snakeyaml.version} + diff --git a/batch-tool/src/main/java/cmd/YamlConfigResult.java b/batch-tool/src/main/java/cmd/YamlConfigResult.java index da1024b..16d13c0 100644 --- a/batch-tool/src/main/java/cmd/YamlConfigResult.java +++ b/batch-tool/src/main/java/cmd/YamlConfigResult.java @@ -17,22 +17,58 @@ package cmd; import org.apache.commons.cli.CommandLine; +import org.yaml.snakeyaml.Yaml; +import util.FileUtil; + +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.util.Map; public class YamlConfigResult implements ConfigResult { private final CommandLine commandLine; + private final Map argMap; public YamlConfigResult(String yamlFilepath, CommandLine commandLine) { this.commandLine = commandLine; + if (!FileUtil.canRead(yamlFilepath)) { + throw new IllegalArgumentException("Cannot access yaml config file: " + yamlFilepath); + } + try { + argMap = new Yaml().load(new FileInputStream(yamlFilepath)); + } catch (FileNotFoundException e) { + throw new RuntimeException(e); + } } @Override public boolean hasOption(ConfigArgOption option) { - return commandLine.hasOption(option.argShort); + return commandLine.hasOption(option.argShort) || hasYamlOption(option); } @Override public String getOptionValue(ConfigArgOption option) { - return commandLine.getOptionValue(option.argShort); + // commandLine优先级高于yaml配置 + if 
(commandLine.hasOption(option.argShort)) { + return commandLine.getOptionValue(option.argShort); + } + + return getYamlOption(option); + } + + private String getYamlOption(ConfigArgOption option) { + String result = null; + if (argMap.containsKey(option.argShort)) { + result = String.valueOf(argMap.get(option.argShort)); + } + if (argMap.containsKey(option.argLong)) { + result = String.valueOf(argMap.get(option.argLong)); + } + return result; + } + + private boolean hasYamlOption(ConfigArgOption option) { + return argMap.containsKey(option.argLong) + || argMap.containsKey(option.argShort); } } diff --git a/batch-tool/src/main/java/util/FileUtil.java b/batch-tool/src/main/java/util/FileUtil.java index 5575e93..d720338 100644 --- a/batch-tool/src/main/java/util/FileUtil.java +++ b/batch-tool/src/main/java/util/FileUtil.java @@ -404,4 +404,12 @@ public static RandomAccessFile openRafForRead(File file) { throw new RuntimeException(e); } } + + public static boolean canRead(String filepath) { + File file = new File(filepath); + if (!file.exists() || !file.isFile() || !file.canRead()) { + return false; + } + return true; + } } From c51e5c2124a510640a667a9e61a0cbf19e506a1d Mon Sep 17 00:00:00 2001 From: F-ca7 <627955292@qq.com> Date: Wed, 7 Sep 2022 15:07:22 +0800 Subject: [PATCH 09/12] refactor and unify boolean flag options --- batch-tool/docs/usage-details.md | 39 ++++- batch-tool/src/main/java/cmd/CommandUtil.java | 137 +++++++++++------- .../src/main/java/cmd/ConfigArgOption.java | 56 ++----- .../src/main/java/cmd/ConfigResult.java | 20 +++ batch-tool/src/main/java/cmd/FlagOption.java | 65 +++++++++ .../java/datasource/DatasourceConstant.java | 2 +- .../main/java/model/config/BaseConfig.java | 2 +- .../java/model/config/ConfigConstant.java | 7 +- .../java/worker/common/BaseWorkHandler.java | 3 +- .../java/worker/common/reader/CsvReader.java | 2 +- .../java/worker/common/reader/XlsxReader.java | 2 +- .../worker/export/DirectExportWorker.java | 4 +- 
.../worker/factory/ExportWorkerFactory.java | 12 +- .../java/worker/insert/ImportConsumer.java | 2 +- 14 files changed, 233 insertions(+), 120 deletions(-) create mode 100644 batch-tool/src/main/java/cmd/FlagOption.java diff --git a/batch-tool/docs/usage-details.md b/batch-tool/docs/usage-details.md index b9b6ad4..ccd818e 100644 --- a/batch-tool/docs/usage-details.md +++ b/batch-tool/docs/usage-details.md @@ -36,16 +36,36 @@ `-D sbtest_auto -o export -s , -t "sbtest1" -col "id;k;c"` ### 从单机MySQL中导出数据 -`-D sbtest -o export -s , -t "sbtest1" -sharding off` +`-D sbtest -o export -s , -t "sbtest1" -sharding false` ### 进行数据脱敏 #### 对手机号进行掩码保护 -只展示前三位与末三位 -`-D sbtest -o export -s , -t "customer" -mask "{ -\"phone\": { \"type\": \"hiding\", \"show_region\" : \"0-2,8-10\" +以 TPC-H 数据集的 customer 表为例,只展示手机号 c_phone 前三位与末四位 +`-D tpch_1g -o export -s , -t "customer" -mask "{ +\"c_phone\": { \"type\": \"hiding\", \"show_region\" : \"0-2\", \"show_end\": 4 } }"` -编写复杂格式的json文件建议参考 使用yaml配置。 +原数据 +```text +c_custkey|c_name|c_address|c_nationkey|c_phone|c_acctbal|c_mktsegment|c_comment +1|Customer#000000001|IVhzIApeRb ot,c,E|15|25-989-741-2988|711.56|BUILDING|to the even, regular platelets. regular, ironic epitaphs nag +2|Customer#000000002|XSTf4,NCwDVaWNe6tEgvwfmRchLXak|13|23-768-687-3665|121.65|AUTOMOBILE|l accounts. blithely ironic theodolites integrate boldly: care +3|Customer#000000003|MG9kdTD2WBHm|1|11-719-748-3364|7498.12|AUTOMOBILE| deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abo +4|Customer#000000004|XxVSJsLAGtn|4|14-128-190-5944|2866.83|MACHINERY| requests. final, regular ideas sleep final acco +5|Customer#000000005|KvpyuHCplrB84WgAiGV6sYpZq7Tj|3|13-750-942-6364|794.47|HOUSEHOLD|n accounts will have to unwind. 
foxes cajole acco +``` + +脱敏后数据 +```text +c_custkey|c_name|c_address|c_nationkey|c_phone|c_acctbal|c_mktsegment|c_comment +1|Customer#000000001|IVhzIApeRb ot,c,E|15|25-********2988|711.56|BUILDING|to the even, regular platelets. regular, ironic epitaphs nag +2|Customer#000000002|XSTf4,NCwDVaWNe6tEgvwfmRchLXak|13|23-********3665|121.65|AUTOMOBILE|l accounts. blithely ironic theodolites integrate boldly: care +3|Customer#000000003|MG9kdTD2WBHm|1|11-********3364|7498.12|AUTOMOBILE| deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abo +4|Customer#000000004|XxVSJsLAGtn|4|14-********5944|2866.83|MACHINERY| requests. final, regular ideas sleep final acco +5|Customer#000000005|KvpyuHCplrB84WgAiGV6sYpZq7Tj|3|13-********6364|794.47|HOUSEHOLD|n accounts will have to unwind. foxes cajole acco +``` + +编写复杂格式的json文件建议参考 [使用yaml配置](#使用yaml配置)。 ## 数据库表导入 ### 单表导入 @@ -82,6 +102,8 @@ ``` +如果配置值包含[yaml特殊字符](https://yaml.org/spec/1.2.2/#53-indicator-characters)的话, 需要用引号括起来。 + # 常见问题排查 1. 报错 **the server time zone value '' is unrecognized** @@ -93,12 +115,12 @@ **原因**:批量导出时默认以 PolarDB-X 的物理表拓扑进行分布式导出, 如果对普通 MySQL数据库进行导出,需要关闭 sharding 参数 - **解决**:加入参数:`-sharding off` + **解决**:加入参数:`-sharding false` 3. 数据文件使用的分隔符是tab缩进,需要怎么输入`-s` 参数? **解决**:直接在shell中输入tab键,即`-s " "` -5. 数据文件使用的分隔符是ascii控制字符(如`\x01`等),需要怎么输入`-s` 参数? +4. 数据文件使用的分隔符是ascii控制字符(如`\x01`等),需要怎么输入`-s` 参数? 
```text 1^A123^A1123^A12321312 @@ -107,4 +129,5 @@ ``` > ^A 为 \x01 的Caret notation - **解决**:输入`-s $'\x01'` 即可 \ No newline at end of file + **解决**:输入`-s $'\x01'` 即可。 + > 暂时无法处理NULL字符(`\x00`)作为分隔符,可以通过修改源代码解决。 \ No newline at end of file diff --git a/batch-tool/src/main/java/cmd/CommandUtil.java b/batch-tool/src/main/java/cmd/CommandUtil.java index 0900ade..fb8ef80 100644 --- a/batch-tool/src/main/java/cmd/CommandUtil.java +++ b/batch-tool/src/main/java/cmd/CommandUtil.java @@ -54,8 +54,60 @@ import java.util.Map; import java.util.stream.Collectors; -import static cmd.ConfigArgOption.*; -import static model.config.ConfigConstant.*; +import static cmd.ConfigArgOption.ARG_SHORT_BATCH_SIZE; +import static cmd.ConfigArgOption.ARG_SHORT_CHARSET; +import static cmd.ConfigArgOption.ARG_SHORT_COLUMNS; +import static cmd.ConfigArgOption.ARG_SHORT_COMPRESS; +import static cmd.ConfigArgOption.ARG_SHORT_CONFIG_FILE; +import static cmd.ConfigArgOption.ARG_SHORT_CONN_INIT_SQL; +import static cmd.ConfigArgOption.ARG_SHORT_CONN_PARAM; +import static cmd.ConfigArgOption.ARG_SHORT_CONSUMER; +import static cmd.ConfigArgOption.ARG_SHORT_DBNAME; +import static cmd.ConfigArgOption.ARG_SHORT_DIRECTORY; +import static cmd.ConfigArgOption.ARG_SHORT_ENCRYPTION; +import static cmd.ConfigArgOption.ARG_SHORT_FILE_FORMAT; +import static cmd.ConfigArgOption.ARG_SHORT_FILE_NUM; +import static cmd.ConfigArgOption.ARG_SHORT_FORCE_CONSUMER; +import static cmd.ConfigArgOption.ARG_SHORT_FROM_FILE; +import static cmd.ConfigArgOption.ARG_SHORT_HELP; +import static cmd.ConfigArgOption.ARG_SHORT_HISTORY_FILE; +import static cmd.ConfigArgOption.ARG_SHORT_HOST; +import static cmd.ConfigArgOption.ARG_SHORT_KEY; +import static cmd.ConfigArgOption.ARG_SHORT_LINE; +import static cmd.ConfigArgOption.ARG_SHORT_MASK; +import static cmd.ConfigArgOption.ARG_SHORT_MAX_CONN_NUM; +import static cmd.ConfigArgOption.ARG_SHORT_MAX_ERROR; +import static cmd.ConfigArgOption.ARG_SHORT_MAX_WAIT; +import static 
cmd.ConfigArgOption.ARG_SHORT_MIN_CONN_NUM; +import static cmd.ConfigArgOption.ARG_SHORT_OPERATION; +import static cmd.ConfigArgOption.ARG_SHORT_ORDER; +import static cmd.ConfigArgOption.ARG_SHORT_ORDER_COLUMN; +import static cmd.ConfigArgOption.ARG_SHORT_PASSWORD; +import static cmd.ConfigArgOption.ARG_SHORT_PORT; +import static cmd.ConfigArgOption.ARG_SHORT_PREFIX; +import static cmd.ConfigArgOption.ARG_SHORT_PRODUCER; +import static cmd.ConfigArgOption.ARG_SHORT_QUOTE_ENCLOSE_MODE; +import static cmd.ConfigArgOption.ARG_SHORT_READ_BLOCK_SIZE; +import static cmd.ConfigArgOption.ARG_SHORT_RING_BUFFER_SIZE; +import static cmd.ConfigArgOption.ARG_SHORT_SEP; +import static cmd.ConfigArgOption.ARG_SHORT_TABLE; +import static cmd.ConfigArgOption.ARG_SHORT_TPS_LIMIT; +import static cmd.ConfigArgOption.ARG_SHORT_USERNAME; +import static cmd.ConfigArgOption.ARG_SHORT_VERSION; +import static cmd.ConfigArgOption.ARG_SHORT_WHERE; +import static cmd.ConfigArgOption.ARG_SHORT_WITH_DDL; +import static cmd.FlagOption.ARG_SHORT_ENABLE_SHARDING; +import static cmd.FlagOption.ARG_SHORT_IGNORE_AND_RESUME; +import static cmd.FlagOption.ARG_SHORT_LOAD_BALANCE; +import static cmd.FlagOption.ARG_SHORT_LOCAL_MERGE; +import static cmd.FlagOption.ARG_SHORT_NO_ESCAPE; +import static cmd.FlagOption.ARG_SHORT_PARALLEL_MERGE; +import static cmd.FlagOption.ARG_SHORT_PERF_MODE; +import static cmd.FlagOption.ARG_SHORT_READ_FILE_ONLY; +import static cmd.FlagOption.ARG_SHORT_SQL_FUNC; +import static cmd.FlagOption.ARG_SHORT_USING_IN; +import static cmd.FlagOption.ARG_SHORT_WITH_HEADER; +import static cmd.FlagOption.ARG_SHORT_WITH_LAST_SEP; /** * 从命令行输入解析配置 @@ -143,7 +195,7 @@ public static DataSourceConfig getDataSourceConfigFromCmd(ConfigResult result) { .connParam(getConnParam(result)) .initSqls(getInitSqls(result)); - if (result.hasOption(ARG_SHORT_LOAD_BALANCE)) { + if (getLoadBalance(result)) { configBuilder.loadBalanceEnabled(true); } else { 
configBuilder.port(result.getOptionValue(ARG_SHORT_PORT)) @@ -152,6 +204,10 @@ public static DataSourceConfig getDataSourceConfigFromCmd(ConfigResult result) { return configBuilder.build(); } + private static boolean getLoadBalance(ConfigResult result) { + return result.getBooleanFlag(ARG_SHORT_LOAD_BALANCE); + } + private static int getMaxWait(ConfigResult result) { if (result.hasOption(ARG_SHORT_MAX_WAIT)) { return Integer.parseInt(result.getOptionValue(ARG_SHORT_MAX_WAIT)); @@ -233,8 +289,7 @@ private static BaseOperateCommand initCommand(ConfigResult result) { private static void afterInitCommand(BaseOperateCommand command, ConfigResult result) { if (result.hasOption(ARG_SHORT_ENABLE_SHARDING)) { - boolean shardingEnabled = parseFlag(result.getOptionValue(ARG_SHORT_ENABLE_SHARDING)); - command.setShardingEnabled(shardingEnabled); + command.setShardingEnabled(result.getBooleanFlag(ARG_SHORT_ENABLE_SHARDING)); } } @@ -244,7 +299,7 @@ private static List getTableNames(ConfigResult result) { } String tableNameStr = result.getOptionValue(ARG_SHORT_TABLE); return Lists.newArrayList( - StringUtils.split(tableNameStr, CMD_SEPARATOR)); + StringUtils.split(tableNameStr, ConfigConstant.CMD_SEPARATOR)); } private static List getColumnNames(ConfigResult result) { @@ -253,7 +308,7 @@ private static List getColumnNames(ConfigResult result) { } String columnNameStr = result.getOptionValue(ARG_SHORT_COLUMNS); return Lists.newArrayList( - StringUtils.split(columnNameStr, CMD_SEPARATOR)); + StringUtils.split(columnNameStr, ConfigConstant.CMD_SEPARATOR)); } private static BaseOperateCommand parseImportCommand(ConfigResult result) { @@ -299,21 +354,19 @@ private static Charset getCharset(ConfigResult result) { if (result.hasOption(ARG_SHORT_CHARSET)) { String charset = result.getOptionValue(ARG_SHORT_CHARSET); return Charset.forName(charset); - } else { - return ConfigConstant.DEFAULT_CHARSET; } + return ConfigConstant.DEFAULT_CHARSET; } private static boolean 
getWithHeader(ConfigResult result) { - return result.hasOption(ARG_SHORT_WITH_HEADER); + return result.getBooleanFlag(ARG_SHORT_WITH_HEADER); } private static CompressMode getCompressMode(ConfigResult result) { if (result.hasOption(ARG_SHORT_COMPRESS)) { return CompressMode.fromString(result.getOptionValue(ARG_SHORT_COMPRESS)); - } else { - return ConfigConstant.DEFAULT_COMPRESS_MODE; } + return ConfigConstant.DEFAULT_COMPRESS_MODE; } private static EncryptionConfig getEncryptionConfig(ConfigResult result) { @@ -321,31 +374,28 @@ private static EncryptionConfig getEncryptionConfig(ConfigResult result) { String encryptionMode = result.getOptionValue(ARG_SHORT_ENCRYPTION); String key = result.getOptionValue(ARG_SHORT_KEY); return EncryptionConfig.parse(encryptionMode, key); - } else { - return DEFAULT_ENCRYPTION_CONFIG; } + return ConfigConstant.DEFAULT_ENCRYPTION_CONFIG; } private static int getReadBlockSizeInMb(ConfigResult result) { if (result.hasOption(ARG_SHORT_READ_BLOCK_SIZE)) { return Integer.parseInt( result.getOptionValue(ARG_SHORT_READ_BLOCK_SIZE)); - } else { - return ConfigConstant.DEFAULT_READ_BLOCK_SIZE_IN_MB; } + return ConfigConstant.DEFAULT_READ_BLOCK_SIZE_IN_MB; } private static boolean getWithLastSep(ConfigResult result) { - return result.hasOption(ARG_SHORT_WITH_LAST_SEP); + return result.getBooleanFlag(ARG_SHORT_WITH_LAST_SEP); } private static FileFormat getFileFormat(ConfigResult result) { if (result.hasOption(ARG_SHORT_FILE_FORMAT)) { String fileFormat = result.getOptionValue(ARG_SHORT_FILE_FORMAT); return FileFormat.fromString(fileFormat); - } else { - return DEFAULT_FILE_FORMAT; } + return ConfigConstant.DEFAULT_FILE_FORMAT; } //endregion 读写文件相关配置 @@ -386,13 +436,13 @@ private static void setOrderBy(ConfigResult result, ExportConfig exportConfig) { if (!result.hasOption(ARG_SHORT_ORDER_COLUMN)) { throw new IllegalArgumentException("Order column name cannot be empty"); } - if (result.hasOption(ARG_SHORT_LOCAL_MERGE)) { + if 
(result.getBooleanFlag(ARG_SHORT_LOCAL_MERGE)) { exportConfig.setLocalMerge(true); } exportConfig .setAscending(!ConfigConstant.ORDER_BY_TYPE_DESC.equals(result.getOptionValue(ARG_SHORT_ORDER))); List columnNameList = Arrays.asList(StringUtils.split(result.getOptionValue(ARG_SHORT_ORDER_COLUMN), - CMD_SEPARATOR)); + ConfigConstant.CMD_SEPARATOR)); exportConfig.setOrderByColumnNameList(columnNameList); exportConfig.setParallelMerge(getParaMerge(result)); } @@ -438,7 +488,7 @@ private static void setFileNum(ConfigResult result, ExportConfig exportConfig) { } private static boolean getParaMerge(ConfigResult result) { - return result.hasOption(ARG_SHORT_PARALLEL_MERGE); + return result.getBooleanFlag(ARG_SHORT_PARALLEL_MERGE); } //endregion 导出相关设置 @@ -517,11 +567,11 @@ private static String getUseColumns(ConfigResult result) { } private static boolean getWhereInEnabled(ConfigResult result) { - return result.hasOption(ARG_SHORT_USING_IN); + return result.getBooleanFlag(ARG_SHORT_USING_IN); } private static boolean getReadAndProcessFileOnly(ConfigResult result) { - return result.hasOption(ARG_SHORT_READ_FILE_ONLY); + return result.getBooleanFlag(ARG_SHORT_READ_FILE_ONLY); } private static String getDbName(ConfigResult result) { @@ -556,7 +606,7 @@ private static QuoteEncloseMode getQuoteEncloseMode(ConfigResult result) { if (result.hasOption(ARG_SHORT_QUOTE_ENCLOSE_MODE)) { return QuoteEncloseMode.parseMode(result.getOptionValue(ARG_SHORT_QUOTE_ENCLOSE_MODE)); } else { - return DEFAULT_QUOTE_ENCLOSE_MODE; + return ConfigConstant.DEFAULT_QUOTE_ENCLOSE_MODE; } } @@ -575,10 +625,10 @@ private static boolean getForceParallelism(ConfigResult result) { private static List getFileRecordList(ConfigResult result) { if (result.hasOption(ARG_SHORT_FROM_FILE)) { String filePathListStr = result.getOptionValue(ARG_SHORT_FROM_FILE); - return Arrays.stream(StringUtils.split(filePathListStr, CMD_SEPARATOR)) + return Arrays.stream(StringUtils.split(filePathListStr, 
ConfigConstant.CMD_SEPARATOR)) .filter(StringUtils::isNotBlank) .map(s -> { - String[] strs = StringUtils.split(s, CMD_FILE_LINE_SEPARATOR); + String[] strs = StringUtils.split(s, ConfigConstant.CMD_FILE_LINE_SEPARATOR); if (strs.length == 1) { String fileAbsPath = FileUtil.getFileAbsPath(strs[0]); return new FileLineRecord(fileAbsPath); @@ -611,7 +661,7 @@ private static int getTpsLimit(ConfigResult result) { } private static boolean getInsertIgnoreAndResumeEnabled(ConfigResult result) { - return result.hasOption(ARG_SHORT_IGNORE_AND_RESUME); + return result.getBooleanFlag(ARG_SHORT_IGNORE_AND_RESUME); } private static DdlMode getDdlMode(ConfigResult result) { @@ -624,17 +674,15 @@ private static DdlMode getDdlMode(ConfigResult result) { private static int getMaxErrorCount(ConfigResult result) { if (result.hasOption(ARG_SHORT_MAX_ERROR)) { return Integer.parseInt(result.getOptionValue(ARG_SHORT_MAX_ERROR)); - } else { - return DEFAULT_MAX_ERROR_COUNT; } + return ConfigConstant.DEFAULT_MAX_ERROR_COUNT; } private static String getHistoryFile(ConfigResult result) { if (result.hasOption(ARG_SHORT_HISTORY_FILE)) { return result.getOptionValue(ARG_SHORT_HISTORY_FILE); - } else { - return null; } + return null; } private static String getWhereCondition(ConfigResult result) { @@ -642,11 +690,11 @@ private static String getWhereCondition(ConfigResult result) { } private static boolean getSqlEscapeEnabled(ConfigResult result) { - return !result.hasOption(ARG_SHORT_NO_ESCAPE); + return !result.getBooleanFlag(ARG_SHORT_NO_ESCAPE); } private static boolean getFuncEnabled(ConfigResult result) { - return result.hasOption(ARG_SHORT_SQL_FUNC); + return result.getBooleanFlag(ARG_SHORT_SQL_FUNC); } //endregion 写入数据库操作的设置 @@ -669,7 +717,7 @@ private static void setBatchSize(ConfigResult result) { } private static void setPerfMode(ConfigResult result) { - GlobalVar.IN_PERF_MODE = result.hasOption(ARG_SHORT_PERF_MODE); + GlobalVar.IN_PERF_MODE = 
result.getBooleanFlag(ARG_SHORT_PERF_MODE); } //endregion 全局相关设置 @@ -719,23 +767,6 @@ private static CommandType lookup(String commandType) { throw new IllegalArgumentException("Do not support command " + commandType); } - /** - * 解析 ON | OFF | TRUE | FALSE 字符串 - */ - private static boolean parseFlag(String flag) { - if (StringUtils.isEmpty(flag)) { - return false; - } - flag = StringUtils.strip(flag); - if (flag.equalsIgnoreCase("ON") || flag.equalsIgnoreCase("TRUE")) { - return true; - } - if (flag.equalsIgnoreCase("OFF") || flag.equalsIgnoreCase("FALSE")) { - return false; - } - throw new IllegalArgumentException("Illegal flag string: " + flag + ". Should be ON or OFF"); - } - public static boolean doHelpCmd(ConfigResult ConfigResult) { if (CommandUtil.isShowHelp(ConfigResult)) { printHelp(); @@ -743,7 +774,7 @@ public static boolean doHelpCmd(ConfigResult ConfigResult) { } if (CommandUtil.isShowVersion(ConfigResult)) { - System.out.printf("%s: %s%n", APP_NAME, Version.getVersion()); + System.out.printf("%s: %s%n", ConfigConstant.APP_NAME, Version.getVersion()); return true; } return false; diff --git a/batch-tool/src/main/java/cmd/ConfigArgOption.java b/batch-tool/src/main/java/cmd/ConfigArgOption.java index bbf40e0..4635bc4 100644 --- a/batch-tool/src/main/java/cmd/ConfigArgOption.java +++ b/batch-tool/src/main/java/cmd/ConfigArgOption.java @@ -22,26 +22,21 @@ public class ConfigArgOption { protected final String desc; protected final String argName; - private ConfigArgOption(String argShort, String argLong, String desc) { - this(argShort, argLong, desc, null); - } - - private ConfigArgOption(String argShort, String argLong, String desc, String argName) { + protected ConfigArgOption(String argShort, String argLong, String desc, String argName) { this.argShort = argShort; this.argLong = argLong; this.desc = desc; this.argName = argName; } - + private static ConfigArgOption of(String argShort, String argLong, String desc) { - return new ConfigArgOption(argShort, 
argLong, desc); + return new ConfigArgOption(argShort, argLong, desc, null); } private static ConfigArgOption of(String argShort, String argLong, String desc, String argName) { return new ConfigArgOption(argShort, argLong, desc, argName); } - public static final ConfigArgOption ARG_SHORT_HELP = of("help", "help", "Help message."); public static final ConfigArgOption ARG_SHORT_VERSION = @@ -58,13 +53,10 @@ private static ConfigArgOption of(String argShort, String argLong, String desc, of("P", "port", "Port number of database.", "port"); public static final ConfigArgOption ARG_SHORT_DBNAME = of("D", "database", "Database name.", "database"); - public static final ConfigArgOption ARG_SHORT_LOAD_BALANCE = - of("lb", "loadbalance", - "Use jdbc load balance, filling the arg in $host like 'host1:port1,host2:port2'."); public static final ConfigArgOption ARG_SHORT_OPERATION = of("o", "operation", "Batch operation type: export / import / delete / update.", "operation"); public static final ConfigArgOption ARG_SHORT_ORDER = - of("O", "orderby", "Order by type: asc / desc.", "order"); + of("O", "orderby", "Order by type: asc / desc.", "asc | desc"); public static final ConfigArgOption ARG_SHORT_ORDER_COLUMN = of("OC", "orderCol", "Ordered column names.", "col1;col2;col3"); public static final ConfigArgOption ARG_SHORT_COLUMNS = @@ -85,29 +77,17 @@ private static ConfigArgOption of(String argShort, String argLong, String desc, of("H", "historyFile", "History file name.", "filepath"); public static final ConfigArgOption ARG_SHORT_WHERE = of("w", "where", "Where condition: col1>99 AND col2<100 ...", "where condition"); - public static final ConfigArgOption ARG_SHORT_ENABLE_SHARDING = - of("sharding", "sharding", "Whether enable sharding mode.", "ON / OFF"); - public static final ConfigArgOption ARG_SHORT_WITH_HEADER = - of("header", "header", "Whether the header line is column names (default no)."); public static final ConfigArgOption ARG_SHORT_DIRECTORY = of("dir", "directory", 
"Directory path including files to import.", "directory path"); public static final ConfigArgOption ARG_SHORT_CHARSET = of("cs", "charset", "The charset of files.", "charset"); - public static final ConfigArgOption ARG_SHORT_IGNORE_AND_RESUME = - of("i", "ignore", "Flag of insert ignore and resume breakpoint."); - public static final ConfigArgOption ARG_SHORT_PRODUCER = + public static final ConfigArgOption ARG_SHORT_PRODUCER = of("pro", "producer", "Configure number of producer threads (export / import).", "producer count"); public static final ConfigArgOption ARG_SHORT_CONSUMER = of("con", "consumer", "Configure number of consumer threads.", "consumer count"); public static final ConfigArgOption ARG_SHORT_FORCE_CONSUMER = of("fcon", "forceConsumer", "Configure if allow force consumer parallelism.", "parallelism"); - public static final ConfigArgOption ARG_SHORT_LOCAL_MERGE = - of("local", "localMerge", "Use local merge sort."); - public static final ConfigArgOption ARG_SHORT_SQL_FUNC = - of("func", "sqlFunc", "Use sql function to update."); - public static final ConfigArgOption ARG_SHORT_NO_ESCAPE = - of("noEsc", "noEscape", "Do not escape value for sql."); - public static final ConfigArgOption ARG_SHORT_MAX_CONN_NUM = + public static final ConfigArgOption ARG_SHORT_MAX_CONN_NUM = of("maxConn", "maxConnection", "Max connection count (druid).", "max connection"); public static final ConfigArgOption ARG_SHORT_MAX_WAIT = of("maxWait", "connMaxWait", "Max wait time when getting a connection.", "wait time(ms)"); @@ -123,34 +103,24 @@ private static ConfigArgOption of(String argShort, String argLong, String desc, of("readsize", "readSize", "Read block size.", "size(MB)"); public static final ConfigArgOption ARG_SHORT_RING_BUFFER_SIZE = of("ringsize", "ringSize", "Ring buffer size.", "size (power of 2)"); - public static final ConfigArgOption ARG_SHORT_READ_FILE_ONLY = - of("rfonly", "readFileOnly", "Only read and process file, no sql execution."); - public static final 
ConfigArgOption ARG_SHORT_USING_IN = - of("in", "whereIn", "Using where cols in (values)."); - public static final ConfigArgOption ARG_SHORT_WITH_LAST_SEP = - of("lastSep", "withLastSep", "Whether line ends with separator."); - public static final ConfigArgOption ARG_SHORT_PARALLEL_MERGE = - of("para", "paraMerge", "Use parallel merge when doing order by export."); - public static final ConfigArgOption ARG_SHORT_QUOTE_ENCLOSE_MODE = + public static final ConfigArgOption ARG_SHORT_QUOTE_ENCLOSE_MODE = of("quote", "quoteMode", - "The mode of how field values are enclosed by double-quotes when exporting table.", - "AUTO (default) / FORCE / NONE"); + "The mode of how field values are enclosed by double-quotes when exporting table (default AUTO).", + "AUTO | FORCE | NONE"); public static final ConfigArgOption ARG_SHORT_TPS_LIMIT = of("tps", "tpsLimit", "Configure of tps limit (default -1: no limit).", "tps limit"); public static final ConfigArgOption ARG_SHORT_WITH_DDL = - of("DDL", "DDL", "Export or import with DDL sql mode.", "NONE (default) / ONLY / WITH"); + of("DDL", "DDL", "Export or import with DDL sql mode (default NONE).", "NONE | ONLY | WITH"); public static final ConfigArgOption ARG_SHORT_COMPRESS = - of("comp", "compress", "Export or import compressed file.", "NONE (default) / GZIP"); + of("comp", "compress", "Export or import compressed file (default NONE).", "NONE | GZIP"); public static final ConfigArgOption ARG_SHORT_ENCRYPTION = - of("encrypt", "encrypt", "Export or import with encrypted file.", "NONE (default) / AES / SM4"); + of("encrypt", "encrypt", "Export or import with encrypted file (default NONE).", "NONE | AES | SM4"); public static final ConfigArgOption ARG_SHORT_KEY = of("key", "secretKey", "Secret key used during encryption.", "string-type key"); public static final ConfigArgOption ARG_SHORT_FILE_FORMAT = - of("format", "fileFormat", "File format.", "NONE (default) / TXT / CSV / XLS / XLSX"); + of("format", "fileFormat", "File format 
(default NONE).", "NONE | TXT | CSV | XLS | XLSX"); public static final ConfigArgOption ARG_SHORT_MAX_ERROR = of("error", "maxError", "Max error count threshold, program exits when the limit is exceeded.", "max error count"); - public static final ConfigArgOption ARG_SHORT_PERF_MODE = - of("perf", "perfMode", "Use performance mode (at the sacrifice of compatibility.)"); public static final ConfigArgOption ARG_SHORT_MASK = of("mask", "mask", "Masking sensitive columns while exporting data.", "Json format config"); diff --git a/batch-tool/src/main/java/cmd/ConfigResult.java b/batch-tool/src/main/java/cmd/ConfigResult.java index de16111..f857d2f 100644 --- a/batch-tool/src/main/java/cmd/ConfigResult.java +++ b/batch-tool/src/main/java/cmd/ConfigResult.java @@ -16,9 +16,29 @@ package cmd; +import org.apache.commons.lang3.StringUtils; + public interface ConfigResult { boolean hasOption(ConfigArgOption option); String getOptionValue(ConfigArgOption option); + + default boolean getBooleanFlag(FlagOption option) { + String flag = getOptionValue(option); + if (StringUtils.isEmpty(flag)) { + if (option.defaultValue == null) { + throw new IllegalArgumentException("Empty value of option: --" + option.argLong); + } + return option.defaultValue; + } + flag = StringUtils.strip(flag); + if (flag.equalsIgnoreCase("TRUE")) { + return true; + } + if (flag.equalsIgnoreCase("FALSE")) { + return false; + } + throw new IllegalArgumentException("Illegal flag string: " + flag + ". Should be TRUE or FALSE"); + } } diff --git a/batch-tool/src/main/java/cmd/FlagOption.java b/batch-tool/src/main/java/cmd/FlagOption.java new file mode 100644 index 0000000..39a3d7c --- /dev/null +++ b/batch-tool/src/main/java/cmd/FlagOption.java @@ -0,0 +1,65 @@ +/* + * Copyright [2013-2021], Alibaba Group Holding Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd; + + +public class FlagOption extends ConfigArgOption { + + public Boolean defaultValue = null; + + private FlagOption(String argShort, String argLong, String desc, String argName, boolean defaultValue) { + super(argShort, argLong, desc, argName); + this.defaultValue = defaultValue; + } + + /** + * Boolean flag option + */ + private static FlagOption of(String argShort, String argLong, String desc) { + return of(argShort, argLong, desc, null); + } + + private static FlagOption of(String argShort, String argLong, String desc, Boolean defaultValue) { + return new FlagOption(argShort, argLong, desc, "true | false", defaultValue); + } + + public static final FlagOption ARG_SHORT_LOAD_BALANCE = + FlagOption.of("lb", "loadbalance", + "Use jdbc load balance, filling the arg in $host like 'host1:port1,host2:port2' (default false).", false); + public static final FlagOption ARG_SHORT_ENABLE_SHARDING = + of("sharding", "sharding", "Whether enable sharding mode (default value depends on operation)."); + public static final FlagOption ARG_SHORT_WITH_HEADER = + of("header", "header", "Whether the header line is column names (default false).", false); + public static final FlagOption ARG_SHORT_IGNORE_AND_RESUME = + of("i", "ignore", "Flag of insert ignore and resume breakpoint (default false).", false); + public static final FlagOption ARG_SHORT_LOCAL_MERGE = + of("local", "localMerge", "Use local merge sort (default false).", false); + public static final FlagOption ARG_SHORT_SQL_FUNC = + of("func", "sqlFunc", "Use sql function to update 
(default false).", false); + public static final FlagOption ARG_SHORT_NO_ESCAPE = + of("noEsc", "noEscape", "Do not escape value for sql (default false).", false); + public static final FlagOption ARG_SHORT_READ_FILE_ONLY = + of("rfonly", "readFileOnly", "Only read and process file, no sql execution (default false).", false); + public static final FlagOption ARG_SHORT_USING_IN = + of("in", "whereIn", "Using where cols in (values).", false); + public static final FlagOption ARG_SHORT_WITH_LAST_SEP = + of("lastSep", "withLastSep", "Whether line ends with separator (default false).", false); + public static final FlagOption ARG_SHORT_PARALLEL_MERGE = + of("para", "paraMerge", "Use parallel merge when doing order by export (default false).", false); + public static final FlagOption ARG_SHORT_PERF_MODE = + of("perf", "perfMode", "Use performance mode at the sacrifice of compatibility (default false).", false); +} diff --git a/batch-tool/src/main/java/datasource/DatasourceConstant.java b/batch-tool/src/main/java/datasource/DatasourceConstant.java index 0e80f0b..c5baef4 100644 --- a/batch-tool/src/main/java/datasource/DatasourceConstant.java +++ b/batch-tool/src/main/java/datasource/DatasourceConstant.java @@ -18,7 +18,7 @@ public class DatasourceConstant { - public static final int MAX_CONN_NUM = 1024; + public static final int MAX_CONN_NUM = 218; public static final int MIN_CONN_NUM = 32; /** diff --git a/batch-tool/src/main/java/model/config/BaseConfig.java b/batch-tool/src/main/java/model/config/BaseConfig.java index ee6a623..582d48f 100644 --- a/batch-tool/src/main/java/model/config/BaseConfig.java +++ b/batch-tool/src/main/java/model/config/BaseConfig.java @@ -97,7 +97,7 @@ public String getSeparator() { public void setSeparator(String separator) { if (separator.isEmpty()) { - separator = ConfigConstant.NULL_SEPARATOR; + throw new IllegalArgumentException("Separator cannot be empty"); } // 分隔符不能包含特殊字符 for (String illegalStr : ConfigConstant.ILLEGAL_SEPARATORS) { 
diff --git a/batch-tool/src/main/java/model/config/ConfigConstant.java b/batch-tool/src/main/java/model/config/ConfigConstant.java index 620f2e4..b8b8b7d 100644 --- a/batch-tool/src/main/java/model/config/ConfigConstant.java +++ b/batch-tool/src/main/java/model/config/ConfigConstant.java @@ -107,8 +107,6 @@ public class ConfigConstant { */ public static final int DEFAULT_MAX_ERROR_COUNT = 0; - public static final boolean DEFAULT_WITH_HEADER = false; - public static final String BROKEN_LINE_FILE_NAME = "err-data"; public static final String ORDER_BY_TYPE_ASC = "asc"; @@ -126,7 +124,8 @@ public class ConfigConstant { * OpenCSV库不支持直接读取一行 需读取出字段再用该魔法值拼接 * FIXME */ - public static final String MAGIC_CSV_SEP = "|@|"; + public static final String MAGIC_CSV_SEP1 = "|@|"; + public static final String MAGIC_CSV_SEP2 = "^@^"; /** * 仅导出时默认开启基于分库分表的模式 @@ -137,6 +136,6 @@ public class ConfigConstant { public static final List ILLEGAL_SEPARATORS = new ArrayList() {{ add("\""); add("\\"); - add(MAGIC_CSV_SEP); + add(MAGIC_CSV_SEP1); }}; } diff --git a/batch-tool/src/main/java/worker/common/BaseWorkHandler.java b/batch-tool/src/main/java/worker/common/BaseWorkHandler.java index 9d6eb3f..85ec703 100644 --- a/batch-tool/src/main/java/worker/common/BaseWorkHandler.java +++ b/batch-tool/src/main/java/worker/common/BaseWorkHandler.java @@ -18,7 +18,6 @@ import com.google.common.util.concurrent.RateLimiter; import com.lmax.disruptor.WorkHandler; -import jdk.nashorn.internal.objects.Global; import model.ConsumerExecutionContext; import model.config.ConfigConstant; import model.config.GlobalVar; @@ -44,7 +43,7 @@ protected void initLocalVars() { return; } if (consumerContext.isUseMagicSeparator()) { - this.sep = ConfigConstant.MAGIC_CSV_SEP; + this.sep = ConfigConstant.MAGIC_CSV_SEP1; hasEscapedQuote = true; } else { this.sep = consumerContext.getSeparator(); diff --git a/batch-tool/src/main/java/worker/common/reader/CsvReader.java 
b/batch-tool/src/main/java/worker/common/reader/CsvReader.java index b202445..998059d 100644 --- a/batch-tool/src/main/java/worker/common/reader/CsvReader.java +++ b/batch-tool/src/main/java/worker/common/reader/CsvReader.java @@ -66,7 +66,7 @@ protected void readData() { try { for (String[] fields; (fields = reader.readNext()) != null; ) { localProcessingBlockIndex++; - String line = String.join(ConfigConstant.MAGIC_CSV_SEP, fields); + String line = String.join(ConfigConstant.MAGIC_CSV_SEP1, fields); appendToLineBuffer(line); } emitLineBuffer(); diff --git a/batch-tool/src/main/java/worker/common/reader/XlsxReader.java b/batch-tool/src/main/java/worker/common/reader/XlsxReader.java index 78431a9..82640b1 100644 --- a/batch-tool/src/main/java/worker/common/reader/XlsxReader.java +++ b/batch-tool/src/main/java/worker/common/reader/XlsxReader.java @@ -74,7 +74,7 @@ public void invoke(Map map, AnalysisContext analysisContext) { private void appendData(Collection values) { localProcessingBlockIndex++; - String line = String.join(ConfigConstant.MAGIC_CSV_SEP, values); + String line = String.join(ConfigConstant.MAGIC_CSV_SEP1, values); appendToLineBuffer(line); } diff --git a/batch-tool/src/main/java/worker/export/DirectExportWorker.java b/batch-tool/src/main/java/worker/export/DirectExportWorker.java index 3700dd4..92b5702 100644 --- a/batch-tool/src/main/java/worker/export/DirectExportWorker.java +++ b/batch-tool/src/main/java/worker/export/DirectExportWorker.java @@ -216,7 +216,9 @@ private void beforeRun() { } private void afterRun() { - countDownLatch.countDown(); + if (countDownLatch != null) { + countDownLatch.countDown(); + } if (permitted != null) { permitted.release(); } diff --git a/batch-tool/src/main/java/worker/factory/ExportWorkerFactory.java b/batch-tool/src/main/java/worker/factory/ExportWorkerFactory.java index 3a26d06..518871e 100644 --- a/batch-tool/src/main/java/worker/factory/ExportWorkerFactory.java +++ 
b/batch-tool/src/main/java/worker/factory/ExportWorkerFactory.java @@ -62,9 +62,7 @@ public static DirectExportWorker buildDefaultDirectExportWorker(DataSource druid default: throw new UnsupportedOperationException("Do not support direct export when fixed file num"); } - directExportWorker.setWhereCondition(config.getWhereCondition()); - directExportWorker.putDataMaskerMap(config.getColumnMaskerConfigMap()); - directExportWorker.setWithLastSep(config.isWithLastSep()); + afterInit(directExportWorker, config); return directExportWorker; } @@ -105,7 +103,13 @@ public static DirectOrderExportWorker buildDirectOrderExportWorker(DataSource dr config.getSeparator(), config.isAscending(), config.isWithHeader(), config.getQuoteEncloseMode(), config.getCompressMode(), config.getFileFormat(), cipher); - directOrderExportWorker.setWithLastSep(config.isWithLastSep()); + afterInit(directOrderExportWorker, config); return directOrderExportWorker; } + + public static void afterInit(DirectExportWorker worker, ExportConfig config) { + worker.setWhereCondition(config.getWhereCondition()); + worker.putDataMaskerMap(config.getColumnMaskerConfigMap()); + worker.setWithLastSep(config.isWithLastSep()); + } } diff --git a/batch-tool/src/main/java/worker/insert/ImportConsumer.java b/batch-tool/src/main/java/worker/insert/ImportConsumer.java index 90e5550..4e9ba48 100644 --- a/batch-tool/src/main/java/worker/insert/ImportConsumer.java +++ b/batch-tool/src/main/java/worker/insert/ImportConsumer.java @@ -54,7 +54,7 @@ protected void fillLocalBuffer(StringBuilder stringBuilder, List values) values, consumerContext.isSqlEscapeEnabled(), hasEscapedQuote); } catch (DatabaseException e) { // 在split预处理过后仍存在的问题 - logger.error(StringUtils.join(values, ConfigConstant.MAGIC_CSV_SEP)); + logger.error(StringUtils.join(values, ConfigConstant.MAGIC_CSV_SEP1)); throw new RuntimeException(e); } From def513691b8f35792c35ecb114c4848bcfdef63e Mon Sep 17 00:00:00 2001 From: F-ca7 <627955292@qq.com> Date: Wed, 7 
Sep 2022 15:15:52 +0800 Subject: [PATCH 10/12] update usage-details with yaml config example --- batch-tool/docs/usage-details.md | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/batch-tool/docs/usage-details.md b/batch-tool/docs/usage-details.md index ccd818e..424d321 100644 --- a/batch-tool/docs/usage-details.md +++ b/batch-tool/docs/usage-details.md @@ -15,7 +15,7 @@ -F 1 两张表将分别导出为两个单独的文件 `-D sbtest_auto -o export -s , -t "sbtest1;sbtest2" -F 1` -### 整库导出 60 +### 整库导出 -F 1 库中每张表都分别导出为单独的一个文件 `-D sbtest_auto -o export -s , -F 1` @@ -45,7 +45,7 @@ \"c_phone\": { \"type\": \"hiding\", \"show_region\" : \"0-2\", \"show_end\": 4 }"` -原数据 +**原数据** ```text c_custkey|c_name|c_address|c_nationkey|c_phone|c_acctbal|c_mktsegment|c_comment 1|Customer#000000001|IVhzIApeRb ot,c,E|15|25-989-741-2988|711.56|BUILDING|to the even, regular platelets. regular, ironic epitaphs nag @@ -55,7 +55,7 @@ c_custkey|c_name|c_address|c_nationkey|c_phone|c_acctbal|c_mktsegment|c_comment 5|Customer#000000005|KvpyuHCplrB84WgAiGV6sYpZq7Tj|3|13-750-942-6364|794.47|HOUSEHOLD|n accounts will have to unwind. foxes cajole acco ``` -脱敏后数据 +**脱敏后数据** ```text c_custkey|c_name|c_address|c_nationkey|c_phone|c_acctbal|c_mktsegment|c_comment 1|Customer#000000001|IVhzIApeRb ot,c,E|15|25-********2988|711.56|BUILDING|to the even, regular platelets. 
regular, ironic epitaphs nag @@ -97,9 +97,31 @@ c_custkey|c_name|c_address|c_nationkey|c_phone|c_acctbal|c_mktsegment|c_comment ## 使用yaml配置 当有很多配置项需要设置时,使用命令行参数会很不方便编辑,此时建议使用yaml格式的配置文件,示例如下: -```yaml +**命令行参数**:`-configFile export.yaml` +**export.yaml 文件** +```yaml +host: xxxx +port: 3306 +user: root +password: xxxxxx +database: tpch_1g +operation: export +sep: "|" +table: customer +filenum: 1 +orderby: asc +orderCol: c_custkey +header: true +mask: >- + { + "c_phone": { + "type": "hiding", + "show_region": "0-2", + "show_end": 4 + } + } ``` 如果配置值包含[yaml特殊字符](https://yaml.org/spec/1.2.2/#53-indicator-characters)的话, 需要用引号括起来。 From e62c052439a77fdd49a11215ebd9f58de8b8babb Mon Sep 17 00:00:00 2001 From: F-ca7 <627955292@qq.com> Date: Wed, 7 Sep 2022 15:31:17 +0800 Subject: [PATCH 11/12] fix getYamlOption with null --- batch-tool/src/main/java/cmd/FlagOption.java | 2 +- batch-tool/src/main/java/cmd/YamlConfigResult.java | 9 +++++---- batch-tool/src/main/java/model/config/BaseConfig.java | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/batch-tool/src/main/java/cmd/FlagOption.java b/batch-tool/src/main/java/cmd/FlagOption.java index 39a3d7c..f3b1cb9 100644 --- a/batch-tool/src/main/java/cmd/FlagOption.java +++ b/batch-tool/src/main/java/cmd/FlagOption.java @@ -21,7 +21,7 @@ public class FlagOption extends ConfigArgOption { public Boolean defaultValue = null; - private FlagOption(String argShort, String argLong, String desc, String argName, boolean defaultValue) { + private FlagOption(String argShort, String argLong, String desc, String argName, Boolean defaultValue) { super(argShort, argLong, desc, argName); this.defaultValue = defaultValue; } diff --git a/batch-tool/src/main/java/cmd/YamlConfigResult.java b/batch-tool/src/main/java/cmd/YamlConfigResult.java index 16d13c0..f2ecc54 100644 --- a/batch-tool/src/main/java/cmd/YamlConfigResult.java +++ b/batch-tool/src/main/java/cmd/YamlConfigResult.java @@ -58,11 +58,12 @@ public String
getOptionValue(ConfigArgOption option) { private String getYamlOption(ConfigArgOption option) { String result = null; - if (argMap.containsKey(option.argShort)) { - result = String.valueOf(argMap.get(option.argShort)); + Object o; + if ((o = argMap.get(option.argShort)) != null) { + result = String.valueOf(o); } - if (argMap.containsKey(option.argLong)) { - result = String.valueOf(argMap.get(option.argLong)); + if ((o = argMap.get(option.argLong)) != null) { + result = String.valueOf(o); } return result; } diff --git a/batch-tool/src/main/java/model/config/BaseConfig.java b/batch-tool/src/main/java/model/config/BaseConfig.java index 582d48f..51f0a15 100644 --- a/batch-tool/src/main/java/model/config/BaseConfig.java +++ b/batch-tool/src/main/java/model/config/BaseConfig.java @@ -66,7 +66,7 @@ int bitCount() { /** * 第一行是否为字段名 */ - protected boolean isWithHeader = ConfigConstant.DEFAULT_WITH_HEADER; + protected boolean isWithHeader; protected boolean shardingEnabled; From bfbb226841016bb00f8ca0606b6023d35aab9ea6 Mon Sep 17 00:00:00 2001 From: F-ca7 <627955292@qq.com> Date: Wed, 7 Sep 2022 16:12:48 +0800 Subject: [PATCH 12/12] fix command option and update README --- batch-tool/README.md | 169 +++++++++--------- batch-tool/docs/usage-details.md | 2 +- batch-tool/src/main/java/cmd/CommandUtil.java | 11 +- 3 files changed, 90 insertions(+), 92 deletions(-) diff --git a/batch-tool/README.md b/batch-tool/README.md index 555fe65..1ac4cff 100644 --- a/batch-tool/README.md +++ b/batch-tool/README.md @@ -10,92 +10,87 @@ Batch Tool工具是专为 PolarDB-X数据库提供数据导入导出服务的工 ## 快速上手 常见场景与问题排查可参考文档 [usage-details](docs/usage-details.md)。 ### 参数介绍 -命令行用法: +命令行用法:`java -jar batch-tool.jar --help` ``` - usage: BatchTool [-batchsize ] [-con ] [-cs - ] [-D ] [-dir ] [-f ] [-F - ] [-fcon ] [-func] [-h ] [-H - ] [-header] [-help] [-i] [-in] [-initSqls ] - [-L ] [-lastSep] [-lb] [-local] [-maxConn ] [-maxWait - ] [-minConn ] [-noesc] [-O ] [-o - ] [-OC ] [-p ] [-P ] - [-para] [-param ] [-pre ] [-pro 
] - [-quote ] [-readsize ] [-rfonly] [-ringsize - ] [-s ] [-t ] [-tps ] [-u ] [-v] - [-w ] - -batchsize,--batchSize Batch size of emitted - tuples. - -con,--consumer Configure number of - consumer threads. - -cs,--charset Define charset of files. - -D,--database Database to use. - -dir,--dir Directory path including - files to import. - -f,--from Source file(s), separated - by ; . - -F,--filenum Fixed number of exported - files. - -fcon,--force consumer Configure if allow force - consumer parallelism. - -func,--sqlfunc Use sql function to update. - -h,--host Connect to host. - -H,--historyFile Configure of historyfile - name. - -header,--header Whether the header line is - column names. - -help,--help Help message. - -i,--ignoreandresume Flag of insert ignore and - resume breakpoint. - -in,--wherein Using where ... in (...) - -initSqls,--initSqls Connection init sqls. - -L,--line Max line limit of exported - files. - -lastSep,--withLastSep Whether line ends with - separator. - -lb,--loadbalance If using load balance. - -local,--localmerge o local merge sort. - -maxConn,--maxConnection Max connection number - limit. - -maxWait,--connMaxWait Max wait time(ms) when - getting a connection. - -minConn,--minConnection Mim connection number - limit. - -noesc,--noescape Don't escape values. - -O,--orderby asc or desc. - -o,--operation Batch operation type: - export / import / delete / - update. - -OC,--orderCol col1;col2;col3. - -p,--password Password to use when - connecting to server. - -P,--port Port number to use for - connection. - -para,--paraMerge Using parallel merge when - doing order by export. - -param,--connParam Connection params - -pre,--prefix Export file name prefix. - -pro,--producer Configure number of - producer threads (export / - import). - -quote,--quoteMode The mode of how field - values are enclosed by - double-quotes when - exporting table. Default - value is auto. - -readsize,--readSize Read block size in MB. 
- -rfonly,--rfonly Only read and process file, - no sql execution. - -ringsize,--ringBufferSize Ring buffer size. - -s,--sep Separator between fields - (delimiter). - -t,--table
Target table. - -tps,--tpsLimit Configure of tps limit, - default -1: no limit. - -u,--user User for login. - -v,--version Show version - -w,--where Where condition: col1>99 - AND col2<100 ... + usage: BatchTool [-batchsize ] [-col ] [-comp ] [-con ] + [-config ] [-cs ] [-D ] [-DDL ] [-dir ] [-encrypt ] [-error ] [-f ] [-F ] [-fcon ] [-format ] [-func ] [-h + ] [-H ] [-header ] [-help] [-i ] [-in ] + [-initSqls ] [-key ] [-L ] [-lastSep ] [-lb ] [-local ] [-mask ] [-maxConn ] [-maxWait + ] [-minConn ] [-noEsc ] [-o ] [-O ] + [-OC ] [-p ] [-P ] [-para ] [-param + ] [-perf ] [-pre ] [-pro ] [-quote ] [-readsize ] [-rfonly ] [-ringsize ] [-s + ] [-sharding ] [-t ] [-tps ] [-u + ] [-v] [-w ] + -batchsize,--batchSize Batch size of insert. + -col,--columns Target columns for export. + -comp,--compress Export or import compressed file (default NONE). + -con,--consumer Configure number of consumer threads. + -config,--configFile Use yaml config file. + -cs,--charset The charset of files. + -D,--database Database name. + -DDL,--DDL Export or import with DDL sql mode (default NONE). + -dir,--directory Directory path including files to import. + -encrypt,--encrypt Export or import with encrypted file (default NONE). + -error,--maxError Max error count threshold, program exits when the + limit is exceeded. + -f,--file Source file(s). + -F,--filenum Fixed number of exported files. + -fcon,--forceConsumer Configure if allow force consumer parallelism. + -format,--fileFormat File format (default NONE). + -func,--sqlFunc Use sql function to update (default false). + -h,--host Host of database. + -H,--historyFile History file name. + -header,--header Whether the header line is column names (default + false). + -help,--help Help message. + -i,--ignore Flag of insert ignore and resume breakpoint (default + false). + -in,--whereIn Using where cols in (values). + -initSqls,--initSqls Connection init sqls (druid). + -key,--secretKey Secret key used during encryption. 
+ -L,--line Max line limit of one single export file. + -lastSep,--withLastSep Whether line ends with separator (default false). + -lb,--loadbalance Use jdbc load balance, filling the arg in $host like + 'host1:port1,host2:port2' (default false). + -local,--localMerge Use local merge sort (default false). + -mask,--mask Masking sensitive columns while exporting data. + -maxConn,--maxConnection Max connection count (druid). + -maxWait,--connMaxWait Max wait time when getting a connection. + -minConn,--minConnection Min connection count (druid). + -noEsc,--noEscape Do not escape value for sql (default false). + -o,--operation Batch operation type: export / import / delete / + update. + -O,--orderby Order by type: asc / desc. + -OC,--orderCol Ordered column names. + -p,--password Password of user. + -P,--port Port number of database. + -para,--paraMerge Use parallel merge when doing order by export + (default false). + -param,--connParam Jdbc connection params. + -perf,--perfMode Use performance mode at the sacrifice of compatibility + (default false). + -pre,--prefix Export file name prefix. + -pro,--producer Configure number of producer threads (export / + import). + -quote,--quoteMode The mode of how field values are enclosed by + double-quotes when exporting table (default AUTO). + -readsize,--readSize Read block size. + -rfonly,--readFileOnly Only read and process file, no sql execution (default + false). + -ringsize,--ringSize Ring buffer size. + -s,--sep Separator between fields (delimiter). + -sharding,--sharding Whether enable sharding mode (default value depends on + operation). + -t,--table Target table. + -tps,--tpsLimit Configure of tps limit (default -1: no limit). + -u,--user User for login. + -v,--version Show batch-tool version. + -w,--where Where condition: col1>99 AND col2<100 ... 
``` 命令主要分别为两个类别: @@ -110,6 +105,8 @@ Batch Tool工具是专为 PolarDB-X数据库提供数据导入导出服务的工 - 文件数量、文件行数等导出配置 - insert ingore、断点续传等导入配置 - where、order by等sql条件 + - 压缩算法、加密算法、脱敏算法 + - 文件格式:csv、excel、txt等 - 批处理性能参数 - 生产者、消费者并行度设置 - ringBuffer缓冲区、批数量、读取文件块等大小设置 @@ -161,8 +158,8 @@ Batch Tool工具是专为 PolarDB-X数据库提供数据导入导出服务的工 - [x] 对接新分区表 - [ ] 调优实践 - [x] 指定字段(包括顺序)的导入导出 -- [ ] 简单的数据清洗,如trim尾部空格、日期时间格式等 -- [ ] 基于SQL函数的数据清洗,以及AES加解密函数调用 +- [ ] 简单的数据清洗,如:trim尾部空格、日期时间格式等 +- [x] 数据脱敏功能,如:掩码、哈希、加密、取整等 - [ ] 可视化监控 - [x] 错误情况下的断点记录(精确到行/块) - [x] 限流功能 diff --git a/batch-tool/docs/usage-details.md b/batch-tool/docs/usage-details.md index 424d321..05461b2 100644 --- a/batch-tool/docs/usage-details.md +++ b/batch-tool/docs/usage-details.md @@ -151,5 +151,5 @@ mask: >- ``` > ^A 为 \x01 的Caret notation - **解决**:输入`-s $'\x01'` 即可。 + **解决**:输入`-s $'\x01'` 即可。 > 暂时无法处理NULL字符(`\x00`)作为分隔符,可以通过修改源代码解决。 \ No newline at end of file diff --git a/batch-tool/src/main/java/cmd/CommandUtil.java b/batch-tool/src/main/java/cmd/CommandUtil.java index fb8ef80..d9e5f3e 100644 --- a/batch-tool/src/main/java/cmd/CommandUtil.java +++ b/batch-tool/src/main/java/cmd/CommandUtil.java @@ -120,16 +120,17 @@ public class CommandUtil { static { formatter.setWidth(110); - addCommandOptions(); + addCommandOptions(ConfigArgOption.class); + addCommandOptions(FlagOption.class); } - private static void addCommandOptions() { - Field[] fields = ConfigArgOption.class.getFields(); + private static void addCommandOptions(Class clazz) { + Field[] fields = clazz.getFields(); try { for (Field field : fields) { if (Modifier.isStatic(field.getModifiers()) - && field.getType() == ConfigArgOption.class) { - ConfigArgOption option = (ConfigArgOption) field.get(ConfigArgOption.class); + && field.getType() == clazz) { + ConfigArgOption option = (ConfigArgOption) field.get(clazz); addConfigOption(option); } }