diff --git a/README.md b/README.md index 0a5ddcb..9b4cf7e 100644 --- a/README.md +++ b/README.md @@ -332,6 +332,26 @@ Properties are categorized into four sections: system, repository, source, and t Default: 0000000000000000000000.0000000000000000000000 +#### batch-start-size + + This configuration indicates that the first n data entries will be skipped, and the hash values will be generated starting from the (n + 1)th data entry for comparison. + + Default: 0 + +#### batch-compare-size + + This configuration indicates how many hash values will be generated. + + Default: 2000 + +"batch-start-size" & "batch-compare-size": These two configurations paginate the data queried when generating the hash comparison. For instance, compare only the data ranging from 1001 to 2000 or from 5001 to 10000. + +#### batch-check-size + + This configuration indicates how many "check validations" are to be performed. + + Default: 1000 + ### Repository #### repo-dbname diff --git a/pgcompare.properties.sample b/pgcompare.properties.sample index 46cc81f..00e4ba2 100644 --- a/pgcompare.properties.sample +++ b/pgcompare.properties.sample @@ -58,6 +58,21 @@ log-level = INFO # default: true database-sort = true +# This configuration indicates that the hash value will be generated starting from the (n + 1)th data item for comparison. +# default: 0 +batch-start-size = 0 + +# This configuration indicates how many "hash values" will be generated. +# default: 2000 +batch-compare-size = 2000 + +# "batch-start-size" & "batch-compare-size": These two configurations paginate the data queried for the hash comparison. For instance, compare only the data ranging from 1001 to 2000 or from 5001 to 10000. + +# This configuration indicates how many "check validations" are to be performed.
+# default: 1000 +batch-check-size = 1000 + + ################################## # repository ################################## diff --git a/src/main/java/com/crunchydata/config/Settings.java b/src/main/java/com/crunchydata/config/Settings.java index aec2a80..6e6423f 100644 --- a/src/main/java/com/crunchydata/config/Settings.java +++ b/src/main/java/com/crunchydata/config/Settings.java @@ -123,6 +123,9 @@ public static Properties setDefaults() { defaultProps.setProperty("observer-vacuum","true"); defaultProps.setProperty("stage-table-parallel","0"); defaultProps.setProperty("standard-number-format","0000000000000000000000.0000000000000000000000"); + defaultProps.setProperty("batch-start-size","0"); + defaultProps.setProperty("batch-compare-size","2000"); + defaultProps.setProperty("batch-check-size","1000"); // Repository diff --git a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java index 5848b57..d003122 100644 --- a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java @@ -89,6 +89,9 @@ public void run() { DecimalFormat formatter = new DecimalFormat("#,###"); int loadRowCount = Integer.parseInt(Props.getProperty("batch-progress-report-size")); int observerRowCount = Integer.parseInt(Props.getProperty("observer-throttle-size")); + + int batchCompareSize = Integer.parseInt(Props.getProperty("batch-compare-size")); + int batchStartSize = Integer.parseInt(Props.getProperty("batch-start-size")); // Database resources Connection conn = null; @@ -116,9 +119,33 @@ public void run() { } if (!pkList.isEmpty() && Props.getProperty("database-sort").equals("true")) { + if (batchStartSize >= 0) { + sql += " AND " + pkList + ">" + batchStartSize; + } sql += " ORDER BY " + pkList; } + + if (batchCompareSize > 0) { + String dbType = Props.getProperty(targetType + "-type"); + + switch 
(dbType.toLowerCase()) { + case "oracle": + case "db2": + case "mssql": + sql += " OFFSET 0 ROWS FETCH NEXT " + batchCompareSize + " ROWS ONLY"; + break; + case "mysql": + case "mariadb": + case "postgres": + case "snowflake": + sql += " LIMIT " + batchCompareSize; + break; + default: + sql += " LIMIT " + batchCompareSize; + } + } + //conn.setAutoCommit(false); stmt = conn.prepareStatement(sql); stmt.setFetchSize(fetchSize); diff --git a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java index 72e5d77..25067f3 100644 --- a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java @@ -83,9 +83,16 @@ public static JSONObject checkRows (Connection repoConn, Connection sourceConn, PreparedStatement stmt = null; ResultSet rs = null; + String SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT; + int batchCheckSize = Integer.parseInt(Props.getProperty("batch-check-size")); try { - stmt = repoConn.prepareStatement(SQL_REPO_SELECT_OUTOFSYNC_ROWS); + if (batchCheckSize > 0) { + SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT = SQL_REPO_SELECT_OUTOFSYNC_ROWS + " LIMIT " + batchCheckSize; + stmt = repoConn.prepareStatement(SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT); + } else { + stmt = repoConn.prepareStatement(SQL_REPO_SELECT_OUTOFSYNC_ROWS); + } stmt.setObject(1, dct.getTid()); stmt.setObject(2, dct.getTid()); rs = stmt.executeQuery();