From 60fb68f19e4830bb5b69185aebb760ed4167e22c Mon Sep 17 00:00:00 2001 From: WJX20 <1837862986@qq.com> Date: Tue, 11 Nov 2025 14:49:58 +0800 Subject: [PATCH 1/8] Added three new configuration parameters: "batch-compare-size", "batch-offset-size", "batch-check-size" --- .../core/threading/DataComparisonThread.java | 24 +++++++++++++++++++ .../core/threading/DataValidationThread.java | 10 +++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java index 5848b57..339c192 100644 --- a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java @@ -29,6 +29,7 @@ import com.crunchydata.model.DataComparisonTableMap; import com.crunchydata.model.DataComparisonResult; import com.crunchydata.util.*; +import org.apache.commons.lang3.StringUtils; import static com.crunchydata.service.DatabaseConnectionService.getConnection; import static com.crunchydata.util.HashingUtils.getMd5; @@ -81,6 +82,8 @@ public void run() { int totalRows = 0; int reportedRows = 0; // Track rows already reported to database int batchCommitSize = Integer.parseInt(Props.getProperty("batch-commit-size")); + int batchCommitSize = Integer.parseInt(Props.getProperty("batch-commit-size")); + int batchCommitSize = Integer.parseInt(Props.getProperty("batch-commit-size")); int fetchSize = Integer.parseInt(Props.getProperty("batch-fetch-size")); boolean useLoaderThreads = Integer.parseInt(Props.getProperty("loader-threads")) > 0; boolean observerThrottle = Boolean.parseBoolean(Props.getProperty("observer-throttle")); @@ -119,6 +122,27 @@ public void run() { sql += " ORDER BY " + pkList; } + String batchCompareSize = Props.getProperty("batch-compare-size"); + String batchOffsetSize = Props.getProperty("batch-offset-size"); + + if (StringUtils.isNotEmpty(batchCompareSize) && 
StringUtils.isNotEmpty(batchOffsetSize)) { + String dbType = Props.getProperty(targetType + "-type"); + + switch (dbType.toLowerCase()) { + case "oracle": + case "db2": + case "mssql": + sql += " OFFSET " + batchOffsetSize + " ROWS FETCH NEXT " + batchCompareSize + " ROWS ONLY"; + break; + case "mysql": + case "postgres": + sql += " LIMIT " + batchCompareSize + " OFFSET " + batchOffsetSize; + break; + default: + sql += " LIMIT " + batchCompareSize; + } + } + //conn.setAutoCommit(false); stmt = conn.prepareStatement(sql); stmt.setFetchSize(fetchSize); diff --git a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java index 72e5d77..38a9e85 100644 --- a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java @@ -35,6 +35,7 @@ import com.crunchydata.util.DataProcessingUtils; import com.crunchydata.util.LoggingUtils; +import org.apache.commons.lang3.StringUtils; import org.json.JSONArray; import org.json.JSONObject; @@ -83,9 +84,16 @@ public static JSONObject checkRows (Connection repoConn, Connection sourceConn, PreparedStatement stmt = null; ResultSet rs = null; + String SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT; + String batchCheckSize = Props.getProperty("batch-check-size"); try { - stmt = repoConn.prepareStatement(SQL_REPO_SELECT_OUTOFSYNC_ROWS); + if (StringUtils.isNotEmpty(batchCheckSize)) { + SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT = SQL_REPO_SELECT_OUTOFSYNC_ROWS + " LIMIT " + batchCheckSize; + stmt = repoConn.prepareStatement(SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT); + } else { + stmt = repoConn.prepareStatement(SQL_REPO_SELECT_OUTOFSYNC_ROWS); + } stmt.setObject(1, dct.getTid()); stmt.setObject(2, dct.getTid()); rs = stmt.executeQuery(); From 83eb16465069d5b52aae3bbce233f94ae1fe274e Mon Sep 17 00:00:00 2001 From: wjxKOI <105590180+WJX20@users.noreply.github.com> Date: Tue, 11 Nov 2025 
15:06:06 +0800 Subject: [PATCH 2/8] Document batch-offset-size, batch-compare-size, and batch-check-size Added configuration options for batch processing. --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index 0a5ddcb..bd7873d 100644 --- a/README.md +++ b/README.md @@ -332,6 +332,26 @@ Properties are categorized into four sections: system, repository, source, and t Default: 0000000000000000000000.0000000000000000000000 +#### batch-offset-size + + This configuration indicates from which data line the hash value comparison begins to be generated. + + batch-offset-size Default: 0 + +#### batch-compare-size + + This configuration indicates how many Hash values will be generated. + + Default: 2000 + +These two configurations are used to paginate the data for querying when generating "hash comparison". For instance, only compare the data ranging from 1000 to 2000 or from 5000 to 10000. + +#### batch-check-size + + This configuration indicates how many "check validations" are to be performed. + + Default: 1000 + ### Repository #### repo-dbname From 3e15dfe96fdf45e49d5974542ec5c27652e9d6bc Mon Sep 17 00:00:00 2001 From: wjxKOI <105590180+WJX20@users.noreply.github.com> Date: Tue, 11 Nov 2025 15:10:04 +0800 Subject: [PATCH 3/8] Fix formatting for batch-offset-size default value --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bd7873d..953bd99 100644 --- a/README.md +++ b/README.md @@ -336,7 +336,7 @@ Properties are categorized into four sections: system, repository, source, and t This configuration indicates from which data line the hash value comparison begins to be generated. 
- batch-offset-size Default: 0 + Default: 0 #### batch-compare-size From 9bf2da450b9769bbbe2b9e06cac55edd6c6d5978 Mon Sep 17 00:00:00 2001 From: WJX20 <1837862986@qq.com> Date: Tue, 11 Nov 2025 15:24:55 +0800 Subject: [PATCH 4/8] Modify default values of configuration --- pgcompare.properties.sample | 15 +++++++++++++++ .../java/com/crunchydata/config/Settings.java | 3 +++ .../core/threading/DataComparisonThread.java | 1 + 3 files changed, 19 insertions(+) diff --git a/pgcompare.properties.sample b/pgcompare.properties.sample index 46cc81f..0d3c1db 100644 --- a/pgcompare.properties.sample +++ b/pgcompare.properties.sample @@ -58,6 +58,21 @@ log-level = INFO # default: true database-sort = true +# This configuration indicates that the first n data entries will be skipped, and the hash values will be generated starting from the (n + 1)th data entry for comparison. +# default: 0 +batch-offset-size = 0 + +# This configuration indicates how many "hash values" will be generated. +# default: 2000 +batch-compare-size = 2000 + +#"batch-offset-size" & "batch-compare-size": These two configurations are used to paginate the data for querying when generating "hash comparison". For instance, only compare the data ranging from 1001 to 2000 or from 5001 to 10000. + +# This configuration indicates how many "check validations" are to be performed. 
+# default: 1000 +batch-check-size = 1000 + + ################################## # repository ################################## diff --git a/src/main/java/com/crunchydata/config/Settings.java b/src/main/java/com/crunchydata/config/Settings.java index aec2a80..83a7fd9 100644 --- a/src/main/java/com/crunchydata/config/Settings.java +++ b/src/main/java/com/crunchydata/config/Settings.java @@ -123,6 +123,9 @@ public static Properties setDefaults() { defaultProps.setProperty("observer-vacuum","true"); defaultProps.setProperty("stage-table-parallel","0"); defaultProps.setProperty("standard-number-format","0000000000000000000000.0000000000000000000000"); + defaultProps.setProperty("batch-offset-size","0"); + defaultProps.setProperty("batch-compare-size","2000"); + defaultProps.setProperty("batch-check-size","1000"); // Repository diff --git a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java index 339c192..075a54c 100644 --- a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java @@ -136,6 +136,7 @@ public void run() { break; case "mysql": case "postgres": + case "snowflake": sql += " LIMIT " + batchCompareSize + " OFFSET " + batchOffsetSize; break; default: From 5366cb44af08f1685b1e686271413ac825945a27 Mon Sep 17 00:00:00 2001 From: wjxKOI <105590180+WJX20@users.noreply.github.com> Date: Tue, 11 Nov 2025 15:28:32 +0800 Subject: [PATCH 5/8] Update README with clearer configuration descriptions Clarified the descriptions for batch-offset-size and batch-compare-size configurations in the README. 
--- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 953bd99..9b4cf7e 100644 --- a/README.md +++ b/README.md @@ -334,7 +334,7 @@ Properties are categorized into four sections: system, repository, source, and t #### batch-offset-size - This configuration indicates from which data line the hash value comparison begins to be generated. + This configuration indicates that the first n data entries will be skipped, and the hash values will be generated starting from the (n + 1)th data entry for comparison. Default: 0 @@ -344,7 +344,7 @@ Properties are categorized into four sections: system, repository, source, and t Default: 2000 -These two configurations are used to paginate the data for querying when generating "hash comparison". For instance, only compare the data ranging from 1000 to 2000 or from 5000 to 10000. +"batch-offset-size" & "batch-compare-size": These two configurations are used to paginate the data for querying when generating "hash comparison". For instance, only compare the data ranging from 1001 to 2000 or from 5001 to 10000. 
#### batch-check-size From 6b2c06ba2f7eaee320aafb03425efb5dd4ad7d07 Mon Sep 17 00:00:00 2001 From: WJX20 <1837862986@qq.com> Date: Tue, 11 Nov 2025 15:45:33 +0800 Subject: [PATCH 6/8] Remove redundant names --- .../com/crunchydata/core/threading/DataComparisonThread.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java index 075a54c..44a7a8e 100644 --- a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java @@ -82,8 +82,6 @@ public void run() { int totalRows = 0; int reportedRows = 0; // Track rows already reported to database int batchCommitSize = Integer.parseInt(Props.getProperty("batch-commit-size")); - int batchCommitSize = Integer.parseInt(Props.getProperty("batch-commit-size")); - int batchCommitSize = Integer.parseInt(Props.getProperty("batch-commit-size")); int fetchSize = Integer.parseInt(Props.getProperty("batch-fetch-size")); boolean useLoaderThreads = Integer.parseInt(Props.getProperty("loader-threads")) > 0; boolean observerThrottle = Boolean.parseBoolean(Props.getProperty("observer-throttle")); From 6867170ed21792ff8be2aeb36dd7b1e45c6e2a41 Mon Sep 17 00:00:00 2001 From: WJX20 <1837862986@qq.com> Date: Fri, 14 Nov 2025 17:08:30 +0800 Subject: [PATCH 7/8] Remove the condition of "checking that configuration parameters are empty". 
Remove offset; use the method of directly specifying the initial value instead. --- pgcompare.properties.sample | 6 +++--- .../java/com/crunchydata/config/Settings.java | 2 +- .../core/threading/DataComparisonThread.java | 15 ++++++++++----- .../core/threading/DataValidationThread.java | 4 ++-- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pgcompare.properties.sample b/pgcompare.properties.sample index 0d3c1db..00e4ba2 100644 --- a/pgcompare.properties.sample +++ b/pgcompare.properties.sample @@ -58,15 +58,15 @@ log-level = INFO # default: true database-sort = true -# This configuration indicates that the first n data entries will be skipped, and the hash values will be generated starting from the (n + 1)th data entry for comparison. +# This configuration indicates that hash values will be generated starting from the (n + 1)th data item for comparison. # default: 0 -batch-offset-size = 0 +batch-start-size = 0 # This configuration indicates how many "hash values" will be generated. # default: 2000 batch-compare-size = 2000 -#"batch-offset-size" & "batch-compare-size": These two configurations are used to paginate the data for querying when generating "hash comparison". For instance, only compare the data ranging from 1001 to 2000 or from 5001 to 10000. +#"batch-start-size" & "batch-compare-size": These two configurations are used to sample the data queried for the "hash comparison". For instance, only the data ranging from 1001 to 2000 or from 5001 to 10000 is compared. # This configuration indicates how many "check validations" are to be performed. 
# default: 1000 diff --git a/src/main/java/com/crunchydata/config/Settings.java b/src/main/java/com/crunchydata/config/Settings.java index 83a7fd9..6e6423f 100644 --- a/src/main/java/com/crunchydata/config/Settings.java +++ b/src/main/java/com/crunchydata/config/Settings.java @@ -123,7 +123,7 @@ public static Properties setDefaults() { defaultProps.setProperty("observer-vacuum","true"); defaultProps.setProperty("stage-table-parallel","0"); defaultProps.setProperty("standard-number-format","0000000000000000000000.0000000000000000000000"); - defaultProps.setProperty("batch-offset-size","0"); + defaultProps.setProperty("batch-start-size","0"); defaultProps.setProperty("batch-compare-size","2000"); defaultProps.setProperty("batch-check-size","1000"); diff --git a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java index 44a7a8e..3bf0dc1 100644 --- a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java @@ -90,6 +90,9 @@ public void run() { DecimalFormat formatter = new DecimalFormat("#,###"); int loadRowCount = Integer.parseInt(Props.getProperty("batch-progress-report-size")); int observerRowCount = Integer.parseInt(Props.getProperty("observer-throttle-size")); + + int batchCompareSize = Integer.parseInt(Props.getProperty("batch-compare-size")); + int batchStartSize = Integer.parseInt(Props.getProperty("batch-start-size")); // Database resources Connection conn = null; @@ -117,25 +120,27 @@ public void run() { } if (!pkList.isEmpty() && Props.getProperty("database-sort").equals("true")) { + if (batchStartSize >= 0) { + sql += " AND " + pkList + ">" + batchStartSize; + } sql += " ORDER BY " + pkList; } - String batchCompareSize = Props.getProperty("batch-compare-size"); - String batchOffsetSize = Props.getProperty("batch-offset-size"); - if (StringUtils.isNotEmpty(batchCompareSize) && 
StringUtils.isNotEmpty(batchOffsetSize)) { + if (batchCompareSize > 0) { String dbType = Props.getProperty(targetType + "-type"); switch (dbType.toLowerCase()) { case "oracle": case "db2": case "mssql": - sql += " OFFSET " + batchOffsetSize + " ROWS FETCH NEXT " + batchCompareSize + " ROWS ONLY"; + sql += " OFFSET 0 ROWS FETCH NEXT " + batchCompareSize + " ROWS ONLY"; break; case "mysql": + case "mariadb": case "postgres": case "snowflake": - sql += " LIMIT " + batchCompareSize + " OFFSET " + batchOffsetSize; + sql += " LIMIT " + batchCompareSize; break; default: sql += " LIMIT " + batchCompareSize; diff --git a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java index 38a9e85..42722b4 100644 --- a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java @@ -85,10 +85,10 @@ public static JSONObject checkRows (Connection repoConn, Connection sourceConn, PreparedStatement stmt = null; ResultSet rs = null; String SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT; - String batchCheckSize = Props.getProperty("batch-check-size"); + int batchCheckSize = Integer.parseInt(Props.getProperty("batch-check-size")); try { - if (StringUtils.isNotEmpty(batchCheckSize)) { + if (batchCheckSize > 0) { SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT = SQL_REPO_SELECT_OUTOFSYNC_ROWS + " LIMIT " + batchCheckSize; stmt = repoConn.prepareStatement(SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT); } else { From 197b937448f2d93280336b4167c35e838d358b38 Mon Sep 17 00:00:00 2001 From: WJX20 <1837862986@qq.com> Date: Fri, 14 Nov 2025 17:11:47 +0800 Subject: [PATCH 8/8] comment remove --- .../com/crunchydata/core/threading/DataComparisonThread.java | 1 - .../com/crunchydata/core/threading/DataValidationThread.java | 1 - 2 files changed, 2 deletions(-) diff --git a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java 
b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java index 3bf0dc1..d003122 100644 --- a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java @@ -29,7 +29,6 @@ import com.crunchydata.model.DataComparisonTableMap; import com.crunchydata.model.DataComparisonResult; import com.crunchydata.util.*; -import org.apache.commons.lang3.StringUtils; import static com.crunchydata.service.DatabaseConnectionService.getConnection; import static com.crunchydata.util.HashingUtils.getMd5; diff --git a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java index 42722b4..25067f3 100644 --- a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java @@ -35,7 +35,6 @@ import com.crunchydata.util.DataProcessingUtils; import com.crunchydata.util.LoggingUtils; -import org.apache.commons.lang3.StringUtils; import org.json.JSONArray; import org.json.JSONObject;