From 60fb68f19e4830bb5b69185aebb760ed4167e22c Mon Sep 17 00:00:00 2001 From: WJX20 <1837862986@qq.com> Date: Tue, 11 Nov 2025 14:49:58 +0800 Subject: [PATCH 1/8] Added three new configuration parameters: "batch-compare-size", "batch-offset-size", "batch-check-size" --- .../core/threading/DataComparisonThread.java | 24 +++++++++++++++++++ .../core/threading/DataValidationThread.java | 10 +++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java index 5848b57..339c192 100644 --- a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java @@ -29,6 +29,7 @@ import com.crunchydata.model.DataComparisonTableMap; import com.crunchydata.model.DataComparisonResult; import com.crunchydata.util.*; +import org.apache.commons.lang3.StringUtils; import static com.crunchydata.service.DatabaseConnectionService.getConnection; import static com.crunchydata.util.HashingUtils.getMd5; @@ -81,6 +82,8 @@ public void run() { int totalRows = 0; int reportedRows = 0; // Track rows already reported to database int batchCommitSize = Integer.parseInt(Props.getProperty("batch-commit-size")); + int batchCommitSize = Integer.parseInt(Props.getProperty("batch-commit-size")); + int batchCommitSize = Integer.parseInt(Props.getProperty("batch-commit-size")); int fetchSize = Integer.parseInt(Props.getProperty("batch-fetch-size")); boolean useLoaderThreads = Integer.parseInt(Props.getProperty("loader-threads")) > 0; boolean observerThrottle = Boolean.parseBoolean(Props.getProperty("observer-throttle")); @@ -119,6 +122,27 @@ public void run() { sql += " ORDER BY " + pkList; } + String batchCompareSize = Props.getProperty("batch-compare-size"); + String batchOffsetSize = Props.getProperty("batch-offset-size"); + + if (StringUtils.isNotEmpty(batchCompareSize) && 
StringUtils.isNotEmpty(batchOffsetSize)) { + String dbType = Props.getProperty(targetType + "-type"); + + switch (dbType.toLowerCase()) { + case "oracle": + case "db2": + case "mssql": + sql += " OFFSET " + batchOffsetSize + " ROWS FETCH NEXT " + batchCompareSize + " ROWS ONLY"; + break; + case "mysql": + case "postgres": + sql += " LIMIT " + batchCompareSize + " OFFSET " + batchOffsetSize; + break; + default: + sql += " LIMIT " + batchCompareSize; + } + } + //conn.setAutoCommit(false); stmt = conn.prepareStatement(sql); stmt.setFetchSize(fetchSize); diff --git a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java index 72e5d77..38a9e85 100644 --- a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java @@ -35,6 +35,7 @@ import com.crunchydata.util.DataProcessingUtils; import com.crunchydata.util.LoggingUtils; +import org.apache.commons.lang3.StringUtils; import org.json.JSONArray; import org.json.JSONObject; @@ -83,9 +84,16 @@ public static JSONObject checkRows (Connection repoConn, Connection sourceConn, PreparedStatement stmt = null; ResultSet rs = null; + String SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT; + String batchCheckSize = Props.getProperty("batch-check-size"); try { - stmt = repoConn.prepareStatement(SQL_REPO_SELECT_OUTOFSYNC_ROWS); + if (StringUtils.isNotEmpty(batchCheckSize)) { + SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT = SQL_REPO_SELECT_OUTOFSYNC_ROWS + " LIMIT " + batchCheckSize; + stmt = repoConn.prepareStatement(SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT); + } else { + stmt = repoConn.prepareStatement(SQL_REPO_SELECT_OUTOFSYNC_ROWS); + } stmt.setObject(1, dct.getTid()); stmt.setObject(2, dct.getTid()); rs = stmt.executeQuery(); From 83eb16465069d5b52aae3bbce233f94ae1fe274e Mon Sep 17 00:00:00 2001 From: wjxKOI <105590180+WJX20@users.noreply.github.com> Date: Tue, 11 Nov 2025 
15:06:06 +0800 Subject: [PATCH 2/8] Document batch-offset-size, batch-compare-size, and batch-check-size Added configuration options for batch processing. --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index 0a5ddcb..bd7873d 100644 --- a/README.md +++ b/README.md @@ -332,6 +332,26 @@ Properties are categorized into four sections: system, repository, source, and t Default: 0000000000000000000000.0000000000000000000000 +#### batch-offset-size + + This configuration indicates from which data line the hash value comparison begins to be generated. + + batch-offset-size Default: 0 + +#### batch-compare-size + + This configuration indicates how many Hash values will be generated. + + Default: 2000 + +These two configurations are used to paginate the data for querying when generating "hash comparison". For instance, only compare the data ranging from 1000 to 2000 or from 5000 to 10000. + +#### batch-check-size + + This configuration indicates how many "check validations" are to be performed. + + Default: 1000 + ### Repository #### repo-dbname From 3e15dfe96fdf45e49d5974542ec5c27652e9d6bc Mon Sep 17 00:00:00 2001 From: wjxKOI <105590180+WJX20@users.noreply.github.com> Date: Tue, 11 Nov 2025 15:10:04 +0800 Subject: [PATCH 3/8] Fix formatting for batch-offset-size default value --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bd7873d..953bd99 100644 --- a/README.md +++ b/README.md @@ -336,7 +336,7 @@ Properties are categorized into four sections: system, repository, source, and t This configuration indicates from which data line the hash value comparison begins to be generated. 
- batch-offset-size Default: 0 + Default: 0 #### batch-compare-size From 9bf2da450b9769bbbe2b9e06cac55edd6c6d5978 Mon Sep 17 00:00:00 2001 From: WJX20 <1837862986@qq.com> Date: Tue, 11 Nov 2025 15:24:55 +0800 Subject: [PATCH 4/8] Modify default values of configuration --- pgcompare.properties.sample | 15 +++++++++++++++ .../java/com/crunchydata/config/Settings.java | 3 +++ .../core/threading/DataComparisonThread.java | 1 + 3 files changed, 19 insertions(+) diff --git a/pgcompare.properties.sample b/pgcompare.properties.sample index 46cc81f..0d3c1db 100644 --- a/pgcompare.properties.sample +++ b/pgcompare.properties.sample @@ -58,6 +58,21 @@ log-level = INFO # default: true database-sort = true +# This configuration indicates that the first n data entries will be skipped, and the hash values will be generated starting from the (n + 1)th data entry for comparison. +# default: 0 +batch-offset-size = 0 + +# This configuration indicates how many "hash values" will be generated. +# default: 2000 +batch-compare-size = 2000 + +#"batch-offset-size" & "batch-compare-size": These two configurations are used to paginate the data for querying when generating "hash comparison". For instance, only compare the data ranging from 1001 to 2000 or from 5001 to 10000. + +# This configuration indicates how many "check validations" are to be performed. 
+# default: 1000 +batch-check-size = 1000 + + ################################## # repository ################################## diff --git a/src/main/java/com/crunchydata/config/Settings.java b/src/main/java/com/crunchydata/config/Settings.java index aec2a80..83a7fd9 100644 --- a/src/main/java/com/crunchydata/config/Settings.java +++ b/src/main/java/com/crunchydata/config/Settings.java @@ -123,6 +123,9 @@ public static Properties setDefaults() { defaultProps.setProperty("observer-vacuum","true"); defaultProps.setProperty("stage-table-parallel","0"); defaultProps.setProperty("standard-number-format","0000000000000000000000.0000000000000000000000"); + defaultProps.setProperty("batch-offset-size","0"); + defaultProps.setProperty("batch-compare-size","2000"); + defaultProps.setProperty("batch-check-size","1000"); // Repository diff --git a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java index 339c192..075a54c 100644 --- a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java @@ -136,6 +136,7 @@ public void run() { break; case "mysql": case "postgres": + case "snowflake": sql += " LIMIT " + batchCompareSize + " OFFSET " + batchOffsetSize; break; default: From 5366cb44af08f1685b1e686271413ac825945a27 Mon Sep 17 00:00:00 2001 From: wjxKOI <105590180+WJX20@users.noreply.github.com> Date: Tue, 11 Nov 2025 15:28:32 +0800 Subject: [PATCH 5/8] Update README with clearer configuration descriptions Clarified the descriptions for batch-offset-size and batch-compare-size configurations in the README. 
--- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 953bd99..9b4cf7e 100644 --- a/README.md +++ b/README.md @@ -334,7 +334,7 @@ Properties are categorized into four sections: system, repository, source, and t #### batch-offset-size - This configuration indicates from which data line the hash value comparison begins to be generated. + This configuration indicates that the first n data entries will be skipped, and the hash values will be generated starting from the (n + 1)th data entry for comparison. Default: 0 @@ -344,7 +344,7 @@ Properties are categorized into four sections: system, repository, source, and t Default: 2000 -These two configurations are used to paginate the data for querying when generating "hash comparison". For instance, only compare the data ranging from 1000 to 2000 or from 5000 to 10000. +"batch-offset-size" & "batch-compare-size": These two configurations are used to paginate the data for querying when generating "hash comparison". For instance, only compare the data ranging from 1001 to 2000 or from 5001 to 10000. 
#### batch-check-size From 6b2c06ba2f7eaee320aafb03425efb5dd4ad7d07 Mon Sep 17 00:00:00 2001 From: WJX20 <1837862986@qq.com> Date: Tue, 11 Nov 2025 15:45:33 +0800 Subject: [PATCH 6/8] Remove redundant names --- .../com/crunchydata/core/threading/DataComparisonThread.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java index 075a54c..44a7a8e 100644 --- a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java @@ -82,8 +82,6 @@ public void run() { int totalRows = 0; int reportedRows = 0; // Track rows already reported to database int batchCommitSize = Integer.parseInt(Props.getProperty("batch-commit-size")); - int batchCommitSize = Integer.parseInt(Props.getProperty("batch-commit-size")); - int batchCommitSize = Integer.parseInt(Props.getProperty("batch-commit-size")); int fetchSize = Integer.parseInt(Props.getProperty("batch-fetch-size")); boolean useLoaderThreads = Integer.parseInt(Props.getProperty("loader-threads")) > 0; boolean observerThrottle = Boolean.parseBoolean(Props.getProperty("observer-throttle")); From 6867170ed21792ff8be2aeb36dd7b1e45c6e2a41 Mon Sep 17 00:00:00 2001 From: WJX20 <1837862986@qq.com> Date: Fri, 14 Nov 2025 17:08:30 +0800 Subject: [PATCH 7/8] Remove the condition of "checking that configuration parameters are empty". 
Remove offset; use the method of directly specifying the initial value instead. --- pgcompare.properties.sample | 6 +++--- .../java/com/crunchydata/config/Settings.java | 2 +- .../core/threading/DataComparisonThread.java | 15 ++++++++++----- .../core/threading/DataValidationThread.java | 4 ++-- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pgcompare.properties.sample b/pgcompare.properties.sample index 0d3c1db..00e4ba2 100644 --- a/pgcompare.properties.sample +++ b/pgcompare.properties.sample @@ -58,15 +58,15 @@ log-level = INFO # default: true database-sort = true -# This configuration indicates that the first n data entries will be skipped, and the hash values will be generated starting from the (n + 1)th data entry for comparison. +# This configuration indicates that hash values will be generated starting from the (n + 1)th data item for comparison. # default: 0 -batch-offset-size = 0 +batch-start-size = 0 # This configuration indicates how many "hash values" will be generated. # default: 2000 batch-compare-size = 2000 -#"batch-offset-size" & "batch-compare-size": These two configurations are used to paginate the data for querying when generating "hash comparison". For instance, only compare the data ranging from 1001 to 2000 or from 5001 to 10000. +#"batch-start-size" & "batch-compare-size": These two configurations are used to sample the data queried for the "hash comparison". For instance, only the data ranging from 1001 to 2000 or from 5001 to 10000 is compared. # This configuration indicates how many "check validations" are to be performed. 
# default: 1000 diff --git a/src/main/java/com/crunchydata/config/Settings.java b/src/main/java/com/crunchydata/config/Settings.java index 83a7fd9..6e6423f 100644 --- a/src/main/java/com/crunchydata/config/Settings.java +++ b/src/main/java/com/crunchydata/config/Settings.java @@ -123,7 +123,7 @@ public static Properties setDefaults() { defaultProps.setProperty("observer-vacuum","true"); defaultProps.setProperty("stage-table-parallel","0"); defaultProps.setProperty("standard-number-format","0000000000000000000000.0000000000000000000000"); - defaultProps.setProperty("batch-offset-size","0"); + defaultProps.setProperty("batch-start-size","0"); defaultProps.setProperty("batch-compare-size","2000"); defaultProps.setProperty("batch-check-size","1000"); diff --git a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java index 44a7a8e..3bf0dc1 100644 --- a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java @@ -90,6 +90,9 @@ public void run() { DecimalFormat formatter = new DecimalFormat("#,###"); int loadRowCount = Integer.parseInt(Props.getProperty("batch-progress-report-size")); int observerRowCount = Integer.parseInt(Props.getProperty("observer-throttle-size")); + + int batchCompareSize = Integer.parseInt(Props.getProperty("batch-compare-size")); + int batchStartSize = Integer.parseInt(Props.getProperty("batch-start-size")); // Database resources Connection conn = null; @@ -117,25 +120,27 @@ public void run() { } if (!pkList.isEmpty() && Props.getProperty("database-sort").equals("true")) { + if (batchStartSize >= 0) { + sql += " AND " + pkList + ">" + batchStartSize; + } sql += " ORDER BY " + pkList; } - String batchCompareSize = Props.getProperty("batch-compare-size"); - String batchOffsetSize = Props.getProperty("batch-offset-size"); - if (StringUtils.isNotEmpty(batchCompareSize) && 
StringUtils.isNotEmpty(batchOffsetSize)) { + if (batchCompareSize > 0) { String dbType = Props.getProperty(targetType + "-type"); switch (dbType.toLowerCase()) { case "oracle": case "db2": case "mssql": - sql += " OFFSET " + batchOffsetSize + " ROWS FETCH NEXT " + batchCompareSize + " ROWS ONLY"; + sql += " OFFSET 0 ROWS FETCH NEXT " + batchCompareSize + " ROWS ONLY"; break; case "mysql": + case "mariadb": case "postgres": case "snowflake": - sql += " LIMIT " + batchCompareSize + " OFFSET " + batchOffsetSize; + sql += " LIMIT " + batchCompareSize; break; default: sql += " LIMIT " + batchCompareSize; diff --git a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java index 38a9e85..42722b4 100644 --- a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java @@ -85,10 +85,10 @@ public static JSONObject checkRows (Connection repoConn, Connection sourceConn, PreparedStatement stmt = null; ResultSet rs = null; String SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT; - String batchCheckSize = Props.getProperty("batch-check-size"); + int batchCheckSize = Integer.parseInt(Props.getProperty("batch-check-size")); try { - if (StringUtils.isNotEmpty(batchCheckSize)) { + if (batchCheckSize > 0) { SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT = SQL_REPO_SELECT_OUTOFSYNC_ROWS + " LIMIT " + batchCheckSize; stmt = repoConn.prepareStatement(SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT); } else { From 197b937448f2d93280336b4167c35e838d358b38 Mon Sep 17 00:00:00 2001 From: WJX20 <1837862986@qq.com> Date: Fri, 14 Nov 2025 17:11:47 +0800 Subject: [PATCH 8/8] comment remove --- .../com/crunchydata/core/threading/DataComparisonThread.java | 1 - .../com/crunchydata/core/threading/DataValidationThread.java | 1 - 2 files changed, 2 deletions(-) diff --git a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java 
b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java index 3bf0dc1..d003122 100644 --- a/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataComparisonThread.java @@ -29,7 +29,6 @@ import com.crunchydata.model.DataComparisonTableMap; import com.crunchydata.model.DataComparisonResult; import com.crunchydata.util.*; -import org.apache.commons.lang3.StringUtils; import static com.crunchydata.service.DatabaseConnectionService.getConnection; import static com.crunchydata.util.HashingUtils.getMd5; diff --git a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java index 42722b4..25067f3 100644 --- a/src/main/java/com/crunchydata/core/threading/DataValidationThread.java +++ b/src/main/java/com/crunchydata/core/threading/DataValidationThread.java @@ -35,7 +35,6 @@ import com.crunchydata.util.DataProcessingUtils; import com.crunchydata.util.LoggingUtils; -import org.apache.commons.lang3.StringUtils; import org.json.JSONArray; import org.json.JSONObject;