20 changes: 20 additions & 0 deletions README.md
@@ -332,6 +332,26 @@ Properties are categorized into four sections: system, repository, source, and t

Default: 0000000000000000000000.0000000000000000000000

#### batch-start-size

When set to n, the first n data entries are skipped and hash values are generated starting from entry n + 1 for comparison. Rows are filtered by primary-key value, so a sequential numeric key is assumed.

Default: 0

#### batch-compare-size

This configuration indicates how many hash values will be generated, i.e., the maximum number of rows compared in one run.

Default: 2000

"batch-offset-size" & "batch-compare-size": These two configurations are used to paginate the data for querying when generating "hash comparison". For instance, only compare the data ranging from 1001 to 2000 or from 5001 to 10000.

#### batch-check-size

This configuration indicates how many "check validations" are to be performed.

Default: 1000
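
For example, setting `batch-check-size = 500` revalidates at most 500 out-of-sync rows per run.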

### Repository

#### repo-dbname
15 changes: 15 additions & 0 deletions pgcompare.properties.sample
@@ -58,6 +58,21 @@ log-level = INFO
# default: true
database-sort = true

# When set to n, the first n data entries are skipped and hash values are generated starting from entry n + 1 for comparison.
# default: 0
batch-start-size = 0

# This configuration indicates how many "hash values" will be generated.
# default: 2000
batch-compare-size = 2000

#"batch-start-size" & "batch-compare-size": These two configurations are used for conducting sample queries on the data, so as to perform the "hash comparison" when generating it. For instance, only the data ranging from 1001 to 2000 or from 5001 to 10000 can be compared.

# This configuration indicates how many "check validations" are to be performed.
# default: 1000
batch-check-size = 1000


##################################
# repository
##################################
3 changes: 3 additions & 0 deletions src/main/java/com/crunchydata/config/Settings.java
@@ -123,6 +123,9 @@ public static Properties setDefaults() {
defaultProps.setProperty("observer-vacuum","true");
defaultProps.setProperty("stage-table-parallel","0");
defaultProps.setProperty("standard-number-format","0000000000000000000000.0000000000000000000000");
defaultProps.setProperty("batch-start-size","0");
defaultProps.setProperty("batch-compare-size","2000");
defaultProps.setProperty("batch-check-size","1000");


// Repository
@@ -89,6 +89,9 @@ public void run() {
DecimalFormat formatter = new DecimalFormat("#,###");
int loadRowCount = Integer.parseInt(Props.getProperty("batch-progress-report-size"));
int observerRowCount = Integer.parseInt(Props.getProperty("observer-throttle-size"));

// Pagination settings for hash generation: row cap per run and primary-key starting point.
int batchCompareSize = Integer.parseInt(Props.getProperty("batch-compare-size"));
int batchStartSize = Integer.parseInt(Props.getProperty("batch-start-size"));

// Database resources
Connection conn = null;
@@ -116,9 +119,33 @@ public void run() {
}

if (!pkList.isEmpty() && Props.getProperty("database-sort").equals("true")) {
// Skip ahead: only read rows whose primary-key value exceeds batch-start-size.
if (batchStartSize > 0) {
sql += " AND " + pkList + " > " + batchStartSize;
}
sql += " ORDER BY " + pkList;
}


if (batchCompareSize > 0) {
String dbType = Props.getProperty(targetType + "-type");

// Append the dialect-appropriate row-limit clause to cap the rows hashed in this batch.
switch (dbType.toLowerCase()) {
case "oracle":
case "db2":
case "mssql":
sql += " OFFSET 0 ROWS FETCH NEXT " + batchCompareSize + " ROWS ONLY";
break;
default:
// mysql, mariadb, postgres, and snowflake all accept LIMIT, as does the fallback.
sql += " LIMIT " + batchCompareSize;
}
}
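
// Illustration with hypothetical values (pkList = "id", batch-start-size = 1000,
// batch-compare-size = 1000): a Postgres target appends
//   ... AND id > 1000 ORDER BY id LIMIT 1000
// while Oracle, DB2, and MSSQL targets append
//   ... AND id > 1000 ORDER BY id OFFSET 0 ROWS FETCH NEXT 1000 ROWS ONLY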

//conn.setAutoCommit(false);
stmt = conn.prepareStatement(sql);
stmt.setFetchSize(fetchSize);
@@ -83,9 +83,16 @@ public static JSONObject checkRows (Connection repoConn, Connection sourceConn,

PreparedStatement stmt = null;
ResultSet rs = null;
String SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT;
int batchCheckSize = Integer.parseInt(Props.getProperty("batch-check-size"));

try {
// When batch-check-size > 0, cap the number of out-of-sync rows revalidated per run.
if (batchCheckSize > 0) {
SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT = SQL_REPO_SELECT_OUTOFSYNC_ROWS + " LIMIT " + batchCheckSize;
stmt = repoConn.prepareStatement(SQL_REPO_SELECT_OUTOFSYNC_ROWS_LIMIT);
} else {
stmt = repoConn.prepareStatement(SQL_REPO_SELECT_OUTOFSYNC_ROWS);
}
stmt.setObject(1, dct.getTid());
stmt.setObject(2, dct.getTid());
rs = stmt.executeQuery();