From 7840c77f02f0318484e2f093bffb58849f8ed250 Mon Sep 17 00:00:00 2001 From: capgar Date: Wed, 25 Feb 2026 18:45:00 -0500 Subject: [PATCH] Added explanations for multiple Useful Queries pages, fixed some non-functional queries --- ...nity-kb-database-size-table-column-size.md | 33 +++++++++++- .../compare_query_log_for_2_intervals.md | 5 +- .../ingestion-rate-part_log.md | 10 +++- .../table-meta-in-zookeeper.md | 54 +++++++++++-------- 4 files changed, 74 insertions(+), 28 deletions(-) diff --git a/content/en/altinity-kb-useful-queries/altinity-kb-database-size-table-column-size.md b/content/en/altinity-kb-useful-queries/altinity-kb-database-size-table-column-size.md index fdfc18aae3..d513282c1f 100644 --- a/content/en/altinity-kb-useful-queries/altinity-kb-database-size-table-column-size.md +++ b/content/en/altinity-kb-useful-queries/altinity-kb-database-size-table-column-size.md @@ -11,6 +11,8 @@ keywords: ### Table size +> Returns table size, compression rates, and row and part counts, by table + ```sql SELECT database, @@ -30,6 +32,8 @@ ORDER BY size DESC; ### Table size + inner MatView (Atomic) +> As above, but resolves Materialized View inner table names (for Materialized Views created using implicit inner table) + ```sql SELECT p.database, @@ -51,6 +55,8 @@ ORDER BY size DESC; ### Column size +> Returns size, compression rate, row counts, and average row size for each column (by db and table) + ```sql SELECT database, @@ -74,6 +80,8 @@ ORDER BY size DESC; ### Projection size +> Returns size, compression rate, row counts, and average row size for each projection ("name"), by db and table + ```sql SELECT database, @@ -95,6 +103,8 @@ ORDER BY size DESC; ### Projection column size +> Returns size, compression rate, row counts, and average row size for each projection ("name"), by db and table, and column + ```sql SELECT database, @@ -114,6 +124,8 @@ ORDER BY size DESC; ## Understanding the columns data properties: +> For each column in a table, unique value 
counts, min/max, and top 5 most frequent values + ```sql SELECT count(), @@ -130,6 +142,13 @@ FORMAT Vertical; ## Understanding the ingest pattern: +> For parts which are recently created and are unmerged, returns row, size, and count information by db and table. + +- High count, low rows: lots of small parts +- High countIf(NOT active) relative to count(): merges are keeping up +- Low countIf(NOT active) relative to count(): merges may be falling behind +- uniqExact(partition): how many partitions are being written to + ```sql SELECT database, @@ -154,6 +173,8 @@ ORDER BY count() DESC ## part_log +> For the past day, returns per-second part lifecycle metrics over 30 minute buckets + ```sql WITH 30 * 60 AS frame_size SELECT @@ -176,7 +197,11 @@ ORDER BY database ASC, table ASC, m ASC - +``` + +> For the past day, returns per-second insert throughput metrics, by db and table, over 30 minute buckets + +```sql WITH 30 * 60 AS frame_size SELECT toStartOfInterval(event_time, toIntervalSecond(frame_size)) AS m, @@ -200,6 +225,8 @@ ORDER BY ## Understanding the partitioning +> Partition distribution analysis, aggregating system.parts metrics by partition. The quantile results can indicate whether there is a skewed distribution of data between partitions. 
+ ```sql SELECT database, @@ -235,6 +262,8 @@ FORMAT Vertical ## Subcolumns sizes +> Returns column-level storage metrics, including subcolumns (JSON, tuples, maps, etc., if present) + ```sql WITH if( @@ -259,7 +288,7 @@ SELECT sum(rows) AS rows_cnt, round(usize / rows_cnt, 2) AS avg_row_size FROM system.parts_columns -WHERE (active = 1) AND (database LIKE '%') AND (`table` LIKE '%) +WHERE (active = 1) AND (database LIKE '%') AND (`table` LIKE '%') GROUP BY table_, colunm_, diff --git a/content/en/altinity-kb-useful-queries/compare_query_log_for_2_intervals.md b/content/en/altinity-kb-useful-queries/compare_query_log_for_2_intervals.md index fe537c77a9..e80fd41082 100644 --- a/content/en/altinity-kb-useful-queries/compare_query_log_for_2_intervals.md +++ b/content/en/altinity-kb-useful-queries/compare_query_log_for_2_intervals.md @@ -3,8 +3,9 @@ title: "Compare query_log for 2 intervals" linkTitle: "Compare query_log for 2 intervals" weight: 100 description: >- + Compare query performance across different time periods --- - +> Looks at unique query shapes (by normalized_query_hash) which occurred within two different time intervals ("before" and "after"), and returns performance metrics for each query pattern which performed worse in the "after" interval. ``` WITH toStartOfInterval(event_time, INTERVAL 5 MINUTE) = '2023-06-30 13:00:00' as before, @@ -67,7 +68,7 @@ LIMIT 10 FORMAT Vertical ``` - +> Looks at the system.query_log in a window (in this case, 3 days) prior to and following a specified timestamp of interest. Returns performance metrics for each query pattern which performed worse after that timestamp. 
``` WITH toDateTime('2024-02-09 00:00:00') as timestamp_of_issue, diff --git a/content/en/altinity-kb-useful-queries/ingestion-rate-part_log.md b/content/en/altinity-kb-useful-queries/ingestion-rate-part_log.md index 92d1a7ae6c..4a6688a9bb 100644 --- a/content/en/altinity-kb-useful-queries/ingestion-rate-part_log.md +++ b/content/en/altinity-kb-useful-queries/ingestion-rate-part_log.md @@ -7,6 +7,9 @@ description: >- --- ## Insert rate + +> Returns aggregated insert metrics, per table and time bucket, for the current day (by default), including parts per insert, rows/bytes per insert, and rows/bytes per part. + ```sql select database, table, time_bucket, max(number_of_parts_per_insert) max_parts_pi, @@ -55,6 +58,9 @@ ORDER BY time_bucket, database, table ASC ``` ## New parts per partition + +> Returns new part counts and average rows per part, by table and partition, for the current day (by default) + ```sql select database, table, event_type, partition_id, count() c, round(avg(rows)) from system.part_log where event_date >= today() and event_type = 'NewPart' @@ -64,7 +70,9 @@ order by c desc ## Too fast inserts -It should not be more often than 1 new part per table per second (60 inserts per minute) +> Returns new part counts and average rows by minute by table + +Should not be more often than 1 new part per table per second (60 inserts per minute) One insert can create several parts because of partitioning and materialized views attached. ```sql diff --git a/content/en/altinity-kb-useful-queries/table-meta-in-zookeeper.md b/content/en/altinity-kb-useful-queries/table-meta-in-zookeeper.md index ddcfd873a2..819b252ae1 100644 --- a/content/en/altinity-kb-useful-queries/table-meta-in-zookeeper.md +++ b/content/en/altinity-kb-useful-queries/table-meta-in-zookeeper.md @@ -8,36 +8,44 @@ description: >- ## Compare table metadata of different replicas in zookeeper -> Metadata on replica is not up to date with common metadata in Zookeeper +> Check if a table is consistent across all zookeeper replicas. 
From each replica, returns metadata, columns, and is_active nodes. Checks whether each replica's value matches the previous replica's value, and flags any mismatches (looks_good = 0). ```sql -SELECT *, if( neighbor(name, -1) == name and name != 'is_active', neighbor(value, -1) == value , 1) as looks_good -FROM ( SELECT - name, - path, - ctime, - mtime, - value -FROM system.zookeeper -WHERE (path IN ( - SELECT arrayJoin(groupUniqArray(if(path LIKE '%/replicas', concat(path, '/', name), path))) + *, + if( + prev_name = name AND name != 'is_active', + prev_value = value, + 1 + ) AS looks_good +FROM ( + SELECT + name, + path, + ctime, + mtime, + value, + lagInFrame(name) OVER w AS prev_name, + lagInFrame(value) OVER w AS prev_value FROM system.zookeeper - WHERE path IN ( - SELECT arrayJoin([zookeeper_path, concat(zookeeper_path, '/replicas')]) - FROM system.replicas - WHERE table = 'test_repl' - ) -)) AND (name IN ('metadata', 'columns', 'is_active')) -ORDER BY - name = 'is_active', - name ASC, - path ASC + WHERE (path IN ( + SELECT arrayJoin(groupUniqArray(if(path LIKE '%/replicas', concat(path, '/', name), path))) + FROM system.zookeeper + WHERE path IN ( + SELECT arrayJoin([zookeeper_path, concat(zookeeper_path, '/replicas')]) + FROM system.replicas + WHERE table = 'test_repl' + ) + )) AND (name IN ('metadata', 'columns', 'is_active')) + WINDOW w AS (ORDER BY name = 'is_active', name ASC, path ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ) ``` -vs. +> Returns a table's create_table_query, and the last time the table's metadata was modified ```sql -SELECT metadata_modification_time, create_table_query FROM system.tables WHERE name = 'test_repl' +SELECT metadata_modification_time, create_table_query +FROM system.tables +WHERE name = 'test_repl' ```