From 775dfab60f4e6df4e6f5e1a1e2ef71e240230adb Mon Sep 17 00:00:00 2001
From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com>
Date: Tue, 26 Aug 2025 17:59:45 +0300
Subject: [PATCH 1/3] Remove others aggregation from pg_stat_statements metric
 in prometheus

- Removed the `where rn <= 500` cap and the UNION ALL branch that summed all
  queries ranked beyond the top 500 into one `tail_dummy_user` / queryid -1 row
- All individual queries are now exported, without artificial grouping
- Improves granularity of pg_stat_statements monitoring data
---
 config/pgwatch-prometheus/metrics.yml | 48 ---------------------------
 1 file changed, 48 deletions(-)

diff --git a/config/pgwatch-prometheus/metrics.yml b/config/pgwatch-prometheus/metrics.yml
index 406e236..5d0ffae 100644
--- a/config/pgwatch-prometheus/metrics.yml
+++ b/config/pgwatch-prometheus/metrics.yml
@@ -901,30 +901,6 @@ metrics:
           temp_bytes_read::int8 as temp_bytes_read,
           temp_bytes_written::int8 as temp_bytes_written
           from ranked_statements
-          where rn <= 500
-        union all
-        select
-          'tail_dummy_user' as tag_user,
-          current_database() as tag_datname,
-          -1 as tag_queryid,
-          sum(calls)::int8 as calls,
-          sum(plans_total)::int8 as plans_total,
-          sum(exec_time_total)::int8 as exec_time_total,
-          sum(plan_time_total)::int8 as plan_time_total,
-          sum(rows)::int8 as rows,
-          sum(shared_bytes_hit_total)::int8 as shared_bytes_hit_total,
-          sum(shared_bytes_read_total)::int8 as shared_bytes_read_total,
-          sum(shared_bytes_dirtied_total)::int8 as shared_bytes_dirtied_total,
-          sum(shared_bytes_written_total)::int8 as shared_bytes_written_total,
-          sum(block_read_total)::int8 as block_read_total,
-          sum(block_write_total)::int8 as block_write_total,
-          sum(wal_records)::int8 as wal_records,
-          sum(wal_fpi)::int8 as wal_fpi,
-          sum(wal_bytes)::int8 as wal_bytes,
-          sum(temp_bytes_read)::int8 as temp_bytes_read,
-          sum(temp_bytes_written)::int8 as temp_bytes_written
-        from ranked_statements
-        where rn > 500
       17: |
         WITH ranked_statements as (
           select
@@ -973,30 +949,6 @@ metrics:
           temp_bytes_read::int8 as temp_bytes_read,
           temp_bytes_written::int8 as temp_bytes_written
           from ranked_statements
-          where rn <= 500
-        union all
-        select
-          'tail_dummy_user' as tag_user,
-          current_database() as tag_datname,
-          -1 as tag_queryid,
-          sum(calls)::int8 as calls,
-          sum(plans_total)::int8 as plans_total,
-          sum(exec_time_total)::int8 as exec_time_total,
-          sum(plan_time_total)::int8 as plan_time_total,
-          sum(rows)::int8 as rows,
-          sum(shared_bytes_hit_total)::int8 as shared_bytes_hit_total,
-          sum(shared_bytes_read_total)::int8 as shared_bytes_read_total,
-          sum(shared_bytes_dirtied_total)::int8 as shared_bytes_dirtied_total,
-          sum(shared_bytes_written_total)::int8 as shared_bytes_written_total,
-          sum(block_read_total)::int8 as block_read_total,
-          sum(block_write_total)::int8 as block_write_total,
-          sum(wal_records)::int8 as wal_records,
-          sum(wal_fpi)::int8 as wal_fpi,
-          sum(wal_bytes)::int8 as wal_bytes,
-          sum(temp_bytes_read)::int8 as temp_bytes_read,
-          sum(temp_bytes_written)::int8 as temp_bytes_written
-        from ranked_statements
-        where rn > 500
     gauges:
       - calls
       - plans_total
-- 
GitLab


From 6a9a33d9f1e24a311876536cc53164970324ada7 Mon Sep 17 00:00:00 2001
From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com>
Date: Thu, 28 Aug 2025 16:19:58 +0300
Subject: [PATCH 2/3] Added bgwriter and vacuum graphs

---
 ...Dashboard_1_Node_performance_overview.json | 331 +++++++++++++++++-
 .../Dashboard_7_Autovacuum_and_bloat.json     | 317 ++++++++++++++---
 ...ble_Stats => Dashboard_9_Table_Stats.json} |   0
 3 files changed, 579 insertions(+), 69 deletions(-)
 rename config/grafana/dashboards/{Dashboard_9_Table_Stats => Dashboard_9_Table_Stats.json} (100%)

diff --git a/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json b/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json
index 600fb55..b5d9ad8 100644
--- a/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json
+++ b/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json
@@ -18,7 +18,7 @@
   "editable": true,
   "fiscalYearStartMonth": 0,
   "graphTooltip": 1,
-  "id": 2,
+  "id": 8,
   "links": [],
   "panels": [
     {
@@ -1157,7 +1157,38 @@
           },
           "unit": "percent"
         },
-        "overrides": []
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byFrameRefID",
+              "options": "A"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "green",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byFrameRefID",
+              "options": "B"
+            },
+            "properties": [
+              {
+                "id": "color",
+                "value": {
+                  "fixedColor": "red",
+                  "mode": "fixed"
+                }
+              }
+            ]
+          }
+        ]
       },
       "gridPos": {
         "h": 8,
@@ -1185,6 +1216,20 @@
       },
       "pluginVersion": "12.0.2",
       "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "P7A0D6631BB10B34F"
+          },
+          "editorMode": "code",
+          "expr": "sum(irate(pgwatch_db_stats_xact_rollback{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/(sum(irate(pgwatch_db_stats_xact_commit{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))+sum(irate(pgwatch_db_stats_xact_rollback{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))) * 100",
+          "hide": false,
+          "instant": false,
+          "interval": "20",
+          "legendFormat": "Rollbacks",
+          "range": true,
+          "refId": "B"
+        },
         {
           "editorMode": "code",
           "expr": "sum(irate(pgwatch_db_stats_xact_commit{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/(sum(irate(pgwatch_db_stats_xact_commit{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))+sum(irate(pgwatch_db_stats_xact_rollback{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))) * 100",
@@ -1194,7 +1239,7 @@
           "refId": "A"
         }
       ],
-      "title": "Commit ratio",
+      "title": "Commit vs Rollback ratio",
       "type": "timeseries"
     },
     {
@@ -3791,7 +3836,269 @@
           "refId": "A"
         }
       ],
-      "title": "Age of the oldest multi-transaction ID",
+      "title": "Age of the oldest multi-transaction ID that has not been frozen",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "P7A0D6631BB10B34F"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 1,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "binBps"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 10,
+        "w": 24,
+        "x": 0,
+        "y": 149
+      },
+      "id": 41,
+      "options": {
+        "legend": {
+          "calcs": [
+            "min",
+            "max",
+            "mean"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "rate(pgwatch_bgwriter_buffers_checkpoint{datname=\"$db_name\"}[$__rate_interval]) * on(datname) pgwatch_settings_numeric_value{datname=\"$db_name\", setting_name=\"block_size\"}",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "interval": "20",
+          "legendFormat": "Size of buffers cleaned by the checkpointer",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "P7A0D6631BB10B34F"
+          },
+          "editorMode": "code",
+          "expr": "irate(pgwatch_bgwriter_buffers_clean[$__rate_interval]) * on(datname) pgwatch_settings_numeric_value{datname=\"$db_name\", setting_name=\"block_size\"}",
+          "hide": false,
+          "instant": false,
+          "interval": "20",
+          "legendFormat": "Size of buffers cleaned by the background writer",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "P7A0D6631BB10B34F"
+          },
+          "editorMode": "code",
+          "expr": "irate(pgwatch_bgwriter_buffers_backend[$__rate_interval]) * on(datname) pgwatch_settings_numeric_value{datname=\"$db_name\", setting_name=\"block_size\"}",
+          "hide": false,
+          "instant": false,
+          "interval": "20",
+          "legendFormat": "Size of buffers cleaned directly by a backend",
+          "range": true,
+          "refId": "C"
+        }
+      ],
+      "title": "bgwriter and checkpointer",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "P7A0D6631BB10B34F"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "axisSoftMax": 7,
+            "axisSoftMin": 1,
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "points",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "decimals": 0,
+          "mappings": [
+            {
+              "options": {
+                "1": {
+                  "index": 0,
+                  "text": "Initial"
+                },
+                "2": {
+                  "index": 1,
+                  "text": "Scanning heap"
+                },
+                "3": {
+                  "index": 2,
+                  "text": "Vacuuming indexes"
+                },
+                "4": {
+                  "index": 3,
+                  "text": "Vacuuming heap"
+                },
+                "5": {
+                  "index": 4,
+                  "text": "Cleaning up indexes"
+                },
+                "6": {
+                  "index": 5,
+                  "text": "Truncating heap"
+                },
+                "7": {
+                  "index": 6,
+                  "text": "Final cleanup"
+                }
+              },
+              "type": "value"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 11,
+        "w": 24,
+        "x": 0,
+        "y": 159
+      },
+      "id": 42,
+      "options": {
+        "legend": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "(\n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"7\"}) * 0 + 7 or \n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"6\"}) * 0 + 6 or\n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"5\"}) * 0 + 5 or \n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"4\"}) * 0 + 4 or \n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"3\"}) * 0 + 3 or \n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"2\"}) * 0 + 2 or \n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"1\"}) * 0 + 1\n)",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "interval": "",
+          "legendFormat": "{{schema_name}}.{{table_name}} - {{vacuum_mode}}",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        }
+      ],
+      "title": "Vacuum timeline",
       "type": "timeseries"
     },
     {
@@ -3803,7 +4110,7 @@
         "h": 3,
         "w": 24,
         "x": 0,
-        "y": 149
+        "y": 170
       },
       "id": 40,
       "options": {
@@ -3827,8 +4134,8 @@
     "list": [
       {
         "current": {
-          "text": "local",
-          "value": "local"
+          "text": "default",
+          "value": "default"
         },
         "definition": "label_values(pgwatch_settings_configured,cluster)",
         "label": "Cluster name",
@@ -3845,8 +4152,8 @@
       },
       {
         "current": {
-          "text": "node-01",
-          "value": "node-01"
+          "text": "prod-db",
+          "value": "prod-db"
         },
         "definition": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)",
         "label": "Node name",
@@ -3882,12 +4189,12 @@
     ]
   },
   "time": {
-    "from": "now-30m",
+    "from": "now-2d",
     "to": "now"
   },
   "timepicker": {},
   "timezone": "utc",
-      "title": "01. Single node performance overview (high-level)",
+  "title": "01. Single node performance overview (high-level)",
   "uid": "f90500a0-a12e-4081-a2f0-07ed96f27915",
-  "version": 2
+  "version": 7
 }
\ No newline at end of file
diff --git a/config/grafana/dashboards/Dashboard_7_Autovacuum_and_bloat.json b/config/grafana/dashboards/Dashboard_7_Autovacuum_and_bloat.json
index 0f9e858..21b981e 100644
--- a/config/grafana/dashboards/Dashboard_7_Autovacuum_and_bloat.json
+++ b/config/grafana/dashboards/Dashboard_7_Autovacuum_and_bloat.json
@@ -1,65 +1,268 @@
 {
-    "annotations": {
-      "list": [
-        {
-          "builtIn": 1,
-          "datasource": {
-            "type": "grafana",
-            "uid": "-- Grafana --"
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": 6,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "P7A0D6631BB10B34F"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "axisSoftMax": 7,
+            "axisSoftMin": 1,
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "points",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
           },
-          "enable": true,
-          "hide": true,
-          "iconColor": "rgba(0, 211, 255, 1)",
-          "name": "Annotations & Alerts",
-          "type": "dashboard"
+          "decimals": 0,
+          "mappings": [
+            {
+              "options": {
+                "1": {
+                  "index": 0,
+                  "text": "Initial"
+                },
+                "2": {
+                  "index": 1,
+                  "text": "Scanning heap"
+                },
+                "3": {
+                  "index": 2,
+                  "text": "Vacuuming indexes"
+                },
+                "4": {
+                  "index": 3,
+                  "text": "Vacuuming heap"
+                },
+                "5": {
+                  "index": 4,
+                  "text": "Cleaning up indexes"
+                },
+                "6": {
+                  "index": 5,
+                  "text": "Truncating heap"
+                },
+                "7": {
+                  "index": 6,
+                  "text": "Final cleanup"
+                }
+              },
+              "type": "value"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 14,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.0.2",
+      "targets": [
+        {
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "(\n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"7\", schema_name=~\"$schema_name\", table_name=~\"$table_name\"}) * 0 + 7 or \n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"6\", schema_name=~\"$schema_name\", table_name=~\"$table_name\"}) * 0 + 6 or\n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"5\", schema_name=~\"$schema_name\", table_name=~\"$table_name\"}) * 0 + 5 or \n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"4\", schema_name=~\"$schema_name\", table_name=~\"$table_name\"}) * 0 + 4 or \n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"3\", schema_name=~\"$schema_name\", table_name=~\"$table_name\"}) * 0 + 3 or \n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"2\", schema_name=~\"$schema_name\", table_name=~\"$table_name\"}) * 0 + 2 or \n    group by (schema_name, table_name, vacuum_mode) (\n        pgwatch_pg_vacuum_progress_index_vacuum_count{phase=\"1\", schema_name=~\"$schema_name\", table_name=~\"$table_name\"}) * 0 + 1\n)",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "interval": "",
+          "legendFormat": "{{schema_name}}.{{table_name}} - {{vacuum_mode}}",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
         }
-      ]
-    },
-    "editable": true,
-    "fiscalYearStartMonth": 0,
-    "graphTooltip": 0,
-    "id": 9,
-    "links": [],
-    "panels": [
+      ],
+      "title": "Vacuum timeline",
+      "type": "timeseries"
+    }
+  ],
+  "preload": false,
+  "schemaVersion": 41,
+  "tags": [],
+  "templating": {
+    "list": [
       {
-        "fieldConfig": {
-          "defaults": {},
-          "overrides": []
+        "current": {
+          "text": "default",
+          "value": "default"
         },
-        "gridPos": {
-          "h": 8,
-          "w": 12,
-          "x": 0,
-          "y": 0
+        "definition": "label_values(pgwatch_settings_configured,cluster)",
+        "label": "Cluster name",
+        "name": "cluster_name",
+        "options": [],
+        "query": {
+          "qryType": 1,
+          "query": "label_values(pgwatch_settings_configured,cluster)",
+          "refId": "PrometheusVariableQueryEditor-VariableQuery"
         },
-        "id": 1,
-        "options": {
-          "code": {
-            "language": "plaintext",
-            "showLineNumbers": false,
-            "showMiniMap": false
-          },
-          "content": "# Coming soon...",
-          "mode": "markdown"
+        "refresh": 1,
+        "regex": "",
+        "type": "query"
+      },
+      {
+        "current": {
+          "text": "prod-db",
+          "value": "prod-db"
+        },
+        "definition": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)",
+        "label": "Node name",
+        "name": "node_name",
+        "options": [],
+        "query": {
+          "qryType": 1,
+          "query": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)",
+          "refId": "PrometheusVariableQueryEditor-VariableQuery"
+        },
+        "refresh": 1,
+        "regex": "",
+        "type": "query"
+      },
+      {
+        "current": {
+          "text": "postgres",
+          "value": "postgres"
+        },
+        "definition": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)",
+        "label": "DB name",
+        "name": "db_name",
+        "options": [],
+        "query": {
+          "qryType": 1,
+          "query": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)",
+          "refId": "PrometheusVariableQueryEditor-VariableQuery"
+        },
+        "refresh": 1,
+        "regex": "",
+        "type": "query"
+      },
+      {
+        "allValue": ".+",
+        "current": {
+          "text": "public",
+          "value": "public"
+        },
+        "definition": "label_values(pgwatch_table_stats_tx_freeze_age,schema)",
+        "includeAll": true,
+        "name": "schema_name",
+        "options": [],
+        "query": {
+          "qryType": 1,
+          "query": "label_values(pgwatch_table_stats_tx_freeze_age,schema)",
+          "refId": "PrometheusVariableQueryEditor-VariableQuery"
+        },
+        "refresh": 1,
+        "regex": "",
+        "type": "query"
+      },
+      {
+        "allValue": ".+",
+        "current": {
+          "text": "All",
+          "value": [
+            "$__all"
+          ]
+        },
+        "definition": "label_values(pgwatch_table_stats_tx_freeze_age{schema=~\"$schema_name\"},table_name)",
+        "includeAll": true,
+        "multi": true,
+        "name": "table_name",
+        "options": [],
+        "query": {
+          "qryType": 1,
+          "query": "label_values(pgwatch_table_stats_tx_freeze_age{schema=~\"$schema_name\"},table_name)",
+          "refId": "PrometheusVariableQueryEditor-VariableQuery"
         },
-        "pluginVersion": "12.0.2",
-        "title": "",
-        "type": "text"
+        "refresh": 1,
+        "regex": "",
+        "type": "query"
       }
-    ],
-    "preload": false,
-    "schemaVersion": 41,
-    "tags": [],
-    "templating": {
-      "list": []
-    },
-    "time": {
-      "from": "now-6h",
-      "to": "now"
-    },
-    "timepicker": {},
-    "timezone": "browser",
-    "title": "07. Autovacuum and bloat  -- \"Metrics are collected (part of health check); dashboard – TODO\"",
-    "uid": "caffad19-4605-41fe-87f7-484ab67200e8",
-    "version": 1
-  }
\ No newline at end of file
+    ]
+  },
+  "time": {
+    "from": "now-6h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "07. Autovacuum and bloat  -- \"Metrics are collected (part of health check); dashboard – TODO\"",
+  "uid": "caffad19-4605-41fe-87f7-484ab67200e8",
+  "version": 5
+}
\ No newline at end of file
diff --git a/config/grafana/dashboards/Dashboard_9_Table_Stats b/config/grafana/dashboards/Dashboard_9_Table_Stats.json
similarity index 100%
rename from config/grafana/dashboards/Dashboard_9_Table_Stats
rename to config/grafana/dashboards/Dashboard_9_Table_Stats.json
-- 
GitLab


From 842f289d989f296caceb34ea77c7fb0f7286cc6a Mon Sep 17 00:00:00 2001
From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com>
Date: Tue, 2 Sep 2025 16:53:23 +0300
Subject: [PATCH 3/3] docs: add PostgresAI monitoring reference documentation

Add comprehensive monitoring reference guide that documents:
- PostgresAI monitoring architecture and components
- Detailed dashboard descriptions and key metrics
- Complete graph inventory across all 9 dashboards
- Updated to follow PostgresAI documentation standards:
  * Sentence-style capitalization throughout
  * Consistent terminology (Postgres vs PostgreSQL)
  * Professional formatting and structure
---
 MONITORING_REFERENCE.md | 206 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 206 insertions(+)
 create mode 100644 MONITORING_REFERENCE.md

diff --git a/MONITORING_REFERENCE.md b/MONITORING_REFERENCE.md
new file mode 100644
index 0000000..65148da
--- /dev/null
+++ b/MONITORING_REFERENCE.md
@@ -0,0 +1,206 @@
+# PostgresAI monitoring reference documentation
+
+## Overview
+
+PostgresAI monitoring is a comprehensive Postgres database monitoring solution built on pgwatch, Grafana, and Prometheus. This system provides real-time insights into Postgres database performance, health, and operations through a set of specialized dashboards.
+
+## Architecture
+
+The monitoring stack consists of:
+- **pgwatch**: Postgres monitoring agent that collects metrics
+- **Grafana**: Visualization and dashboard platform
+- **Flask backend**: Additional API services for enhanced functionality
+- **Prometheus and Postgres**: Storage for metrics and query texts
+
+## Dashboard reference
+
+### Dashboard 1: Node Performance Overview
+**Purpose**: High-level overview of Postgres database performance and health
+
+**Key Metrics**:
+- **Active session history**: Database wait events by type (CPU, locks, I/O)
+- **Sessions**: Connection states (Active, Idle, Idle-in-transaction, Waiting)
+- **Transactions**: Commit vs rollback ratios and rates
+- **Query performance**: Calls, execution time, and latency metrics
+- **Buffer cache**: Hit ratios and I/O patterns
+- **WAL activity**: Write-ahead log generation and archiving
+
+### Dashboard 2: Aggregated Query Analysis
+**Purpose**: Identify top-performing and problematic queries across the database
+
+**Key Metrics**:
+- **Detailed table view**: Table of stats for each query from pg_stat_statements
+- **Top queries by calls**: Most frequently executed queries
+- **Top queries by execution time**: Queries consuming most total time
+- **Top queries by latency**: Slowest individual query executions
+- **I/O analysis**: Queries with highest disk read/write activity
+- **Buffer usage**: Queries with best/worst cache efficiency
+- **Temp file usage**: Queries spilling to disk for sorting/hashing
+- **WAL generation**: Queries generating most write-ahead log data
+
+
+### Dashboard 3: Single Query Analysis
+**Purpose**: Deep-dive analysis of individual queries by query ID
+
+**Key Metrics**:
+- **Execution timeline**: Calls and execution time over time
+- **Wait events**: Specific wait types for this query
+- **Resource usage**: Buffer hits, disk I/O, WAL generation
+- **Performance metrics**: Latency, rows returned, temp file usage
+- **Per-call analysis**: Average metrics per query execution
+
+
+### Dashboard 4: Wait sampling dashboard
+**Purpose**: Detailed analysis of database wait events and blocking
+
+**Key Metrics**:
+- **Active session history**: All wait events including background processes
+- **Active session history by event type**: Detailed categorization by event type
+- **Active session history by event type and event**: Wait events correlated with specific queries
+
+### Dashboard 5: Backup stats
+**Purpose**: Monitor backup and recovery processes
+
+**Key Metrics**:
+- **Archive success and errors**: Rate of successful WAL archives versus failed archive attempts
+- **Archive lag**: Amount of WAL data in bytes that has been generated but not yet archived
+- **WAL archive success rate**: Percentage of successful WAL archive operations
+
+### Dashboard 7: Autovacuum and bloat
+**Purpose**: Monitor Postgres maintenance processes and table health
+
+**Key Metrics**:
+- **Vacuum timeline**: Autovacuum progress through different phases
+
+
+### Dashboard 8: Index health
+**Purpose**: Monitor index performance and maintenance needs
+
+**Key Metrics**:
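+- **Index bloat**: Estimated wasted space per index, as a percentage and as absolute size
+- **Index size**: Largest indexes by total size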
+
+
+### Dashboard 9: Table stats
+**Purpose**: Monitor table-level operations and data patterns
+
+**Key Metrics**:
+- **CRUD operations**: Insert, update, delete rates by table
+
+
+## Complete graph inventory
+
+### Dashboard 1: Node Performance Overview (36 graphs)
+1. **Active session history** - Shows database wait events by type (CPU, locks, I/O) to identify performance bottlenecks
+2. **Host stats** - Displays system-level metrics like CPU, memory, and disk usage
+3. **Postgres stats** - Core Postgres instance metrics and version information
+4. **Sessions** - Connection states (Active, Idle, Idle-in-transaction, Waiting) with max_connections limit
+5. **Non-idle sessions** - Active database connections excluding idle ones for workload monitoring
+6. **Calls (pg_stat_statements)** - Total SQL statement executions per second across all queries
+7. **Transactions** - Transaction commit vs rollback rates and overall transaction activity
+9. **Commit vs rollback ratio** - Ratio of successful vs failed transactions indicating application health
+10. **Statements total time (pg_stat_statements)** - Total execution time per second for all SQL statements
+11. **Statements time per call (pg_stat_statements) aka latency** - Average execution time per query call (key latency metric)
+12. **Total rows (pg_stat_statements)** - Total rows returned per second across all queries
+13. **Rows per call (pg_stat_statements)** - Average rows returned per query execution
+14. **blk_read_time vs blk_write_time (s/s) (pg_stat_statements)** - Time spent reading/writing disk blocks per second
+15. **blk_read_time vs blk_write_time per call (pg_stat_statements)** - Average disk I/O time per query execution
+16. **shared_blks_hit (bytes) (pg_stat_statements)** - Data read from shared buffer cache (good performance indicator)
+17. **shared_blks_hit (bytes) per call (pg_stat_statements)** - Average cache hits per query execution
+18. **shared_blks_read (bytes) (pg_stat_statements)** - Data read from disk (cache misses - expensive operations)
+19. **shared_blks_read (bytes) per call (pg_stat_statements)** - Average disk reads per query execution
+20. **shared_blks_written (bytes) (pg_stat_statements)** - Data written from buffers to disk per second
+21. **shared_blks_written (bytes) per call (pg_stat_statements)** - Average buffer writes per query execution
+22. **shared_blks_dirtied (bytes) (pg_stat_statements)** - Buffer blocks modified (dirtied) per second
+23. **shared_blks_dirtied (bytes) per call (pg_stat_statements)** - Average buffer modifications per query
+24. **shared_blks_read_ratio (pg_stat_statements)** - Cache miss ratio (< 10-20% indicates good cache efficiency)
+25. **WAL bytes (pg_current_wal_lsn)** - Write-ahead log generation rate (affects replication and recovery)
+26. **WAL bytes per call (pg_current_wal_lsn)** - Average WAL generation per query execution
+27. **WAL fpi (pg_stat_statements)** - WAL full page images generated per second
+28. **WAL fpi per call (pg_current_wal_lsn)** - Average full page images per query execution
+29. **temp_bytes_read vs temp_bytes_written (pg_stat_statements)** - Temporary file I/O operations
+30. **temp_bytes_read vs temp_bytes_written per call (pg_stat_statements)** - Average temp file usage per query
+31. **Locks by mode** - Active locks by type (AccessShareLock, RowExclusiveLock, etc.)
+32. **Longest non-idle transaction age, > 1 min** - Age of oldest active transaction (>1min threshold)
+33. **Age of the oldest transaction ID that has not been frozen** - Transaction ID age (watch for wraparound issues)
+34. **Age of the oldest multi-transaction ID that has not been frozen** - Multi-transaction ID age monitoring
+35. **bgwriter and checkpointer** - Background writer vs checkpointer activity comparison
+36. **Vacuum timeline** - VACUUM operation progress through different phases
+
+### Dashboard 2: Aggregated Query Analysis (25 graphs)
+1. **Detailed table view (pg_stat_statements)** - Tabular view of query performance metrics with sorting and filtering
+2. **Top $top_n queries analysis (pg_stat_statements)** - Overview of most significant queries by multiple metrics
+3. **Top $top_n statements by calls (pg_stat_statements)** - Most frequently executed queries (call frequency)
+4. **Top $top_n statements by execution time (pg_stat_statements)** - Queries consuming most total execution time
+5. **Top $top_n statements by execution time per call (pg_stat_statements)** - Slowest individual query executions
+6. **Top $top_n statements by planning time (pg_stat_statements)** - Queries with highest total query planning time
+7. **Top $top_n statements by planning time per call (pg_stat_statements)** - Queries with slowest planning per execution
+8. **Top $top_n statements by rows (pg_stat_statements)** - Queries returning most total rows
+9. **Top $top_n statements by rows per call (pg_stat_statements)** - Queries with highest average rows per execution
+10. **Top $top_n statements by shared_blks_hit (in bytes) (pg_stat_statements)** - Queries reading most data from the shared buffer cache (highest total cache hits)
+11. **Top $top_n statements by shared_blks_hit (in bytes) per call (pg_stat_statements)** - Best average cache hits per query
+12. **Top $top_n statements by shared_blks_read (in bytes) (pg_stat_statements)** - Queries causing most disk reads (worst cache performance)
+13. **Top $top_n statements by shared_blks_read (in bytes) per call (pg_stat_statements)** - Highest average disk reads per query
+14. **Top $top_n statements by shared_blks_written (in bytes) (pg_stat_statements)** - Queries writing most data from shared buffers to disk
+15. **Top $top_n statements by shared_blks_written (in bytes) per call (pg_stat_statements)** - Highest average buffer writes per query
+16. **Top $top_n statements by shared_blks_dirtied (in bytes) per call (pg_stat_statements)** - Highest average buffer modifications (dirtied data) per query
+17. **Top $top_n statements by WAL bytes (pg_stat_statements)** - Queries generating most write-ahead log data
+18. **Top $top_n statements by WAL bytes per call (pg_stat_statements)** - Highest average WAL generation per query
+19. **Top $top_n statements by WAL fpi (pg_stat_statements)** - Queries generating most WAL full page images
+20. **Top $top_n statements by WAL fpi per call (pg_stat_statements)** - Highest average FPI generation per query
+21. **Top $top_n statements by temp bytes read (pg_stat_statements)** - Queries reading most from temporary files
+22. **Top $top_n statements by temp bytes read per call (pg_stat_statements)** - Highest average temp file reads per query
+23. **Top $top_n statements by temp bytes written (pg_stat_statements)** - Queries writing most to temporary files
+24. **Top $top_n statements by temp bytes written per call (pg_stat_statements)** - Highest average temp file writes per query
+25. **Query Analysis panels (multiple instances)** - Drill-down analysis panels for individual queries
+
+### Dashboard 3: Single Query Analysis (17 graphs)
+1. **Active session history** - Wait events specifically for the selected query ID
+2. **Calls (pg_stat_statements)** - Execution frequency of the specific query over time
+3. **Execution time (pg_stat_statements)** - Total execution time for the specific query per second
+4. **Execution time per call (pg_stat_statements)** - Average execution time per call for the specific query
+5. **Rows (pg_stat_statements)** - Total rows returned by the specific query per second
+6. **Rows per call (pg_stat_statements)** - Average rows returned per execution of the specific query
+7. **shared_blks_hit (in bytes) (pg_stat_statements)** - Cache efficiency for the specific query (bytes from memory)
+8. **shared_blks_hit (in bytes) per call (pg_stat_statements)** - Average cache hits per execution of the specific query
+9. **WAL bytes (pg_stat_statements)** - WAL generation rate for the specific query
+10. **WAL bytes per call (pg_stat_statements)** - Average WAL generation per execution of the specific query
+11. **WAL fpi (in bytes) (pg_stat_statements)** - Full page images generated by the specific query
+12. **WAL fpi per call (pg_stat_statements)** - Average FPI generation per execution of the specific query
+13. **Temp bytes read (pg_stat_statements)** - Temporary file reads for the specific query
+14. **Temp bytes read per call (pg_stat_statements)** - Average temp file reads per execution of the specific query
+15. **Temp bytes written (pg_stat_statements)** - Temporary file writes for the specific query
+16. **Temp bytes written per call (pg_stat_statements)** - Average temp file writes per execution of the specific query
+17. **Query Analysis panels (multiple instances)** - Detailed analysis panels for the selected query
+
+### Dashboard 4: Wait sampling dashboard (4 graphs)
+1. **Active session history** - Comprehensive view of all database wait events including background processes
+2. **Active session history by event type** - Wait events grouped by category (CPU, I/O, locks, etc.)
+3. **Active session history by event type and event** - Detailed breakdown with specific event names and query IDs
+4. **Query Analysis** - Drill-down analysis for queries associated with wait events
+
+### Dashboard 5: Backup stats (3 graphs)
+1. **Archive success and errors** - Rate of successful vs failed WAL archive operations
+2. **WAL archive success rate** - Percentage of successful archive operations (should be 100%)
+3. **Archive lag** - Amount of WAL data waiting to be archived (data loss window)
+
+### Dashboard 7: Autovacuum and bloat (1 graph)
+1. **Vacuum timeline** - Progress of VACUUM operations through phases (scanning, vacuuming, cleaning, etc.)
+
+### Dashboard 8: Index health (6 graphs)
+1. **Detailed index view** - Tabular view of all indexes with bloat, size, and usage statistics
+2. **Top $top_n index analysis** - Overview of most problematic indexes by various metrics
+3. **Top $top_n indexes by estimated bloat %** - Indexes with highest percentage of wasted space
+4. **Top $top_n indexes by estimated bloat size** - Indexes with largest absolute amount of wasted space
+5. **Top $top_n indexes by size** - Largest indexes by total size (memory and disk impact)
+6. **Query Analysis panels (multiple instances)** - Detailed analysis for index-related queries
+
+### Dashboard 9: Table stats (7 graphs)
+1. **Tuple operations** - Total CRUD operations (insert, update, delete, hot update) across all tables
+2. **Tuple operations (%)** - Percentage breakdown of different operation types
+3. **Number of inserted tuples by table** - Insert rates for individual tables over time
+4. **Number of updated tuples by table** - Update rates for individual tables (watch for bloat impact)
+5. **Number of hot updated tuples by table** - HOT updates by table (efficient updates avoiding index updates)
+6. **Number of deleted tuples by table** - Delete rates by table (triggers vacuum operations)
+7. **Table details panels (multiple instances)** - Detailed statistics and metrics for individual tables
+
-- 
GitLab