Materialized CTEs: Mistakes
Module: Subqueries & CTEs
-- Assuming CTE always materializes (PostgreSQL 12+)
WITH expensive_agg AS (
SELECT department, AVG(salary) as avg_sal
FROM employees -- 1M rows
GROUP BY department
)
SELECT * FROM expensive_agg WHERE avg_sal > 70000
UNION ALL
SELECT * FROM expensive_agg WHERE avg_sal <= 70000;
-- PostgreSQL 12+ inlines this by default!
-- Aggregation runs TWICE (scans 1M rows twice)
-- Explicitly materialize when CTE used multiple times
WITH expensive_agg AS MATERIALIZED (
SELECT department, AVG(salary) as avg_sal
FROM employees -- 1M rows
GROUP BY department
)
SELECT * FROM expensive_agg WHERE avg_sal > 70000
UNION ALL
SELECT * FROM expensive_agg WHERE avg_sal <= 70000;
-- MATERIALIZED forces single execution
-- Aggregation runs ONCE (scans 1M rows once)
PostgreSQL 12+ changed default behavior from MATERIALIZED to NOT MATERIALIZED (inlining). This is great for simple CTEs but terrible for expensive CTEs referenced multiple times. Without explicit MATERIALIZED hint, the aggregation runs twice. Always check execution plan and add MATERIALIZED when CTE is reused.
Use EXPLAIN ANALYZE to check if CTE executes multiple times. Add MATERIALIZED if so.
High
Query takes 2x longer than expected. Execution plan shows CTE executing multiple times.
-- Materializing single-use CTE (adds overhead)
WITH recent_orders AS MATERIALIZED (
SELECT * FROM orders
WHERE order_date >= CURRENT_DATE - INTERVAL '7 days'
)
SELECT customer_id, COUNT(*) as order_count
FROM recent_orders -- Used only once!
GROUP BY customer_id;
-- Materialization overhead: 3.2s
-- Don't materialize single-use CTEs
WITH recent_orders AS NOT MATERIALIZED (
SELECT * FROM orders
WHERE order_date >= CURRENT_DATE - INTERVAL '7 days'
)