Postgres

Last updated at 11/26/2024

Views and Materialized Views

Cache an expensive query with a materialized view with historical data (except for the latest two days), then union it with the live data. This way, we get the best of both worlds: fast query times and up-to-date data.

-- create the materialized view
CREATE MATERIALIZED VIEW historical_data AS
SELECT id, avg(duration) as avg_duration
FROM logs
WHERE timestamp < NOW() - INTERVAL '2 days';

-- combine hisotical data with live data
SELECT id, avg(duration) as avg_duration
FROM logs
WHERE timestamp >= NOW() - INTERVAL '2 days'
UNION ALL
SELECT id, avg_duration
FROM historical_data;

-- refresh the materialized view every some time
REFRESH MATERIALIZED VIEW historical_data;

Working with Data Duplication

Pick the latest record per group with DISTINCT ON and ORDER BY

-- get the highest score for each student and the associated subject
SELECT DISTINCT ON (name) name, subject, score
FROM student_scores
ORDER BY name, score DESC;

-- get the latest request_duration for each URL
SELECT DISTINCT ON (url) url, request_duration
FROM logs
ORDER BY url, timestamp DESC

Add a is_duplicate flag with window functions

CREATE VIEW table_with_duplicates AS (
  SELECT id,
    ROW_NUMBER() OVER (
      PARTITION BY column1, column2, ...
      ORDER BY created_at DESC
    ) > 1 AS is_duplicate
  from table_name
)

-- then delete the duplicates
DELETE FROM table_with_duplicates
WHERE id in (
  SELECT id
  FROM table_with_duplicates
  WHERE is_duplicate
)