Handy PostgreSQL Snippets
A collection of PostgreSQL snippets we have found handy!
Generate lowercase table names and column alter statements
In PostgreSQL, creating a table with quoted mixed-case names makes those names case sensitive. These queries generate the ALTER statements needed to rename them to lowercase.
Change table names
-- Generate ALTER TABLE statements that rename mixed-case tables to lowercase.
-- format('%I') quote-escapes identifiers, so schema or table names containing
-- quotes or other special characters still produce valid DDL (the original
-- string concatenation did not escape embedded double quotes).
select
    format('ALTER TABLE %I.%I RENAME TO %I;',
           psat.schemaname, psat.relname, lower(psat.relname))
from
    pg_catalog.pg_stat_all_tables psat
where
    psat.schemaname = 'public'
    and psat.relname <> lower(psat.relname);
Change column names
-- Generate ALTER TABLE ... RENAME COLUMN statements for mixed-case columns.
select
    format('ALTER TABLE %I.%I RENAME COLUMN %I TO %I;',
           psat.schemaname, psat.relname, pa.attname, lower(pa.attname))
from
    pg_catalog.pg_stat_all_tables psat
    -- explicit join instead of the implicit comma join
    inner join pg_catalog.pg_attribute pa
        on pa.attrelid = psat.relid
where
    psat.schemaname = 'public'
    -- attnum > 0 skips system columns (ctid, xmin, ...);
    -- attisdropped skips dropped columns that still have a pg_attribute row
    and pa.attnum > 0
    and not pa.attisdropped
    and pa.attname <> lower(pa.attname);
Close all connections to a database
This is handy if you are developing locally, or need to restore a production backup, but you can't track down everything accessing the application. If the apps auto-reconnect, run this query with the DROP DATABASE statement immediately after it.
-- Terminate every backend connected to the target database,
-- excluding the session running this query itself.
SELECT pg_terminate_backend(sa.pid)
FROM pg_stat_activity AS sa
WHERE sa.datname = '<db_name>'
  AND sa.pid <> pg_backend_pid();
If you are trying to recreate a database, this works nicely too
#!/bin/bash
# usage: ./recreate_database <dbname>
# Drops and recreates a database, first blocking new connections and then
# terminating existing ones so the DROP cannot fail with "database is in use".
set -euo pipefail

dbname="$1"

# NOTE(review): $dbname is interpolated into SQL text; only pass trusted names.
psql -h <host> postgres postgres -c "UPDATE pg_database SET datallowconn = 'false' WHERE datname = '$dbname';"
psql -h <host> postgres postgres -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '$dbname';"

# Quoting "$dbname" keeps names with unusual characters intact.
dropdb -h <host> -U postgres "$dbname"
createdb -h <host> -U postgres "$dbname"
Change all permissions on a database to a user
Run this as a bash script like database_permissions.sh
as the postgres
#!/bin/bash
# usage: ./database_permissions.sh database username
# Transfers ownership of every table, sequence, view and materialized view in
# the public schema of database $1 to the role $2.
set -euo pipefail

db="$1"
owner="$2"

# $(...) instead of legacy backticks; "$db"/"$owner" quoted so names with
# unusual characters survive word splitting.
for tbl in $(psql -qAt -c "select tablename from pg_tables where schemaname = 'public';" "$db"); do
    psql -c "alter table \"$tbl\" owner to \"$owner\"" "$db"
done

for seq in $(psql -qAt -c "select sequence_name from information_schema.sequences where sequence_schema = 'public';" "$db"); do
    psql -c "alter sequence \"$seq\" owner to \"$owner\"" "$db"
done

for view in $(psql -qAt -c "select table_name from information_schema.views where table_schema = 'public';" "$db"); do
    psql -c "alter view \"$view\" owner to \"$owner\"" "$db"
done

for matview in $(psql -qAt -c "select matviewname from pg_matviews where schemaname = 'public';" "$db"); do
    psql -c "alter materialized view \"$matview\" owner to \"$owner\"" "$db"
done
Performance
Add pg_stat_statements
to shared_preload_libraries
inside postgresql.conf
-- Requires pg_stat_statements in shared_preload_libraries (postgresql.conf)
-- and a server restart before the extension can be created.
CREATE EXTENSION pg_stat_statements;
Columns
-- Key timing columns per normalized query.
-- NOTE(review): on PostgreSQL 13+ these were renamed (total_time ->
-- total_exec_time, mean_time -> mean_exec_time, etc.) — adjust if this errors.
select query,calls,total_time,min_time,max_time,mean_time,stddev_time,rows from pg_stat_statements;
To reset stats
-- Discards all statistics gathered so far by pg_stat_statements.
select pg_stat_statements_reset();
Most CPU usage queries
-- Top 20 queries by total execution time, with each query's share of the
-- instance-wide total as a percentage.
-- NOTE(review): on PostgreSQL 13+ use total_exec_time / mean_exec_time.
SELECT datname, query,
round(total_time::numeric, 2) AS total_time,
calls,
round(mean_time::numeric, 2) AS mean,
-- window SUM() OVER () computes the grand total without a second scan
round((100 * total_time / sum(total_time::numeric) OVER ())::numeric, 2) AS percentage_cpu
FROM pg_stat_statements
JOIN pg_database ON dbid = pg_database.oid
ORDER BY total_time DESC
LIMIT 20;
Running queries
-- show running queries (pre 9.2), oldest filtered out via the '<IDLE>' marker
SELECT procpid,
       age(clock_timestamp(), query_start),
       usename,
       current_query
FROM pg_stat_activity
WHERE current_query != '<IDLE>'
  AND current_query NOT ILIKE '%pg_stat_activity%'
ORDER BY query_start DESC;
-- show running queries (9.2+)
-- On 9.2+ idle sessions keep their last statement in "query", so the pre-9.2
-- sentinel comparison query != '<IDLE>' never filters anything; the idle flag
-- moved to the "state" column.
SELECT pid, age(clock_timestamp(), query_start), usename, query
FROM pg_stat_activity
WHERE state <> 'idle'
  AND query NOT ILIKE '%pg_stat_activity%'
ORDER BY query_start desc;
Kill running query
-- Cancels the backend's current query but keeps the connection open.
-- NOTE(review): "procpid" is the pre-9.2 column name; use "pid" on 9.2+.
SELECT pg_cancel_backend(procpid);
Kill idle query
-- Terminates the whole backend process, closing the connection.
-- NOTE(review): "procpid" is the pre-9.2 column name; use "pid" on 9.2+.
SELECT pg_terminate_backend(procpid);
Cache hit rate
Should not be less than 0.99
-- Buffer-cache hit ratio across user tables.
-- NULLIF guards the division on a fresh cluster where heap_blks_hit is 0.
SELECT sum(heap_blks_read) as heap_read,
       sum(heap_blks_hit) as heap_hit,
       (sum(heap_blks_hit) - sum(heap_blks_read)) / NULLIF(sum(heap_blks_hit), 0) as ratio
FROM pg_statio_user_tables;
Table index usage rates
Should not be less than 99% (the query below reports a percentage)
-- Percentage of scans on each table that used an index, biggest tables first.
-- NULLIF avoids division by zero for tables that have never been scanned.
SELECT relname AS table_name,
       100 * idx_scan / NULLIF(seq_scan + idx_scan, 0) AS percent_of_times_index_used,
       n_live_tup AS rows_in_table
FROM pg_stat_user_tables
ORDER BY n_live_tup DESC;
How many indexes are in cache
-- Buffer-cache hit ratio for index blocks.
-- NULLIF guards the division when no index blocks have been hit yet.
SELECT sum(idx_blks_read) as idx_read,
       sum(idx_blks_hit) as idx_hit,
       (sum(idx_blks_hit) - sum(idx_blks_read)) / NULLIF(sum(idx_blks_hit), 0) as ratio
FROM pg_statio_user_indexes;
Find tables with missing primary keys
https://www.postgresonline.com/article_pfriendly/65.html
-- Tables lacking a PRIMARY KEY.
-- information_schema.key_column_usage (used by the original) also contains
-- FOREIGN KEY and UNIQUE constraint columns, so a table whose only constraint
-- is a foreign key was wrongly treated as having a primary key. Checking
-- table_constraints.constraint_type makes the test exact.
SELECT c.table_schema, c.table_name, c.table_type
FROM information_schema.tables c
WHERE c.table_type = 'BASE TABLE'
  AND c.table_schema NOT IN ('information_schema', 'pg_catalog')
  AND NOT EXISTS (SELECT 1
                  FROM information_schema.table_constraints tc
                  WHERE tc.table_schema = c.table_schema
                    AND tc.table_name = c.table_name
                    AND tc.constraint_type = 'PRIMARY KEY')
ORDER BY c.table_schema, c.table_name;
Find all tables missing primary keys and have no unique indexes
https://www.postgresonline.com/article_pfriendly/65.html
-- Tables with no key-constraint columns at all AND no unique index,
-- i.e. tables where individual rows cannot be addressed uniquely.
SELECT c.table_schema, c.table_name, c.table_type
FROM information_schema.tables c
WHERE c.table_schema NOT IN('information_schema', 'pg_catalog') AND c.table_type = 'BASE TABLE'
-- no unique index
-- NOTE(review): matching indexdef LIKE '%UNIQUE%' is textual and could in
-- principle match a table/column name containing "UNIQUE";
-- pg_index.indisunique would be an exact test.
AND NOT EXISTS(SELECT i.tablename
FROM pg_catalog.pg_indexes i
WHERE i.schemaname = c.table_schema
AND i.tablename = c.table_name AND indexdef LIKE '%UNIQUE%')
AND
-- no PRIMARY KEY / FOREIGN KEY / UNIQUE key columns at all
NOT EXISTS (SELECT cu.table_name
FROM information_schema.key_column_usage cu
WHERE cu.table_schema = c.table_schema AND
cu.table_name = c.table_name)
ORDER BY c.table_schema, c.table_name;
Find database size usage
Index Sizes
-- Per-index report for tables in the public schema: approximate row count,
-- table and index size, and how often each index has been scanned.
-- NOTE(review): pg_relation_size(quote_ident(tablename)) resolves the name via
-- search_path, so this assumes "public" is on the search_path — confirm.
SELECT
pt.tablename AS TableName
,t.indexname AS IndexName
,to_char(pc.reltuples, '999,999,999,999') AS TotalRows
,pg_size_pretty(pg_relation_size(quote_ident(pt.tablename)::text)) AS TableSize
,pg_size_pretty(pg_relation_size(quote_ident(t.indexrelname)::text)) AS IndexSize
,to_char(t.idx_scan, '999,999,999,999') AS TotalNumberOfScan
,to_char(t.idx_tup_read, '999,999,999,999') AS TotalTupleRead
,to_char(t.idx_tup_fetch, '999,999,999,999') AS TotalTupleFetched
FROM pg_tables AS pt
LEFT OUTER JOIN pg_class AS pc
ON pt.tablename=pc.relname
-- derived table pairing each table with its indexes and their usage stats
LEFT OUTER JOIN
(
SELECT
pc.relname AS TableName
,pc2.relname AS IndexName
,psai.idx_scan
,psai.idx_tup_read
,psai.idx_tup_fetch
,psai.indexrelname
FROM pg_index AS pi
JOIN pg_class AS pc
ON pc.oid = pi.indrelid
JOIN pg_class AS pc2
ON pc2.oid = pi.indexrelid
JOIN pg_stat_all_indexes AS psai
ON pi.indexrelid = psai.indexrelid
)AS T
ON pt.tablename = T.TableName
WHERE pt.schemaname='public'
ORDER BY 1;
Table Sizes
-- Total on-disk size (table + indexes + TOAST) of every relation outside the
-- system schemas, largest first.
SELECT nspname || '.' || relname AS "relation",
    pg_size_pretty(pg_total_relation_size(C.oid)) AS "total_size"
FROM pg_class C
LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
WHERE nspname NOT IN ('pg_catalog', 'information_schema')
  -- exclude plain indexes ('i') and partitioned indexes ('I', PostgreSQL 11+);
  -- the original only filtered 'i'
  AND C.relkind NOT IN ('i', 'I')
  AND nspname !~ '^pg_toast'
ORDER BY pg_total_relation_size(C.oid) DESC;
Find tables with a high sequential scan
If the table is large, this might mean an index isn't being used
-- Tables ordered by how many sequential scans they have received.
SELECT relname AS table_name,
       seq_scan,
       seq_tup_read,
       idx_scan
FROM pg_stat_user_tables
WHERE seq_scan > 0
ORDER BY seq_scan DESC;
Find indexes that are never used
This slows down writes as it needs to insert in all the indexes for a table.
-- Indexes never used for a scan since statistics were last reset.
-- NOTE(review): indexes backing PRIMARY KEY / UNIQUE constraints can also show
-- idx_scan = 0 but must not be dropped — review each entry before acting.
SELECT relname AS table_name, indexrelname AS index_name, idx_scan, idx_tup_read, idx_tup_fetch
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY table_name;
See connection count
Use this to detect if it's using connection pooling.
-- Number of backends that connected within the last two hours.
SELECT
    COUNT(*) AS connection_count
FROM
    pg_stat_activity
WHERE
    backend_start >= NOW() - INTERVAL '120 minutes';
or with a bit more detail
-- Who is connected, from where, and since when.
SELECT
    pid,
    usename,
    application_name,
    client_addr,
    backend_start
FROM
    pg_stat_activity
WHERE
    backend_start >= NOW() - INTERVAL '200 minutes';
or to see how many active connections there are. We have Active, Idle and NULL
-- Active and idle connections with when they connected (shown in local time)
-- and the statement they are running / last ran.
-- The original appended ", *" after the explicit column list, duplicating
-- every column in the output — dropped here; the AT TIME ZONE expression also
-- gets an alias so the column is named in the result.
SELECT pid, usename, application_name, client_addr,
       backend_start AT TIME ZONE 'Australia/Adelaide' AS backend_start_local,
       state, query
FROM pg_stat_activity
WHERE state IN ('active', 'idle')
ORDER BY backend_start ASC;
Update all sequences to be inline with the max value of the ID
This assumes a lot of things about your database, but for standard tables with an ID that is an integer, it should work.
-- Advance every sequence owned by a column (serial / identity) to that
-- column's current MAX value — fixes "duplicate key" errors after bulk loads
-- that inserted explicit ids without touching the sequence.
with sequences as (
select *
from (
-- every column whose default is backed by a sequence
select table_schema,
table_name,
column_name,
pg_get_serial_sequence(format('%I.%I', table_schema, table_name), column_name) as col_sequence
from information_schema.columns
where table_schema not in ('pg_catalog', 'information_schema')
) t
where col_sequence is not null
), maxvals as (
select table_schema, table_name, column_name, col_sequence,
-- query_to_xml executes "select max(col)" per table; xpath extracts the scalar
(xpath('/row/max/text()',
query_to_xml(format('select max(%I) from %I.%I', column_name, table_schema, table_name), true, true, ''))
)[1]::text::bigint as max_val
from sequences
)
select table_schema,
table_name,
column_name,
col_sequence,
coalesce(max_val, 0) as max_val,
-- NOTE(review): for an empty table this sets the sequence to 1 with is_called
-- defaulting to true, so the first nextval() returns 2 — confirm acceptable.
setval(col_sequence, coalesce(max_val, 1))
from maxvals;
Backup / Restore
When converting between different databases it might be safer to use .sql
depending on version difference.
Dump remote database
# Dump a remote database to a plain-SQL file (readable and portable across versions).
pg_dump -U <user> -h <host> <database> > dump.sql
Restore remote database
# Replay the plain-SQL dump into an existing database.
psql -U <user> -h <host> -d <database> -f dump.sql
Dropping a database with connections
This is useful when doing development or staging environments where it's hard to get all connections dropped.
-- Kick every other session off the database, then drop and recreate it.
-- NOTE(review): a new connection can sneak in between the terminate and the
-- DROP; on PostgreSQL 13+ "DROP DATABASE ifas WITH (FORCE);" closes that race.
SELECT
pg_terminate_backend(pg_stat_activity.pid)
FROM
pg_stat_activity
WHERE
pg_stat_activity.datname = 'ifas'
AND pid <> pg_backend_pid();
DROP DATABASE ifas;
CREATE DATABASE ifas;
Identity Columns
To turn an existing column into an identity column you can use:
-- GENERATED ALWAYS: inserting an explicit value into the column raises an
-- error unless OVERRIDING SYSTEM VALUE is specified.
ALTER TABLE table_name
ALTER COLUMN id_column
ADD GENERATED ALWAYS AS IDENTITY;
or
-- GENERATED BY DEFAULT: explicit values are accepted and bypass the sequence.
ALTER TABLE table_name
ALTER COLUMN id_column
ADD GENERATED BY DEFAULT AS IDENTITY;
I prefer the generated always
as it will throw an error if you try to bypass the automatic generation of values.
If the table already contains data, you need to synchronize the underlying sequence with the values in the table:
-- Align the identity column's backing sequence with the current max id so the
-- next generated value does not collide with existing rows.
select setval(pg_get_serial_sequence('the_table', 'id_column'), max(id))
from the_table;