您好,登錄后才能下訂單哦!
對于Update/Delete操作,PostgreSQL的MVCC機制仍會保留先前版本的數據,這些數據在VACUUM時將被清除.但如果在執行VACUUM時,存在先于VACUUM操作的活動事務,假定這些活動事務中最小的事務ID為OldestXmin,那么VACUUM不會清理刪除事務ID(即xmax) > OldestXmin的元組,如果業務繁忙并且OldestXmin事務一直不提交,會導致存儲空間一直膨脹,直至耗盡空間.
實驗驗證
數據準備
創建一張普通表,插入一行數據
drop table if exists t_page;
create table t_page (id int,c1 char(8),c2 varchar(16));
insert into t_page values(1,'1','a');
獲取該表對應的數據文件
10:26:31 (xdb@[local]:5432)testdb=# select pg_relation_filepath('t_page');
pg_relation_filepath
----------------------
base/16402/50824
(1 row)
查詢該數據表占用的空間
10:42:43 (xdb@[local]:5432)testdb=# \set v_tablename t_page
10:43:09 (xdb@[local]:5432)testdb=# SELECT pg_size_pretty( pg_total_relation_size(:'v_tablename') );
pg_size_pretty
----------------
8192 bytes
(1 row)
過程
session 1啟動事務,session 2執行pg_bench進行測試(在session 1后啟動),session 3監控數據表的空間增長/執行vacuum
session 1
10:46:48 (xdb@[local]:5432)testdb=# begin;
BEGIN
10:46:50 (xdb@[local]:5432)testdb=#* select txid_current();
txid_current
--------------
397083
(1 row)
10:46:54 (xdb@[local]:5432)testdb=#*
session 2執行pg_bench
[xdb@localhost script]$ cat test.sql
\set id random(1,10000)
begin;
update t_page set c1='c1'||:id;
commit;
session 3監控數據表的空間增長/執行vacuum
-- 空間
10:49:00 (xdb@[local]:5432)testdb=# SELECT pg_size_pretty( pg_total_relation_size(:'v_tablename') );
pg_size_pretty
----------------
192 kB --> 在執行壓力測試過程中從8K增長到192KB
(1 row)
-- 執行vacuum
10:49:16 (xdb@[local]:5432)testdb=# vacuum verbose t_page;
INFO: vacuuming "public.t_page"
INFO: "t_page": found 0 removable, 10825 nonremovable row versions in 59 out of 59 pages
DETAIL: 10824 dead row versions cannot be removed yet, oldest xmin: 397083
There were 0 unused item pointers.
Skipped 0 pages due to buffer pins, 0 frozen pages.
0 pages are entirely empty.
CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s.
VACUUM
vacuum命令的輸出信息:10824 dead row versions cannot be removed yet, oldest xmin: 397083
因為刪除的xmax > OldestXmin,因此這些元組不能被清除.
源碼分析
元組對VACUUM的可見性判斷與元組對SELECT操作的可見性判斷類似,SELECT查詢調用的可見性判斷函數是HeapTupleSatisfiesMVCC,而VACUUM的可見性判斷函數是HeapTupleSatisfiesVacuum,該函數由lazy_scan_heap調用.
lazy_scan_heap
lazy_scan_heap函數的邏輯先前已介紹過,這里不再詳述,下面簡單梳理與元組清理的相關邏輯
static void
lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
Relation *Irel, int nindexes, bool aggressive)
{
...
for (blkno = 0; blkno < nblocks; blkno++)
{
//遍歷每個block
...
//遍歷block中的每個元組
for (offnum = FirstOffsetNumber;
offnum <= maxoff;
offnum = OffsetNumberNext(offnum))
{
...
if (ItemIdIsDead(itemid))
{
//記錄需刪除的tuple
//vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
//vacrelstats->num_dead_tuples++;
lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
all_visible = false;
continue;
}
...
//在這里,主要目的是一個元組是否可能對所有正在運行中的事務可見.
switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
{
case HEAPTUPLE_DEAD:
...
case HEAPTUPLE_LIVE:
...
case HEAPTUPLE_RECENTLY_DEAD:
//這些元組不能被清除!
nkeep += 1;
all_visible = false;
break;
...
}
}
}
...
}
如果ItemIdIsDead,則記錄該元組,繼續下一元組;如ItemIdIsNormal,調用HeapTupleSatisfiesVacuum函數,判斷元組可見性.
ItemIdIsDead的判斷很簡單,判斷ItemId的lp_flags標記是否為LP_DEAD
((itemId)->lp_flags == LP_DEAD)
實際上,在執行update的時候,ItemId的lp_flags仍然是0x01,即Normal狀態.
11:20:49 (xdb@[local]:5432)testdb=# select lp,lp_off,lp_flags,t_xmin,t_xmax,t_field3 as t_cid,t_ctid,t_infomask2,t_infomask from heap_page_items(get_raw_page('t_page',0));
lp | lp_off | lp_flags | t_xmin | t_xmax | t_cid | t_ctid | t_infomask2 | t_infomask
-----+--------+----------+--------+--------+-------+---------+-------------+------------
1 | 8152 | 1 | 457343 | 457343 | 0 | (0,1) | 3 | 10642
2 | 8112 | 1 | 457342 | 457343 | 0 | (0,1) | 3 | 9474
3 | 8072 | 1 | 457341 | 457342 | 0 | (0,2) | 3 | 9474
...
查看該block中3號元組的信息
[xdb@localhost pg111db]$ hexdump -C base/16402/50831 -s 32 -n 2
00000020 88 9f |..|
00000022
[xdb@localhost pg111db]$ hexdump -C base/16402/50831 -s 34 -n 2
00000022 4e 00 |N.|
00000024
計算偏移/大小/標記
[xdb@localhost pg111db]$ #偏移
[xdb@localhost pg111db]$ echo $((0x9f88 & ~$((1<<15))))
8072
[xdb@localhost pg111db]$ #大小
[xdb@localhost pg111db]$ echo $((0x004e >> 1))
39
[xdb@localhost pg111db]$ #lp_flags
[xdb@localhost pg111db]$ echo $((0x004e & 0x0001))
[xdb@localhost pg111db]$ echo $((0x9f88 >> 15))
1
lp_flags=0x01,即LP_NORMAL
下面,簡述HeapTupleSatisfiesVacuum的實現邏輯,該函數判斷元組對于VACUUM操作的可見性.
HeapTupleSatisfiesVacuum
HTSV_Result
HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
Buffer buffer)
{
...
if (!HeapTupleHeaderXminCommitted(tuple))
{
//xmin事務未提交
...
}
//不符合以上條件,則可確定xmin事務已提交
if (tuple->t_infomask & HEAP_XMAX_INVALID)
//xmax為無效事務ID
return HEAPTUPLE_LIVE;
...
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
{
//xmax事務未提交
...
}
...
//不符合以上條件,則可確定xmax事務已提交
if (!TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin))
//6.元組xmax ≥ OldestXmin,標記為最近刪除
return HEAPTUPLE_RECENTLY_DEAD;
...
元組xmax ≥ OldestXmin,標記為HEAPTUPLE_RECENTLY_DEAD,這些元組不能被清理!
之所以需要避免長事務是因為如果OldestXmin事務一直不提交,那么后續被刪除的元組都一直保留,無法通過VACUUM清除.
上述案例,壓力測試完成后,空間是原來的330倍,而且通過VACUUM(包括VACUUM FULL)無法清理!
10:50:13 (xdb@[local]:5432)testdb=# SELECT pg_size_pretty( pg_total_relation_size(:'v_tablename') );
pg_size_pretty
----------------
2640 kB
(1 row)
11:01:28 (xdb@[local]:5432)testdb=# vacuum verbose t_page;
INFO: vacuuming "public.t_page"
INFO: "t_page": found 0 removable, 60255 nonremovable row versions in 326 out of 326 pages
DETAIL: 60254 dead row versions cannot be removed yet, oldest xmin: 397083
There were 0 unused item pointers.
Skipped 0 pages due to buffer pins, 0 frozen pages.
0 pages are entirely empty.
CPU: user: 0.02 s, system: 0.00 s, elapsed: 0.02 s.
VACUUM
11:01:31 (xdb@[local]:5432)testdb=#
11:10:14 (xdb@[local]:5432)testdb=# vacuum full;
VACUUM
11:17:56 (xdb@[local]:5432)testdb=# SELECT pg_size_pretty( pg_total_relation_size(:'v_tablename') );
pg_size_pretty
----------------
2640 kB
(1 row)
應用建議
在實際應用中,應盡可能避免長事務,跑批操作盡可能安排在非繁忙時段執行.如確定為只讀事務,建議開啟自動提交.
對于Java應用并且啟用了連接池,如JDBC設置自動提交為false,就算是select操作,也務必在執行完畢后執行commit(Oracle不需要,但PG需要!).
參考資料
PostgreSQL Source Code
PostgreSQL 源碼解讀(134)- MVCC#18(vacuum過程-HeapTupleSatisfiesVacuum函數)
PostgreSQL 源碼解讀(130)- MVCC#14(vacuum過程-lazy_scan_heap函數)
PostgreSQL 源碼解讀(118)- MVCC#3(Tuple可見性判斷)
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。