您好,登錄后才能下訂單哦!
這篇文章主要講解了“PostgreSQL紹聚合函數實現中怎么使用的simplehash”,文中的講解內容簡單清晰,易于學習與理解,下面請大家跟著小編的思路慢慢深入,一起來研究和學習“PostgreSQL紹聚合函數實現中怎么使用的simplehash”吧!
//src/backend/executor/execGrouping.c #define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key) //SH_HASH_KEY --> TupleHashTableHash #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0 //SH_EQUAL --> TupleHashTableMatch
TupleHashTable
哈希表定義
typedef struct TupleHashTableData *TupleHashTable; typedef struct TupleHashTableData { //底層Hash表 tuplehash_hash *hashtab; /* underlying hash table */ //在檢索鍵中的列數 int numCols; /* number of columns in lookup key */ //鍵列中的屬性格式 AttrNumber *keyColIdx; /* attr numbers of key columns */ //數據類型的哈希函數 FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ //數據類型比較器 ExprState *tab_eq_func; /* comparator for table datatype(s) */ //包含數據表的內存上下文 MemoryContext tablecxt; /* memory context containing table */ //函數解析上下文 MemoryContext tempcxt; /* context for function evaluations */ //構造每個哈希條目的實際大小 Size entrysize; /* actual size to make each hash entry */ //依賴數據表條目的slot TupleTableSlot *tableslot; /* slot for referencing table entries */ /* The following fields are set transiently for each table search: */ //下面字段為每一個表檢索時臨時設置 //當前輸入tuple slot TupleTableSlot *inputslot; /* current input tuple's slot */ //輸入數據類型的哈希函數 FmgrInfo *in_hash_funcs; /* hash functions for input datatype(s) */ //input vs table的比較器 ExprState *cur_eq_func; /* comparator for input vs. table */ //哈希函數IV uint32 hash_iv; /* hash-function IV */ //表達式上下文 ExprContext *exprcontext; /* expression context */ } TupleHashTableData; typedef tuplehash_iterator TupleHashIterator; /* type definitions */ //哈希表類型定義 typedef struct SH_TYPE //tuplehash_hash { /* * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash * tables. Note that the maximum number of elements is lower * (SH_MAX_FILLFACTOR) * 數據/桶數組大小,64 bit用于處理UINT32_MAX哈希表. * 注意元素最大格式小于(SH_MAX_FILLFACTOR) */ uint64 size; /* how many elements have valid contents */ //有多少個元素具有有效內容 uint32 members; /* mask for bucket and size calculations, based on size */ //基于大小,用于計算桶和大小的掩碼 uint32 sizemask; /* boundary after which to grow hashtable */ //哈希表增長的閾值 uint32 grow_threshold; /* hash buckets */ //哈希桶 SH_ELEMENT_TYPE *data; /* memory context to use for allocations */ //用于分配的內存上下文 MemoryContext ctx; /* user defined data, useful for callbacks */ //用戶自定義的數據,通常用于回調函數 void *private_data; } SH_TYPE;//實際是tuplehash_hash
TupleHashEntryData
哈希表條目
typedef struct TupleHashEntryData *TupleHashEntry; typedef struct TupleHashTableData *TupleHashTable; typedef struct TupleHashEntryData { //該組第一個元組的拷貝 MinimalTuple firstTuple; /* copy of first tuple in this group */ //用戶數據 void *additional; /* user data */ //狀態(見SH_STATUS) uint32 status; /* hash status */ //哈希值(已緩存) uint32 hash; /* hash value (cached) */ } TupleHashEntryData; typedef enum SH_STATUS { SH_STATUS_EMPTY = 0x00, SH_STATUS_IN_USE = 0x01 } SH_STATUS;
MinimalTuple
最小化的元組定義
/* * MinimalTuple is an alternative representation that is used for transient * tuples inside the executor, in places where transaction status information * is not required, the tuple rowtype is known, and shaving off a few bytes * is worthwhile because we need to store many tuples. The representation * is chosen so that tuple access routines can work with either full or * minimal tuples via a HeapTupleData pointer structure. The access routines * see no difference, except that they must not access the transaction status * or t_ctid fields because those aren't there. * * For the most part, MinimalTuples should be accessed via TupleTableSlot * routines. These routines will prevent access to the "system columns" * and thereby prevent accidental use of the nonexistent fields. * * MinimalTupleData contains a length word, some padding, and fields matching * HeapTupleHeaderData beginning with t_infomask2. The padding is chosen so * that offsetof(t_infomask2) is the same modulo MAXIMUM_ALIGNOF in both * structs. This makes data alignment rules equivalent in both cases. * * When a minimal tuple is accessed via a HeapTupleData pointer, t_data is * set to point MINIMAL_TUPLE_OFFSET bytes before the actual start of the * minimal tuple --- that is, where a full tuple matching the minimal tuple's * data would start. This trick is what makes the structs seem equivalent. * * Note that t_hoff is computed the same as in a full tuple, hence it includes * the MINIMAL_TUPLE_OFFSET distance. t_len does not include that, however. * * MINIMAL_TUPLE_DATA_OFFSET is the offset to the first useful (non-pad) data * other than the length word. tuplesort.c and tuplestore.c use this to avoid * writing the padding to disk. */ #define MINIMAL_TUPLE_OFFSET \ ((offsetof(HeapTupleHeaderData, t_infomask2) - sizeof(uint32)) / MAXIMUM_ALIGNOF * MAXIMUM_ALIGNOF) #define MINIMAL_TUPLE_PADDING \ ((offsetof(HeapTupleHeaderData, t_infomask2) - sizeof(uint32)) % MAXIMUM_ALIGNOF) #define MINIMAL_TUPLE_DATA_OFFSET \ offsetof(MinimalTupleData, t_infomask2) struct MinimalTupleData { uint32 t_len; /* actual length of minimal tuple */ char mt_padding[MINIMAL_TUPLE_PADDING]; /* Fields below here must match HeapTupleHeaderData! */ uint16 t_infomask2; /* number of attributes + various flags */ uint16 t_infomask; /* various flag bits, see below */ uint8 t_hoff; /* sizeof header incl. bitmap, padding */ /* ^ - 23 bytes - ^ */ bits8 t_bits[FLEXIBLE_ARRAY_MEMBER]; /* bitmap of NULLs */ /* MORE DATA FOLLOWS AT END OF STRUCT */ }; /* typedef appears in htup.h */ #define SizeofMinimalTupleHeader offsetof(MinimalTupleData, t_bits) typedef struct MinimalTupleData MinimalTupleData; typedef MinimalTupleData *MinimalTuple;
TupleHashTableHash
TupleHashTableHash用于計算tuple的哈希值(分組列值)
/* * Compute the hash value for a tuple * 計算tuple的哈希值 * * The passed-in key is a pointer to TupleHashEntryData. In an actual hash * table entry, the firstTuple field points to a tuple (in MinimalTuple * format). LookupTupleHashEntry sets up a dummy TupleHashEntryData with a * NULL firstTuple field --- that cues us to look at the inputslot instead. * This convention avoids the need to materialize virtual input tuples unless * they actually need to get copied into the table. * 傳入的key是指向TupleHashEntryData結構體的指針. * 在實際的哈希表條目中,firstTuple字段指向一個元組(以MinimalTuple格式保存). * LookupTupleHashEntry會使用NULL firstTuple字段設置一個虛擬的TupleHashEntryData. * --- 這可以提示我們轉而查看inputslot. * 這個轉換避免了物化虛擬輸入元組,除非它們需要實際拷貝到數據表中. * * Also, the caller must select an appropriate memory context for running * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.) * 同時,調用者必須選擇合適的內存上下文用于運行哈希函數. * (dynahash.c不會改變CurrentMemoryContext) */ static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple) { //Tuple 哈希表 TupleHashTable hashtable = (TupleHashTable) tb->private_data; //列數 int numCols = hashtable->numCols; //屬性編號 AttrNumber *keyColIdx = hashtable->keyColIdx; //哈希key uint32 hashkey = hashtable->hash_iv; //元組slot TupleTableSlot *slot; //哈希函數指針 FmgrInfo *hashfunctions; int i; if (tuple == NULL)//元組為NULL { /* Process the current input tuple for the table */ //處理當前輸入元組 slot = hashtable->inputslot; hashfunctions = hashtable->in_hash_funcs; } else { /* * Process a tuple already stored in the table. * 處理已存儲在數據表中的元組. * * (this case never actually occurs due to the way simplehash.h is * used, as the hash-value is stored in the entries) * (這種情況因為simplehash.h的使用,實際上不會發生,因為哈希值存儲在條目中) */ slot = hashtable->tableslot; //存儲MinimalTuple ExecStoreMinimalTuple(tuple, slot, false); hashfunctions = hashtable->tab_hash_funcs; } for (i = 0; i < numCols; i++) { //------- 循環遍歷列數 //獲取屬性編號 AttrNumber att = keyColIdx[i]; Datum attr;//屬性 bool isNull;//是否為NULL? /* rotate hashkey left 1 bit at each step */ //每一步向左移動一位 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); //獲取屬性值 attr = slot_getattr(slot, att, &isNull); //如為null,則哈希key設置為0 if (!isNull) /* treat nulls as having hash key 0 */ { //不為NULL uint32 hkey; //調用哈希函數 hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], attr)); hashkey ^= hkey; } } /* * The way hashes are combined above, among each other and with the IV, * doesn't lead to good bit perturbation. As the IV's goal is to lead to * achieve that, perform a round of hashing of the combined hash - * resulting in near perfect perturbation. * 上面哈希的的組合方式,彼此之間以及與IV的組合方式,都不會導致位擾動. * 因為IV存在的目的是實現該目標,執行組合哈希的hashing取整 -- 結果是完美的擾動. */ return murmurhash42(hashkey); }
TupleHashTableMatch
TupleHashTableMatch用于判斷兩個tuples是否匹配(有相同的hash值)
/* * See whether two tuples (presumably of the same hash value) match * 檢查兩個tuples是否匹配(有相同的hash值) * * As above, the passed pointers are pointers to TupleHashEntryData. * 如上所述,傳入的指針指向TupleHashEntryData */ static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2) { TupleTableSlot *slot1; TupleTableSlot *slot2; TupleHashTable hashtable = (TupleHashTable) tb->private_data; ExprContext *econtext = hashtable->exprcontext; /* * We assume that simplehash.h will only ever call us with the first * argument being an actual table entry, and the second argument being * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction * could be supported too, but is not currently required. */ Assert(tuple1 != NULL); slot1 = hashtable->tableslot; ExecStoreMinimalTuple(tuple1, slot1, false); Assert(tuple2 == NULL); slot2 = hashtable->inputslot; /* For crosstype comparisons, the inputslot must be first */ econtext->ecxt_innertuple = slot2; econtext->ecxt_outertuple = slot1; return !ExecQualAndReset(hashtable->cur_eq_func, econtext); }
測試腳本
-- 禁用并行 set max_parallel_workers_per_gather=0; select bh,avg(c1),min(c1),max(c2) from t_agg_simple group by bh;
跟蹤分析
(gdb) b TupleHashTableHash Breakpoint 1 at 0x6d3b2b: file execGrouping.c, line 379. (gdb) b TupleHashTableMatch Breakpoint 2 at 0x6d3c79: file execGrouping.c, line 446. (gdb) (gdb) c Continuing. Breakpoint 1, TupleHashTableHash (tb=0x2dd2720, tuple=0x0) at execGrouping.c:379 379 TupleHashTable hashtable = (TupleHashTable) tb->private_data; (gdb)
輸入參數
(gdb) p *tb $1 = {size = 256, members = 0, sizemask = 255, grow_threshold = 230, data = 0x2ddca00, ctx = 0x2db5310, private_data = 0x2dd2890} (gdb) p *tb->data $2 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 0}
獲取分組列數
(gdb) n 380 int numCols = hashtable->numCols; (gdb) p *hashtable $3 = {hashtab = 0x2dd2720, numCols = 1, keyColIdx = 0x2dd2680, tab_hash_funcs = 0x2db72d0, tab_eq_func = 0x2ddea18, tablecxt = 0x2dcc370, tempcxt = 0x2db7320, entrysize = 24, tableslot = 0x2dd2928, inputslot = 0x2db7238, in_hash_funcs = 0x2db72d0, cur_eq_func = 0x2ddea18, hash_iv = 0, exprcontext = 0x2ddf338} (gdb) p tb->private_data $4 = (void *) 0x2dd2890
獲取分組列屬性編號
(gdb) n 381 AttrNumber *keyColIdx = hashtable->keyColIdx; (gdb) 382 uint32 hashkey = hashtable->hash_iv; (gdb) p *keyColIdx $5 = 1
如輸入tuple為NULL,設置slot和哈希函數
(gdb) n 387 if (tuple == NULL) (gdb) p hashkey $6 = 0 (gdb) n 390 slot = hashtable->inputslot; (gdb) 391 hashfunctions = hashtable->in_hash_funcs;
開始遍歷分組列
獲取hashkey
(gdb) n 406 for (i = 0; i < numCols; i++) (gdb) p numCols $8 = 1 (gdb) n 408 AttrNumber att = keyColIdx[i]; (gdb) 413 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); (gdb) p att $9 = 1 (gdb) p hashkey $10 = 0
獲取屬性值
(gdb) n 415 attr = slot_getattr(slot, att, &isNull); (gdb) p hashkey $11 = 0 (gdb) n 417 if (!isNull) /* treat nulls as having hash key 0 */ (gdb) p attr $12 = 140535426168416 (gdb) x\16c attr Invalid character '\' in expression. (gdb) x/16c attr 0x7fd0f427b660: 11 '\v' 71 'G' 90 'Z' 48 '0' 49 '1' 0 '\000' 0 '\000' 0 '\000' 0x7fd0f427b668: 1 '\001' 0 '\000' 0 '\000' 0 '\000' 1 '\001' 0 '\000' 0 '\000' 0 '\000'
計算哈希值
(gdb) n 421 hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], (gdb) p hashfunctions[i] $13 = {fn_addr = 0x4c8a31 <hashtext>, fn_oid = 400, fn_nargs = 1, fn_strict = true, fn_retset = false, fn_stats = 2 '\002', fn_extra = 0x0, fn_mcxt = 0x2db5310, fn_expr = 0x0} (gdb) p i $14 = 0 (gdb) n 423 hashkey ^= hkey; (gdb) p hkey $15 = 3431319292 (gdb) n 406 for (i = 0; i < numCols; i++) (gdb) p hashkey $16 = 3431319292
返回結果
(gdb) n 433 return murmurhash42(hashkey); (gdb) p murmurhash42(hashkey) $17 = 443809650 (gdb) n 434 } (gdb) tuplehash_insert (tb=0x2dd2720, key=0x0, found=0x7fff585be487) at ../../../src/include/lib/simplehash.h:497 497 insertdist = 0; (gdb)
TupleHashTableMatch
進入TupleHashTableMatch
(gdb) c Continuing. Breakpoint 2, TupleHashTableMatch (tb=0x2dd2720, tuple1=0x2dcc488, tuple2=0x0) at execGrouping.c:446 446 TupleHashTable hashtable = (TupleHashTable) tb->private_data; (gdb)
輸入參數
(gdb) p *tb $18 = {size = 256, members = 1, sizemask = 255, grow_threshold = 230, data = 0x2ddca00, ctx = 0x2db5310, private_data = 0x2dd2890} (gdb) p *tuple1 $19 = {t_len = 21, mt_padding = "\000\000\000\000\000", t_infomask2 = 1, t_infomask = 2, t_hoff = 24 '\030', t_bits = 0x2dcc497 ""}
對比是否匹配
(gdb) n 447 ExprContext *econtext = hashtable->exprcontext; (gdb) 455 Assert(tuple1 != NULL); (gdb) 456 slot1 = hashtable->tableslot; (gdb) 457 ExecStoreMinimalTuple(tuple1, slot1, false); (gdb) 458 Assert(tuple2 == NULL); (gdb) 459 slot2 = hashtable->inputslot; (gdb) 462 econtext->ecxt_innertuple = slot2; (gdb) 463 econtext->ecxt_outertuple = slot1; (gdb) 464 return !ExecQualAndReset(hashtable->cur_eq_func, econtext); (gdb) p hashtable->cur_eq_func $20 = (ExprState *) 0x2ddea18 (gdb) p *hashtable->cur_eq_func $21 = {tag = {type = T_ExprState}, flags = 7 '\a', resnull = false, resvalue = 0, resultslot = 0x0, steps = 0x2ddeab0, evalfunc = 0x6cd882 <ExecInterpExprStillValid>, expr = 0x0, evalfunc_private = 0x6cb43e <ExecInterpExpr>, steps_len = 7, steps_alloc = 16, parent = 0x0, ext_params = 0x0, innermost_caseval = 0x0, innermost_casenull = 0x0, innermost_domainval = 0x0, innermost_domainnull = 0x0}
返回值
$22 = true (gdb) n 465 } (gdb) tuplehash_insert (tb=0x2dd2720, key=0x0, found=0x7fff585be487) at ../../../src/include/lib/simplehash.h:556 556 Assert(entry->status == SH_STATUS_IN_USE); (gdb)
感謝各位的閱讀,以上就是“PostgreSQL紹聚合函數實現中怎么使用的simplehash”的內容了,經過本文的學習后,相信大家對PostgreSQL紹聚合函數實現中怎么使用的simplehash這一問題有了更深刻的體會,具體使用情況還需要大家實踐驗證。這里是億速云,小編將為大家推送更多相關知識點的文章,歡迎關注!
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。