/*-------------------------------------------------------------------------
 *
 * hash_xlog.c
 *    WAL replay logic for hash index.
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    src/backend/access/hash/hash_xlog.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/bufmask.h"
#include "access/hash.h"
#include "access/hash_xlog.h"
#include "access/xlogutils.h"
#include "storage/standby.h"

/*
 * replay a hash index meta page
 */
static void
hash_xlog_init_meta_page(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    Page        page;
    Buffer      metabuf;
    ForkNumber  forknum;

    xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);

    /* create the index' metapage */
    metabuf = XLogInitBufferForRedo(record, 0);
    Assert(BufferIsValid(metabuf));
    _hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
                          xlrec->ffactor, true);
    page = BufferGetPage(metabuf);
    PageSetLSN(page, lsn);
    MarkBufferDirty(metabuf);

    /*
     * Force the on-disk state of init forks to always be in sync with the
     * state in shared buffers.  See XLogReadBufferForRedoExtended.  We need
     * special handling for init forks as create index operations don't log a
     * full page image of the metapage.
     */
    XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
    if (forknum == INIT_FORKNUM)
        FlushOneBuffer(metabuf);

    /* all done */
    UnlockReleaseBuffer(metabuf);
}

/*
 * replay a hash index bitmap page
 */
static void
hash_xlog_init_bitmap_page(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    Buffer      bitmapbuf;
    Buffer      metabuf;
    Page        page;
    HashMetaPage metap;
    uint32      num_buckets;
    ForkNumber  forknum;

    xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);

    /*
     * Initialize bitmap page
     */
    bitmapbuf = XLogInitBufferForRedo(record, 0);
    _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
    PageSetLSN(BufferGetPage(bitmapbuf), lsn);
    MarkBufferDirty(bitmapbuf);

    /*
     * Force the on-disk state of init forks to always be in sync with the
     * state in shared buffers.  See XLogReadBufferForRedoExtended.  We need
     * special handling for init forks as create index operations don't log a
     * full page image of the metapage.
     */
    XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
    if (forknum == INIT_FORKNUM)
        FlushOneBuffer(bitmapbuf);
    UnlockReleaseBuffer(bitmapbuf);

    /* add the new bitmap page to the metapage's list of bitmaps */
    if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
    {
        /*
         * Note: in normal operation, we'd update the metapage while still
         * holding lock on the bitmap page.  But during replay it's not
         * necessary to hold that lock, since nobody can see it yet; the
         * creating transaction hasn't yet committed.
         */
        page = BufferGetPage(metabuf);
        metap = HashPageGetMeta(page);

        num_buckets = metap->hashm_maxbucket + 1;
        metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
        metap->hashm_nmaps++;

        PageSetLSN(page, lsn);
        MarkBufferDirty(metabuf);

        XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
        if (forknum == INIT_FORKNUM)
            FlushOneBuffer(metabuf);
    }
    if (BufferIsValid(metabuf))
        UnlockReleaseBuffer(metabuf);
}

/*
 * replay a hash index insert without split
 */
static void
hash_xlog_insert(XLogReaderState *record)
{
    HashMetaPage metap;
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;

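    /* replay the tuple insertion on the page that received it */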
    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        Size        datalen;
        char       *datapos = XLogRecGetBlockData(record, 0, &datalen);

        page = BufferGetPage(buffer);

        if (PageAddItem(page, datapos, datalen, xlrec->offnum,
                        false, false) == InvalidOffsetNumber)
            elog(PANIC, "hash_xlog_insert: failed to add item");

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);

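    /* replay the metapage update: increment the index's tuple count */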
    if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
    {
        /*
         * Note: in normal operation, we'd update the metapage while still
         * holding lock on the page we inserted into.  But during replay it's
         * not necessary to hold that lock, since no other index updates can
         * be happening concurrently.
         */
        page = BufferGetPage(buffer);
        metap = HashPageGetMeta(page);
        metap->hashm_ntuples += 1;

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}

/*
 * replay addition of overflow page for hash index
 */
static void
hash_xlog_add_ovfl_page(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record);
    Buffer      leftbuf;
    Buffer      ovflbuf;
    Buffer      metabuf;
    BlockNumber leftblk;
    BlockNumber rightblk;
    BlockNumber newmapblk = InvalidBlockNumber;
    Page        ovflpage;
    HashPageOpaque ovflopaque;
    uint32     *num_bucket;
    char       *data;
    Size        datalen PG_USED_FOR_ASSERTS_ONLY;
    bool        new_bmpage = false;

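    /* block 0 is the new overflow page; block 1 is its left sibling */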
    XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk);
    XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk);

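    /* initialize the new overflow page */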
    ovflbuf = XLogInitBufferForRedo(record, 0);
    Assert(BufferIsValid(ovflbuf));

    data = XLogRecGetBlockData(record, 0, &datalen);
    num_bucket = (uint32 *) data;
    Assert(datalen == sizeof(uint32));
    _hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE,
                  true);
    /* update backlink */
    ovflpage = BufferGetPage(ovflbuf);
    ovflopaque = HashPageGetOpaque(ovflpage);
    ovflopaque->hasho_prevblkno = leftblk;

    PageSetLSN(ovflpage, lsn);
    MarkBufferDirty(ovflbuf);

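    /* replay the update of the left sibling's forward link */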
    if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO)
    {
        Page        leftpage;
        HashPageOpaque leftopaque;

        leftpage = BufferGetPage(leftbuf);
        leftopaque = HashPageGetOpaque(leftpage);
        leftopaque->hasho_nextblkno = rightblk;

        PageSetLSN(leftpage, lsn);
        MarkBufferDirty(leftbuf);
    }

    if (BufferIsValid(leftbuf))
        UnlockReleaseBuffer(leftbuf);
    UnlockReleaseBuffer(ovflbuf);

    /*
     * Note: in normal operation, we'd update the bitmap and meta page while
     * still holding lock on the overflow pages.  But during replay it's not
     * necessary to hold those locks, since no other index updates can be
     * happening concurrently.
     */
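    /* replay setting the overflow page's bit in the bitmap page, if one was logged */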
    if (XLogRecHasBlockRef(record, 2))
    {
        Buffer      mapbuffer;

        if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO)
        {
            Page        mappage = BufferGetPage(mapbuffer);
            uint32     *freep = NULL;
            uint32     *bitmap_page_bit;

            freep = HashPageGetBitmap(mappage);

            data = XLogRecGetBlockData(record, 2, &datalen);
            bitmap_page_bit = (uint32 *) data;

            SETBIT(freep, *bitmap_page_bit);

            PageSetLSN(mappage, lsn);
            MarkBufferDirty(mapbuffer);
        }
        if (BufferIsValid(mapbuffer))
            UnlockReleaseBuffer(mapbuffer);
    }

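    /* replay the initialization of a new bitmap page, if one was added */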
    if (XLogRecHasBlockRef(record, 3))
    {
        Buffer      newmapbuf;

        newmapbuf = XLogInitBufferForRedo(record, 3);

        _hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true);

        new_bmpage = true;
        newmapblk = BufferGetBlockNumber(newmapbuf);

        MarkBufferDirty(newmapbuf);
        PageSetLSN(BufferGetPage(newmapbuf), lsn);

        UnlockReleaseBuffer(newmapbuf);
    }

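    /* replay the metapage updates */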
    if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO)
    {
        HashMetaPage metap;
        Page        page;
        uint32     *firstfree_ovflpage;

        data = XLogRecGetBlockData(record, 4, &datalen);
        firstfree_ovflpage = (uint32 *) data;

        page = BufferGetPage(metabuf);
        metap = HashPageGetMeta(page);
        metap->hashm_firstfree = *firstfree_ovflpage;

        if (!xlrec->bmpage_found)
        {
            metap->hashm_spares[metap->hashm_ovflpoint]++;

            if (new_bmpage)
            {
                Assert(BlockNumberIsValid(newmapblk));

                metap->hashm_mapp[metap->hashm_nmaps] = newmapblk;
                metap->hashm_nmaps++;
                metap->hashm_spares[metap->hashm_ovflpoint]++;
            }
        }

        PageSetLSN(page, lsn);
        MarkBufferDirty(metabuf);
    }
    if (BufferIsValid(metabuf))
        UnlockReleaseBuffer(metabuf);
}

/*
 * replay allocation of page for split operation
 */
static void
hash_xlog_split_allocate_page(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record);
    Buffer      oldbuf;
    Buffer      newbuf;
    Buffer      metabuf;
    XLogRedoAction action;

    /*
     * To be consistent with normal operation, here we take cleanup locks on
     * both the old and new buckets even though there can't be any concurrent
     * inserts.
     */

    /* replay the record for old bucket */
    action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf);

    /*
     * Note that we still update the page even if it was restored from a full
     * page image, because the special space is not included in the image.
     */
    if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
    {
        Page        oldpage;
        HashPageOpaque oldopaque;

        oldpage = BufferGetPage(oldbuf);
        oldopaque = HashPageGetOpaque(oldpage);

        oldopaque->hasho_flag = xlrec->old_bucket_flag;
        oldopaque->hasho_prevblkno = xlrec->new_bucket;

        PageSetLSN(oldpage, lsn);
        MarkBufferDirty(oldbuf);
    }

    /* replay the record for new bucket */
    XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
                                  &newbuf);
    _hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
                  xlrec->new_bucket_flag, true);
    MarkBufferDirty(newbuf);
    PageSetLSN(BufferGetPage(newbuf), lsn);

    /*
     * We could release the lock on the old bucket earlier, but we do it here
     * to be consistent with normal operation.
     */
    if (BufferIsValid(oldbuf))
        UnlockReleaseBuffer(oldbuf);
    if (BufferIsValid(newbuf))
        UnlockReleaseBuffer(newbuf);

    /*
     * Note: in normal operation, we'd update the meta page while still
     * holding lock on the old and new bucket pages.  But during replay it's
     * not necessary to hold those locks, since no other bucket splits can be
     * happening concurrently.
     */

    /* replay the record for metapage changes */
    if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO)
    {
        Page        page;
        HashMetaPage metap;
        Size        datalen;
        char       *data;
        uint32     *uidata;
        int         uidatacount;

        page = BufferGetPage(metabuf);
        metap = HashPageGetMeta(page);
        metap->hashm_maxbucket = xlrec->new_bucket;

        data = XLogRecGetBlockData(record, 2, &datalen);

        /*
         * This cast is ok because XLogRecGetBlockData() returns a MAXALIGNed
         * buffer.
         */
        uidata = (uint32 *) data;
        uidatacount = 0;

        if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS)
        {
            uint32      lowmask = uidata[uidatacount++];
            uint32      highmask = uidata[uidatacount++];

            /* update metapage */
            metap->hashm_lowmask = lowmask;
            metap->hashm_highmask = highmask;
        }

        if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT)
        {
            uint32      ovflpoint = uidata[uidatacount++];
            uint32      ovflpages = uidata[uidatacount++];

            /* update metapage */
            metap->hashm_ovflpoint = ovflpoint;
            metap->hashm_spares[ovflpoint] = ovflpages;
        }

        MarkBufferDirty(metabuf);
        PageSetLSN(BufferGetPage(metabuf), lsn);
    }

    if (BufferIsValid(metabuf))
        UnlockReleaseBuffer(metabuf);
}

/*
 * replay of split operation
 */
static void
hash_xlog_split_page(XLogReaderState *record)
{
    Buffer      buf;

    if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED)
        elog(ERROR, "Hash split record did not contain a full-page image");

    UnlockReleaseBuffer(buf);
}

/*
 * replay completion of split operation
 */
static void
hash_xlog_split_complete(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record);
    Buffer      oldbuf;
    Buffer      newbuf;
    XLogRedoAction action;

    /* replay the record for old bucket */
    action = XLogReadBufferForRedo(record, 0, &oldbuf);

    /*
     * Note that we still update the page even if it was restored from a full
     * page image, because the bucket flag is not included in the image.
     */
    if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
    {
        Page        oldpage;
        HashPageOpaque oldopaque;

        oldpage = BufferGetPage(oldbuf);
        oldopaque = HashPageGetOpaque(oldpage);

        oldopaque->hasho_flag = xlrec->old_bucket_flag;

        PageSetLSN(oldpage, lsn);
        MarkBufferDirty(oldbuf);
    }
    if (BufferIsValid(oldbuf))
        UnlockReleaseBuffer(oldbuf);

    /* replay the record for new bucket */
    action = XLogReadBufferForRedo(record, 1, &newbuf);

    /*
     * Note that we still update the page even if it was restored from a full
     * page image, because the bucket flag is not included in the image.
     */
    if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
    {
        Page        newpage;
        HashPageOpaque nopaque;

        newpage = BufferGetPage(newbuf);
        nopaque = HashPageGetOpaque(newpage);

        nopaque->hasho_flag = xlrec->new_bucket_flag;

        PageSetLSN(newpage, lsn);
        MarkBufferDirty(newbuf);
    }
    if (BufferIsValid(newbuf))
        UnlockReleaseBuffer(newbuf);
}

/*
 * replay move of page contents for squeeze operation of hash index
 */
static void
hash_xlog_move_page_contents(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record);
    Buffer      bucketbuf = InvalidBuffer;
    Buffer      writebuf = InvalidBuffer;
    Buffer      deletebuf = InvalidBuffer;
    XLogRedoAction action;

    /*
     * Ensure we have a cleanup lock on the primary bucket page before we
     * start the actual replay operation.  This guarantees that no scan can
     * start, and none can already be in progress, while we replay this
     * operation.  If scans were allowed, they could miss some records or see
     * the same record multiple times.
     */
    if (xldata->is_prim_bucket_same_wrt)
        action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
    else
    {
        /*
         * We don't care about the return value here, as reading bucketbuf
         * only serves to take a cleanup lock on the primary bucket page.
         */
        (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

        action = XLogReadBufferForRedo(record, 1, &writebuf);
    }

    /* replay the record for adding entries in overflow buffer */
    if (action == BLK_NEEDS_REDO)
    {
        Page        writepage;
        char       *begin;
        char       *data;
        Size        datalen;
        uint16      ninserted = 0;

        data = begin = XLogRecGetBlockData(record, 1, &datalen);

        writepage = BufferGetPage(writebuf);

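        /* insert the moved tuples at the offsets recorded in the WAL data */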
        if (xldata->ntups > 0)
        {
            OffsetNumber *towrite = (OffsetNumber *) data;

            data += sizeof(OffsetNumber) * xldata->ntups;

            while (data - begin < datalen)
            {
                IndexTuple  itup = (IndexTuple) data;
                Size        itemsz;
                OffsetNumber l;

                itemsz = IndexTupleSize(itup);
                itemsz = MAXALIGN(itemsz);

                data += itemsz;

                l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
                if (l == InvalidOffsetNumber)
                    elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %d bytes",
                         (int) itemsz);

                ninserted++;
            }
        }

        /*
         * The number of tuples inserted must match what the REDO record
         * requested.
         */
        Assert(ninserted == xldata->ntups);

        PageSetLSN(writepage, lsn);
        MarkBufferDirty(writebuf);
    }

    /* replay the record for deleting entries from overflow buffer */
    if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO)
    {
        Page        page;
        char       *ptr;
        Size        len;

        ptr = XLogRecGetBlockData(record, 2, &len);

        page = BufferGetPage(deletebuf);

        if (len > 0)
        {
            OffsetNumber *unused;
            OffsetNumber *unend;

            unused = (OffsetNumber *) ptr;
            unend = (OffsetNumber *) (ptr + len);

            if ((unend - unused) > 0)
                PageIndexMultiDelete(page, unused, unend - unused);
        }

        PageSetLSN(page, lsn);
        MarkBufferDirty(deletebuf);
    }

    /*
     * Replay is complete, so we can release the buffers.  We release the
     * locks at the end of the replay operation to ensure that we hold the
     * lock on the primary bucket page until the end of the operation.  We
     * could release the lock on the write buffer as soon as we are done with
     * it, when it is not the primary bucket page, but that doesn't seem
     * worth complicating the code for.
     */
    if (BufferIsValid(deletebuf))
        UnlockReleaseBuffer(deletebuf);

    if (BufferIsValid(writebuf))
        UnlockReleaseBuffer(writebuf);

    if (BufferIsValid(bucketbuf))
        UnlockReleaseBuffer(bucketbuf);
}

/*
 * replay squeeze page operation of hash index
 */
static void
hash_xlog_squeeze_page(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record);
    Buffer      bucketbuf = InvalidBuffer;
    Buffer      writebuf = InvalidBuffer;
    Buffer      ovflbuf;
    Buffer      prevbuf = InvalidBuffer;
    Buffer      mapbuf;
    XLogRedoAction action;

    /*
     * Ensure we have a cleanup lock on the primary bucket page before we
     * start the actual replay operation.  This guarantees that no scan can
     * start, and none can already be in progress, while we replay this
     * operation.  If scans were allowed, they could miss some records or see
     * the same record multiple times.
     */
    if (xldata->is_prim_bucket_same_wrt)
        action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf);
    else
    {
        /*
         * We don't care about the return value here, as reading bucketbuf
         * only serves to take a cleanup lock on the primary bucket page.
         */
        (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

        if (xldata->ntups > 0 || xldata->is_prev_bucket_same_wrt)
            action = XLogReadBufferForRedo(record, 1, &writebuf);
        else
            action = BLK_NOTFOUND;
    }

    /* replay the record for adding entries in overflow buffer */
    if (action == BLK_NEEDS_REDO)
    {
        Page        writepage;
        char       *begin;
        char       *data;
        Size        datalen;
        uint16      ninserted = 0;
        bool        mod_wbuf = false;

        data = begin = XLogRecGetBlockData(record, 1, &datalen);

        writepage = BufferGetPage(writebuf);

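        /* insert the moved tuples at the offsets recorded in the WAL data */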
        if (xldata->ntups > 0)
        {
            OffsetNumber *towrite = (OffsetNumber *) data;

            data += sizeof(OffsetNumber) * xldata->ntups;

            while (data - begin < datalen)
            {
                IndexTuple  itup = (IndexTuple) data;
                Size        itemsz;
                OffsetNumber l;

                itemsz = IndexTupleSize(itup);
                itemsz = MAXALIGN(itemsz);

                data += itemsz;

                l = PageAddItem(writepage, itup, itemsz, towrite[ninserted], false, false);
                if (l == InvalidOffsetNumber)
                    elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %d bytes",
                         (int) itemsz);

                ninserted++;
            }

            mod_wbuf = true;
        }
        else
        {
            /*
             * Ensure that the required flags are set when there are no
             * tuples.  See _hash_freeovflpage().
             */
            Assert(xldata->is_prim_bucket_same_wrt ||
                   xldata->is_prev_bucket_same_wrt);
        }

        /*
         * The number of tuples inserted must match what the REDO record
         * requested.
         */
        Assert(ninserted == xldata->ntups);

        /*
         * If the page to which we are adding tuples is the page previous to
         * the freed overflow page, update its nextblkno.
         */
        if (xldata->is_prev_bucket_same_wrt)
        {
            HashPageOpaque writeopaque = HashPageGetOpaque(writepage);

            writeopaque->hasho_nextblkno = xldata->nextblkno;
            mod_wbuf = true;
        }

        /* Set LSN and mark writebuf dirty iff it is modified */
        if (mod_wbuf)
        {
            PageSetLSN(writepage, lsn);
            MarkBufferDirty(writebuf);
        }
    }

    /* replay the record for initializing overflow buffer */
    if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO)
    {
        Page        ovflpage;
        HashPageOpaque ovflopaque;

        ovflpage = BufferGetPage(ovflbuf);

        _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));

        ovflopaque = HashPageGetOpaque(ovflpage);

        ovflopaque->hasho_prevblkno = InvalidBlockNumber;
        ovflopaque->hasho_nextblkno = InvalidBlockNumber;
        ovflopaque->hasho_bucket = InvalidBucket;
        ovflopaque->hasho_flag = LH_UNUSED_PAGE;
        ovflopaque->hasho_page_id = HASHO_PAGE_ID;

        PageSetLSN(ovflpage, lsn);
        MarkBufferDirty(ovflbuf);
    }
    if (BufferIsValid(ovflbuf))
        UnlockReleaseBuffer(ovflbuf);

    /* replay the record for page previous to the freed overflow page */
    if (!xldata->is_prev_bucket_same_wrt &&
        XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO)
    {
        Page        prevpage = BufferGetPage(prevbuf);
        HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);

        prevopaque->hasho_nextblkno = xldata->nextblkno;

        PageSetLSN(prevpage, lsn);
        MarkBufferDirty(prevbuf);
    }
    if (BufferIsValid(prevbuf))
        UnlockReleaseBuffer(prevbuf);

    /* replay the record for page next to the freed overflow page */
    if (XLogRecHasBlockRef(record, 4))
    {
        Buffer      nextbuf;

        if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO)
        {
            Page        nextpage = BufferGetPage(nextbuf);
            HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);

            nextopaque->hasho_prevblkno = xldata->prevblkno;

            PageSetLSN(nextpage, lsn);
            MarkBufferDirty(nextbuf);
        }
        if (BufferIsValid(nextbuf))
            UnlockReleaseBuffer(nextbuf);
    }

    if (BufferIsValid(writebuf))
        UnlockReleaseBuffer(writebuf);

    if (BufferIsValid(bucketbuf))
        UnlockReleaseBuffer(bucketbuf);

    /*
     * Note: in normal operation, we'd update the bitmap and meta page while
     * still holding lock on the primary bucket page and overflow pages.  But
     * during replay it's not necessary to hold those locks, since no other
     * index updates can be happening concurrently.
     */
    /* replay the record for bitmap page */
    if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO)
    {
        Page        mappage = BufferGetPage(mapbuf);
        uint32     *freep = NULL;
        char       *data;
        uint32     *bitmap_page_bit;
        Size        datalen;

        freep = HashPageGetBitmap(mappage);

        data = XLogRecGetBlockData(record, 5, &datalen);
        bitmap_page_bit = (uint32 *) data;

        CLRBIT(freep, *bitmap_page_bit);

        PageSetLSN(mappage, lsn);
        MarkBufferDirty(mapbuf);
    }
    if (BufferIsValid(mapbuf))
        UnlockReleaseBuffer(mapbuf);

    /* replay the record for meta page */
    if (XLogRecHasBlockRef(record, 6))
    {
        Buffer      metabuf;

        if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO)
        {
            HashMetaPage metap;
            Page        page;
            char       *data;
            uint32     *firstfree_ovflpage;
            Size        datalen;

            data = XLogRecGetBlockData(record, 6, &datalen);
            firstfree_ovflpage = (uint32 *) data;

            page = BufferGetPage(metabuf);
            metap = HashPageGetMeta(page);
            metap->hashm_firstfree = *firstfree_ovflpage;

            PageSetLSN(page, lsn);
            MarkBufferDirty(metabuf);
        }
        if (BufferIsValid(metabuf))
            UnlockReleaseBuffer(metabuf);
    }
}

/*
 * replay delete operation of hash index
 */
static void
hash_xlog_delete(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record);
    Buffer      bucketbuf = InvalidBuffer;
    Buffer      deletebuf;
    Page        page;
    XLogRedoAction action;

    /*
     * Ensure we have a cleanup lock on the primary bucket page before we
     * start the actual replay operation.  This guarantees that no scan can
     * start, and none can already be in progress, while we replay this
     * operation.  If scans were allowed, they could miss some records or see
     * the same record multiple times.
     */
    if (xldata->is_primary_bucket_page)
        action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf);
    else
    {
        /*
         * We don't care about the return value here, as reading bucketbuf
         * only serves to take a cleanup lock on the primary bucket page.
         */
        (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf);

        action = XLogReadBufferForRedo(record, 1, &deletebuf);
    }

    /* replay the record for deleting entries in bucket page */
    if (action == BLK_NEEDS_REDO)
    {
        char       *ptr;
        Size        len;

        ptr = XLogRecGetBlockData(record, 1, &len);

        page = BufferGetPage(deletebuf);

        if (len > 0)
        {
            OffsetNumber *unused;
            OffsetNumber *unend;

            unused = (OffsetNumber *) ptr;
            unend = (OffsetNumber *) (ptr + len);

            if ((unend - unused) > 0)
                PageIndexMultiDelete(page, unused, unend - unused);
        }

        /*
         * Mark the page as not containing any LP_DEAD items only if the
         * clear_dead_marking flag is set to true.  See comments in
         * hashbucketcleanup() for details.
         */
        if (xldata->clear_dead_marking)
        {
            HashPageOpaque pageopaque;

            pageopaque = HashPageGetOpaque(page);
            pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
        }

        PageSetLSN(page, lsn);
        MarkBufferDirty(deletebuf);
    }
    if (BufferIsValid(deletebuf))
        UnlockReleaseBuffer(deletebuf);

    if (BufferIsValid(bucketbuf))
        UnlockReleaseBuffer(bucketbuf);
}

/*
 * replay split cleanup flag operation for primary bucket page.
 */
static void
hash_xlog_split_cleanup(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    Buffer      buffer;
    Page        page;

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        HashPageOpaque bucket_opaque;

        page = BufferGetPage(buffer);

        bucket_opaque = HashPageGetOpaque(page);
        bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}

/*
 * replay for update meta page
 */
static void
hash_xlog_update_meta_page(XLogReaderState *record)
{
    HashMetaPage metap;
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record);
    Buffer      metabuf;
    Page        page;

    if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(metabuf);
        metap = HashPageGetMeta(page);

        metap->hashm_ntuples = xldata->ntuples;

        PageSetLSN(page, lsn);
        MarkBufferDirty(metabuf);
    }
    if (BufferIsValid(metabuf))
        UnlockReleaseBuffer(metabuf);
}

/*
 * replay delete operation in hash index to remove
 * tuples marked as DEAD during index tuple insertion.
 */
static void
hash_xlog_vacuum_one_page(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_hash_vacuum_one_page *xldata;
    Buffer      buffer;
    Buffer      metabuf;
    Page        page;
    XLogRedoAction action;
    HashPageOpaque pageopaque;
    OffsetNumber *toDelete;

    xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record);
    toDelete = xldata->offsets;

    /*
     * If we have any conflict processing to do, it must happen before we
     * update the page.
     *
     * Hash index records that are marked as LP_DEAD and being removed during
     * hash index tuple insertion can conflict with standby queries.  You
     * might think that vacuum records would conflict as well, but we've
     * handled that already.  XLOG_HEAP2_PRUNE_VACUUM_SCAN records provide
     * the highest xid cleaned by the vacuum of the heap, so we can resolve
     * any conflicts just once when that arrives.  After that we know that no
     * conflicts exist from individual hash index vacuum records on that
     * index.
     */
    if (InHotStandby)
    {
        RelFileLocator rlocator;

        XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
        ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
                                            xldata->isCatalogRel,
                                            rlocator);
    }

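    /* replay the removal of the dead tuples, taking a cleanup lock as in normal operation */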
    action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer);

    if (action == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(buffer);

        PageIndexMultiDelete(page, toDelete, xldata->ntuples);

        /*
         * Mark the page as not containing any LP_DEAD items.  See comments
         * in _hash_vacuum_one_page() for details.
         */
        pageopaque = HashPageGetOpaque(page);
        pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);

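    /* replay the metapage update: decrement the index's tuple count */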
    if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
    {
        Page        metapage;
        HashMetaPage metap;

        metapage = BufferGetPage(metabuf);
        metap = HashPageGetMeta(metapage);

        metap->hashm_ntuples -= xldata->ntuples;

        PageSetLSN(metapage, lsn);
        MarkBufferDirty(metabuf);
    }
    if (BufferIsValid(metabuf))
        UnlockReleaseBuffer(metabuf);
}

void
hash_redo(XLogReaderState *record)
{
    uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

    switch (info)
    {
        case XLOG_HASH_INIT_META_PAGE:
            hash_xlog_init_meta_page(record);
            break;
        case XLOG_HASH_INIT_BITMAP_PAGE:
            hash_xlog_init_bitmap_page(record);
            break;
        case XLOG_HASH_INSERT:
            hash_xlog_insert(record);
            break;
        case XLOG_HASH_ADD_OVFL_PAGE:
            hash_xlog_add_ovfl_page(record);
            break;
        case XLOG_HASH_SPLIT_ALLOCATE_PAGE:
            hash_xlog_split_allocate_page(record);
            break;
        case XLOG_HASH_SPLIT_PAGE:
            hash_xlog_split_page(record);
            break;
        case XLOG_HASH_SPLIT_COMPLETE:
            hash_xlog_split_complete(record);
            break;
        case XLOG_HASH_MOVE_PAGE_CONTENTS:
            hash_xlog_move_page_contents(record);
            break;
        case XLOG_HASH_SQUEEZE_PAGE:
            hash_xlog_squeeze_page(record);
            break;
        case XLOG_HASH_DELETE:
            hash_xlog_delete(record);
            break;
        case XLOG_HASH_SPLIT_CLEANUP:
            hash_xlog_split_cleanup(record);
            break;
        case XLOG_HASH_UPDATE_META_PAGE:
            hash_xlog_update_meta_page(record);
            break;
        case XLOG_HASH_VACUUM_ONE_PAGE:
            hash_xlog_vacuum_one_page(record);
            break;
        default:
            elog(PANIC, "hash_redo: unknown op code %u", info);
    }
}

/*
 * Mask a hash page before performing consistency checks on it.
 */
void
hash_mask(char *pagedata, BlockNumber blkno)
{
    Page        page = (Page) pagedata;
    HashPageOpaque opaque;
    int         pagetype;

    mask_page_lsn_and_checksum(page);

    mask_page_hint_bits(page);
    mask_unused_space(page);

    opaque = HashPageGetOpaque(page);

    pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
    if (pagetype == LH_UNUSED_PAGE)
    {
        /*
         * Mask everything on a UNUSED page.
         */
        mask_page_content(page);
    }
    else if (pagetype == LH_BUCKET_PAGE ||
             pagetype == LH_OVERFLOW_PAGE)
    {
        /*
         * In hash bucket and overflow pages, it is possible to modify the
         * LP_FLAGS without emitting any WAL record.  Hence, mask the line
         * pointer flags.  See hashgettuple(), _hash_kill_items() for
         * details.
         */
        mask_lp_flags(page);
    }

    /*
     * It is possible that the hint bit LH_PAGE_HAS_DEAD_TUPLES may remain
     * unlogged.  So, mask it.  See _hash_kill_items() for details.
     */
    opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
}