summaryrefslogtreecommitdiffstats
path: root/src/backend/access/gin/ginbtree.c
blob: 8520e37ebdcfb0d35cc921342a34bed6718283ba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
/*-------------------------------------------------------------------------
 *
 * ginbtree.c
 *	  page utilities routines for the postgres inverted index access method.
 *
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *			src/backend/access/gin/ginbtree.c
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/predicate.h"
#include "utils/memutils.h"
#include "utils/rel.h"

static void ginFindParents(GinBtree btree, GinBtreeStack *stack);
static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
						   void *insertdata, BlockNumber updateblkno,
						   Buffer childbuf, GinStatsData *buildStats);
static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
						   bool freestack, GinStatsData *buildStats);
static void ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack,
							  GinStatsData *buildStats, int access);

/*
 * Lock buffer by needed method for search.
 */
int
ginTraverseLock(Buffer buffer, bool searchMode)
{
	Page		page;
	int			access = GIN_SHARE;

	LockBuffer(buffer, GIN_SHARE);
	page = BufferGetPage(buffer);
	if (GinPageIsLeaf(page))
	{
		if (searchMode == false)
		{
			/* we should relock our page */
			LockBuffer(buffer, GIN_UNLOCK);
			LockBuffer(buffer, GIN_EXCLUSIVE);

			/* But root can become non-leaf during relock */
			if (!GinPageIsLeaf(page))
			{
				/* restore old lock type (very rare) */
				LockBuffer(buffer, GIN_UNLOCK);
				LockBuffer(buffer, GIN_SHARE);
			}
			else
				access = GIN_EXCLUSIVE;
		}
	}

	return access;
}

/*
 * Descend the tree to the leaf page that contains or would contain the key
 * we're searching for. The key should already be filled in 'btree', in
 * tree-type specific manner. If btree->fullScan is true, descends to the
 * leftmost leaf page.
 *
 * If 'searchmode' is false, on return stack->buffer is exclusively locked,
 * and the stack represents the full path to the root. Otherwise stack->buffer
 * is share-locked, and stack->parent is NULL.
 *
 * If 'rootConflictCheck' is true, tree root is checked for serialization
 * conflict.
 */
GinBtreeStack *
ginFindLeafPage(GinBtree btree, bool searchMode,
				bool rootConflictCheck, Snapshot snapshot)
{
	GinBtreeStack *stack;

	stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
	stack->blkno = btree->rootBlkno;
	stack->buffer = ReadBuffer(btree->index, btree->rootBlkno);
	stack->parent = NULL;
	stack->predictNumber = 1;

	if (rootConflictCheck)
		CheckForSerializableConflictIn(btree->index, NULL, btree->rootBlkno);

	for (;;)
	{
		Page		page;
		BlockNumber child;
		int			access;

		stack->off = InvalidOffsetNumber;

		page = BufferGetPage(stack->buffer);
		TestForOldSnapshot(snapshot, btree->index, page);

		access = ginTraverseLock(stack->buffer, searchMode);

		/*
		 * If we're going to modify the tree, finish any incomplete splits we
		 * encounter on the way.
		 */
		if (!searchMode && GinPageIsIncompleteSplit(page))
			ginFinishOldSplit(btree, stack, NULL, access);

		/*
		 * ok, page is correctly locked, we should check to move right ..,
		 * root never has a right link, so small optimization
		 */
		while (btree->fullScan == false && stack->blkno != btree->rootBlkno &&
			   btree->isMoveRight(btree, page))
		{
			BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;

			if (rightlink == InvalidBlockNumber)
				/* rightmost page */
				break;

			stack->buffer = ginStepRight(stack->buffer, btree->index, access);
			stack->blkno = rightlink;
			page = BufferGetPage(stack->buffer);
			TestForOldSnapshot(snapshot, btree->index, page);

			if (!searchMode && GinPageIsIncompleteSplit(page))
				ginFinishOldSplit(btree, stack, NULL, access);
		}

		if (GinPageIsLeaf(page))	/* we found, return locked page */
			return stack;

		/* now we have correct buffer, try to find child */
		child = btree->findChildPage(btree, stack);

		LockBuffer(stack->buffer, GIN_UNLOCK);
		Assert(child != InvalidBlockNumber);
		Assert(stack->blkno != child);

		if (searchMode)
		{
			/* in search mode we may forget path to leaf */
			stack->blkno = child;
			stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
		}
		else
		{
			GinBtreeStack *ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));

			ptr->parent = stack;
			stack = ptr;
			stack->blkno = child;
			stack->buffer = ReadBuffer(btree->index, stack->blkno);
			stack->predictNumber = 1;
		}
	}
}

/*
 * Step right from current page.
 *
 * The next page is locked first, before releasing the current page. This is
 * crucial to prevent concurrent VACUUM from deleting a page that we are about
 * to step to. (The lock-coupling isn't strictly necessary when we are
 * traversing the tree to find an insert location, because page deletion grabs
 * a cleanup lock on the root to prevent any concurrent inserts. See Page
 * deletion section in the README. But there's no harm in doing it always.)
 */
Buffer
ginStepRight(Buffer buffer, Relation index, int lockmode)
{
	Buffer		nextbuffer;
	Page		page = BufferGetPage(buffer);
	bool		isLeaf = GinPageIsLeaf(page);
	bool		isData = GinPageIsData(page);
	BlockNumber blkno = GinPageGetOpaque(page)->rightlink;

	nextbuffer = ReadBuffer(index, blkno);
	LockBuffer(nextbuffer, lockmode);
	UnlockReleaseBuffer(buffer);

	/* Sanity check that the page we stepped to is of similar kind. */
	page = BufferGetPage(nextbuffer);
	if (isLeaf != GinPageIsLeaf(page) || isData != GinPageIsData(page))
		elog(ERROR, "right sibling of GIN page is of different type");

	return nextbuffer;
}

void
freeGinBtreeStack(GinBtreeStack *stack)
{
	while (stack)
	{
		GinBtreeStack *tmp = stack->parent;

		if (stack->buffer != InvalidBuffer)
			ReleaseBuffer(stack->buffer);

		pfree(stack);
		stack = tmp;
	}
}

/*
 * Try to find parent for current stack position. Returns correct parent and
 * child's offset in stack->parent. The root page is never released, to
 * prevent conflict with vacuum process.
 */
static void
ginFindParents(GinBtree btree, GinBtreeStack *stack)
{
	Page		page;
	Buffer		buffer;
	BlockNumber blkno,
				leftmostBlkno;
	OffsetNumber offset;
	GinBtreeStack *root;
	GinBtreeStack *ptr;

	/*
	 * Unwind the stack all the way up to the root, leaving only the root
	 * item.
	 *
	 * Be careful not to release the pin on the root page! The pin on root
	 * page is required to lock out concurrent vacuums on the tree.
	 */
	root = stack->parent;
	while (root->parent)
	{
		ReleaseBuffer(root->buffer);
		root = root->parent;
	}

	Assert(root->blkno == btree->rootBlkno);
	Assert(BufferGetBlockNumber(root->buffer) == btree->rootBlkno);
	root->off = InvalidOffsetNumber;

	blkno = root->blkno;
	buffer = root->buffer;

	ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));

	for (;;)
	{
		LockBuffer(buffer, GIN_EXCLUSIVE);
		page = BufferGetPage(buffer);
		if (GinPageIsLeaf(page))
			elog(ERROR, "Lost path");

		if (GinPageIsIncompleteSplit(page))
		{
			Assert(blkno != btree->rootBlkno);
			ptr->blkno = blkno;
			ptr->buffer = buffer;

			/*
			 * parent may be wrong, but if so, the ginFinishSplit call will
			 * recurse to call ginFindParents again to fix it.
			 */
			ptr->parent = root;
			ptr->off = InvalidOffsetNumber;

			ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
		}

		leftmostBlkno = btree->getLeftMostChild(btree, page);

		while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber)
		{
			blkno = GinPageGetOpaque(page)->rightlink;
			if (blkno == InvalidBlockNumber)
			{
				/* Link not present in this level */
				LockBuffer(buffer, GIN_UNLOCK);
				/* Do not release pin on the root buffer */
				if (buffer != root->buffer)
					ReleaseBuffer(buffer);
				break;
			}
			buffer = ginStepRight(buffer, btree->index, GIN_EXCLUSIVE);
			page = BufferGetPage(buffer);

			/* finish any incomplete splits, as above */
			if (GinPageIsIncompleteSplit(page))
			{
				Assert(blkno != btree->rootBlkno);
				ptr->blkno = blkno;
				ptr->buffer = buffer;
				ptr->parent = root;
				ptr->off = InvalidOffsetNumber;

				ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
			}
		}

		if (blkno != InvalidBlockNumber)
		{
			ptr->blkno = blkno;
			ptr->buffer = buffer;
			ptr->parent = root; /* it may be wrong, but in next call we will
								 * correct */
			ptr->off = offset;
			stack->parent = ptr;
			return;
		}

		/* Descend down to next level */
		blkno = leftmostBlkno;
		buffer = ReadBuffer(btree->index, blkno);
	}
}

/*
 * Insert a new item to a page.
 *
 * Returns true if the insertion was finished. On false, the page was split and
 * the parent needs to be updated. (A root split returns true as it doesn't
 * need any further action by the caller to complete.)
 *
 * When inserting a downlink to an internal page, 'childbuf' contains the
 * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
 * atomically with the insert. Also, the existing item at offset stack->off
 * in the target page is updated to point to updateblkno.
 *
 * stack->buffer is locked on entry, and is kept locked.
 * Likewise for childbuf, if given.
 */
static bool
ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
			   void *insertdata, BlockNumber updateblkno,
			   Buffer childbuf, GinStatsData *buildStats)
{
	Page		page = BufferGetPage(stack->buffer);
	bool		result;
	GinPlaceToPageRC rc;
	uint16		xlflags = 0;
	Page		childpage = NULL;
	Page		newlpage = NULL,
				newrpage = NULL;
	void	   *ptp_workspace = NULL;
	MemoryContext tmpCxt;
	MemoryContext oldCxt;

	/*
	 * We do all the work of this function and its subfunctions in a temporary
	 * memory context.  This avoids leakages and simplifies APIs, since some
	 * subfunctions allocate storage that has to survive until we've finished
	 * the WAL insertion.
	 */
	tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
								   "ginPlaceToPage temporary context",
								   ALLOCSET_DEFAULT_SIZES);
	oldCxt = MemoryContextSwitchTo(tmpCxt);

	if (GinPageIsData(page))
		xlflags |= GIN_INSERT_ISDATA;
	if (GinPageIsLeaf(page))
	{
		xlflags |= GIN_INSERT_ISLEAF;
		Assert(!BufferIsValid(childbuf));
		Assert(updateblkno == InvalidBlockNumber);
	}
	else
	{
		Assert(BufferIsValid(childbuf));
		Assert(updateblkno != InvalidBlockNumber);
		childpage = BufferGetPage(childbuf);
	}

	/*
	 * See if the incoming tuple will fit on the page.  beginPlaceToPage will
	 * decide if the page needs to be split, and will compute the split
	 * contents if so.  See comments for beginPlaceToPage and execPlaceToPage
	 * functions for more details of the API here.
	 */
	rc = btree->beginPlaceToPage(btree, stack->buffer, stack,
								 insertdata, updateblkno,
								 &ptp_workspace,
								 &newlpage, &newrpage);

	if (rc == GPTP_NO_WORK)
	{
		/* Nothing to do */
		result = true;
	}
	else if (rc == GPTP_INSERT)
	{
		/* It will fit, perform the insertion */
		START_CRIT_SECTION();

		if (RelationNeedsWAL(btree->index) && !btree->isBuild)
		{
			XLogBeginInsert();
			XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
			if (BufferIsValid(childbuf))
				XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
		}

		/* Perform the page update, and register any extra WAL data */
		btree->execPlaceToPage(btree, stack->buffer, stack,
							   insertdata, updateblkno, ptp_workspace);

		MarkBufferDirty(stack->buffer);

		/* An insert to an internal page finishes the split of the child. */
		if (BufferIsValid(childbuf))
		{
			GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
			MarkBufferDirty(childbuf);
		}

		if (RelationNeedsWAL(btree->index) && !btree->isBuild)
		{
			XLogRecPtr	recptr;
			ginxlogInsert xlrec;
			BlockIdData childblknos[2];

			xlrec.flags = xlflags;

			XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert));

			/*
			 * Log information about child if this was an insertion of a
			 * downlink.
			 */
			if (BufferIsValid(childbuf))
			{
				BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
				BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
				XLogRegisterData((char *) childblknos,
								 sizeof(BlockIdData) * 2);
			}

			recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
			PageSetLSN(page, recptr);
			if (BufferIsValid(childbuf))
				PageSetLSN(childpage, recptr);
		}

		END_CRIT_SECTION();

		/* Insertion is complete. */
		result = true;
	}
	else if (rc == GPTP_SPLIT)
	{
		/*
		 * Didn't fit, need to split.  The split has been computed in newlpage
		 * and newrpage, which are pointers to palloc'd pages, not associated
		 * with buffers.  stack->buffer is not touched yet.
		 */
		Buffer		rbuffer;
		BlockNumber savedRightLink;
		ginxlogSplit data;
		Buffer		lbuffer = InvalidBuffer;
		Page		newrootpg = NULL;

		/* Get a new index page to become the right page */
		rbuffer = GinNewBuffer(btree->index);

		/* During index build, count the new page */
		if (buildStats)
		{
			if (btree->isData)
				buildStats->nDataPages++;
			else
				buildStats->nEntryPages++;
		}

		savedRightLink = GinPageGetOpaque(page)->rightlink;

		/* Begin setting up WAL record */
		data.node = btree->index->rd_node;
		data.flags = xlflags;
		if (BufferIsValid(childbuf))
		{
			data.leftChildBlkno = BufferGetBlockNumber(childbuf);
			data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink;
		}
		else
			data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;

		if (stack->parent == NULL)
		{
			/*
			 * splitting the root, so we need to allocate new left page and
			 * place pointers to left and right page on root page.
			 */
			lbuffer = GinNewBuffer(btree->index);

			/* During index build, count the new left page */
			if (buildStats)
			{
				if (btree->isData)
					buildStats->nDataPages++;
				else
					buildStats->nEntryPages++;
			}

			data.rrlink = InvalidBlockNumber;
			data.flags |= GIN_SPLIT_ROOT;

			GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
			GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);

			/*
			 * Construct a new root page containing downlinks to the new left
			 * and right pages.  (Do this in a temporary copy rather than
			 * overwriting the original page directly, since we're not in the
			 * critical section yet.)
			 */
			newrootpg = PageGetTempPage(newrpage);
			GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ);

			btree->fillRoot(btree, newrootpg,
							BufferGetBlockNumber(lbuffer), newlpage,
							BufferGetBlockNumber(rbuffer), newrpage);

			if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
			{

				PredicateLockPageSplit(btree->index,
									   BufferGetBlockNumber(stack->buffer),
									   BufferGetBlockNumber(lbuffer));

				PredicateLockPageSplit(btree->index,
									   BufferGetBlockNumber(stack->buffer),
									   BufferGetBlockNumber(rbuffer));
			}
		}
		else
		{
			/* splitting a non-root page */
			data.rrlink = savedRightLink;

			GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
			GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
			GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);

			if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
			{

				PredicateLockPageSplit(btree->index,
									   BufferGetBlockNumber(stack->buffer),
									   BufferGetBlockNumber(rbuffer));
			}
		}

		/*
		 * OK, we have the new contents of the left page in a temporary copy
		 * now (newlpage), and likewise for the new contents of the
		 * newly-allocated right block. The original page is still unchanged.
		 *
		 * If this is a root split, we also have a temporary page containing
		 * the new contents of the root.
		 */

		START_CRIT_SECTION();

		MarkBufferDirty(rbuffer);
		MarkBufferDirty(stack->buffer);

		/*
		 * Restore the temporary copies over the real buffers.
		 */
		if (stack->parent == NULL)
		{
			/* Splitting the root, three pages to update */
			MarkBufferDirty(lbuffer);
			memcpy(page, newrootpg, BLCKSZ);
			memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ);
			memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
		}
		else
		{
			/* Normal split, only two pages to update */
			memcpy(page, newlpage, BLCKSZ);
			memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
		}

		/* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */
		if (BufferIsValid(childbuf))
		{
			GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
			MarkBufferDirty(childbuf);
		}

		/* write WAL record */
		if (RelationNeedsWAL(btree->index) && !btree->isBuild)
		{
			XLogRecPtr	recptr;

			XLogBeginInsert();

			/*
			 * We just take full page images of all the split pages. Splits
			 * are uncommon enough that it's not worth complicating the code
			 * to be more efficient.
			 */
			if (stack->parent == NULL)
			{
				XLogRegisterBuffer(0, lbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
				XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
				XLogRegisterBuffer(2, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
			}
			else
			{
				XLogRegisterBuffer(0, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
				XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
			}
			if (BufferIsValid(childbuf))
				XLogRegisterBuffer(3, childbuf, REGBUF_STANDARD);

			XLogRegisterData((char *) &data, sizeof(ginxlogSplit));

			recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);

			PageSetLSN(page, recptr);
			PageSetLSN(BufferGetPage(rbuffer), recptr);
			if (stack->parent == NULL)
				PageSetLSN(BufferGetPage(lbuffer), recptr);
			if (BufferIsValid(childbuf))
				PageSetLSN(childpage, recptr);
		}
		END_CRIT_SECTION();

		/*
		 * We can release the locks/pins on the new pages now, but keep
		 * stack->buffer locked.  childbuf doesn't get unlocked either.
		 */
		UnlockReleaseBuffer(rbuffer);
		if (stack->parent == NULL)
			UnlockReleaseBuffer(lbuffer);

		/*
		 * If we split the root, we're done. Otherwise the split is not
		 * complete until the downlink for the new page has been inserted to
		 * the parent.
		 */
		result = (stack->parent == NULL);
	}
	else
	{
		elog(ERROR, "invalid return code from GIN beginPlaceToPage method: %d", rc);
		result = false;			/* keep compiler quiet */
	}

	/* Clean up temp context */
	MemoryContextSwitchTo(oldCxt);
	MemoryContextDelete(tmpCxt);

	return result;
}

/*
 * Finish a split by inserting the downlink for the new page to parent.
 *
 * On entry, stack->buffer is exclusively locked.
 *
 * If freestack is true, all the buffers are released and unlocked as we
 * crawl up the tree, and 'stack' is freed. Otherwise stack->buffer is kept
 * locked, and stack is unmodified, except for possibly moving right to find
 * the correct parent of page.
 */
static void
ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
			   GinStatsData *buildStats)
{
	Page		page;
	bool		done;
	bool		first = true;

	/* this loop crawls up the stack until the insertion is complete */
	do
	{
		GinBtreeStack *parent = stack->parent;
		void	   *insertdata;
		BlockNumber updateblkno;

		/* search parent to lock */
		LockBuffer(parent->buffer, GIN_EXCLUSIVE);

		/*
		 * If the parent page was incompletely split, finish that split first,
		 * then continue with the current one.
		 *
		 * Note: we have to finish *all* incomplete splits we encounter, even
		 * if we have to move right. Otherwise we might choose as the target a
		 * page that has no downlink in the parent, and splitting it further
		 * would fail.
		 */
		if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
			ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);

		/* move right if it's needed */
		page = BufferGetPage(parent->buffer);
		while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
		{
			if (GinPageRightMost(page))
			{
				/*
				 * rightmost page, but we don't find parent, we should use
				 * plain search...
				 */
				LockBuffer(parent->buffer, GIN_UNLOCK);
				ginFindParents(btree, stack);
				parent = stack->parent;
				Assert(parent != NULL);
				break;
			}

			parent->buffer = ginStepRight(parent->buffer, btree->index, GIN_EXCLUSIVE);
			parent->blkno = BufferGetBlockNumber(parent->buffer);
			page = BufferGetPage(parent->buffer);

			if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
				ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);
		}

		/* insert the downlink */
		insertdata = btree->prepareDownlink(btree, stack->buffer);
		updateblkno = GinPageGetOpaque(BufferGetPage(stack->buffer))->rightlink;
		done = ginPlaceToPage(btree, parent,
							  insertdata, updateblkno,
							  stack->buffer, buildStats);
		pfree(insertdata);

		/*
		 * If the caller requested to free the stack, unlock and release the
		 * child buffer now. Otherwise keep it pinned and locked, but if we
		 * have to recurse up the tree, we can unlock the upper pages, only
		 * keeping the page at the bottom of the stack locked.
		 */
		if (!first || freestack)
			LockBuffer(stack->buffer, GIN_UNLOCK);
		if (freestack)
		{
			ReleaseBuffer(stack->buffer);
			pfree(stack);
		}
		stack = parent;

		first = false;
	} while (!done);

	/* unlock the parent */
	LockBuffer(stack->buffer, GIN_UNLOCK);

	if (freestack)
		freeGinBtreeStack(stack);
}

/*
 * An entry point to ginFinishSplit() that is used when we stumble upon an
 * existing incompletely split page in the tree, as opposed to completing a
 * split that we just made outselves. The difference is that stack->buffer may
 * be merely share-locked on entry, and will be upgraded to exclusive mode.
 *
 * Note: Upgrading the lock momentarily releases it. Doing that in a scan
 * would not be OK, because a concurrent VACUUM might delete the page while
 * we're not holding the lock. It's OK in an insert, though, because VACUUM
 * has a different mechanism that prevents it from running concurrently with
 * inserts. (Namely, it holds a cleanup lock on the root.)
 */
static void
ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats, int access)
{
	elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"",
		 stack->blkno, RelationGetRelationName(btree->index));

	if (access == GIN_SHARE)
	{
		LockBuffer(stack->buffer, GIN_UNLOCK);
		LockBuffer(stack->buffer, GIN_EXCLUSIVE);

		if (!GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
		{
			/*
			 * Someone else already completed the split while we were not
			 * holding the lock.
			 */
			return;
		}
	}

	ginFinishSplit(btree, stack, false, buildStats);
}

/*
 * Insert a value to tree described by stack.
 *
 * The value to be inserted is given in 'insertdata'. Its format depends
 * on whether this is an entry or data tree, ginInsertValue just passes it
 * through to the tree-specific callback function.
 *
 * During an index build, buildStats is non-null and the counters it contains
 * are incremented as needed.
 *
 * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
 */
void
ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata,
			   GinStatsData *buildStats)
{
	bool		done;

	/* If the leaf page was incompletely split, finish the split first */
	if (GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
		ginFinishOldSplit(btree, stack, buildStats, GIN_EXCLUSIVE);

	done = ginPlaceToPage(btree, stack,
						  insertdata, InvalidBlockNumber,
						  InvalidBuffer, buildStats);
	if (done)
	{
		LockBuffer(stack->buffer, GIN_UNLOCK);
		freeGinBtreeStack(stack);
	}
	else
		ginFinishSplit(btree, stack, true, buildStats);
}