doc/src/sgml/mvcc.sgml


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257

<!-- $PostgreSQL: pgsql/doc/src/sgml/mvcc.sgml,v 2.74 2010/04/03 07:22:55 petere Exp $ -->

 <chapter id="mvcc">
  <title>Concurrency Control</title>

  <indexterm>
   <primary>concurrency</primary>
  </indexterm>

  <para>
   This chapter describes the behavior of the
   <productname>PostgreSQL</productname> database system when two or
   more sessions try to access the same data at the same time.  The
   goals in that situation are to allow efficient access for all
   sessions while maintaining strict data integrity.  Every developer
   of database applications should be familiar with the topics covered
   in this chapter.
  </para>

  <sect1 id="mvcc-intro">
   <title>Introduction</title>

   <indexterm>
    <primary>MVCC</primary>
   </indexterm>

   <para>
    <productname>PostgreSQL</productname> provides a rich set of tools 
    for developers to manage concurrent access to data.  Internally,
    data consistency is maintained by using a multiversion 
    model (Multiversion Concurrency Control, <acronym>MVCC</acronym>). 
    This means that while querying a database each transaction sees
    a snapshot of data (a <firstterm>database version</firstterm>)
    as it was some
    time ago, regardless of the current state of the underlying data.
    This protects the transaction from viewing inconsistent data that
    could be caused by (other) concurrent transaction updates on the same
    data rows, providing <firstterm>transaction isolation</firstterm>
    for each database session.  <acronym>MVCC</acronym>, by eschewing
    explicit locking methodologies of traditional database systems,
    minimizes lock contention in order to allow for reasonable 
    performance in multiuser environments.
   </para>

   <para>
    The main advantage of using the <acronym>MVCC</acronym> model of
    concurrency control rather than locking is that in
    <acronym>MVCC</acronym> locks acquired for querying (reading) data
    do not conflict with locks acquired for writing data, and so
    reading never blocks writing and writing never blocks reading.
   </para>

   <para>
    Table- and row-level locking facilities are also available in
    <productname>PostgreSQL</productname> for applications that cannot
    adapt easily to <acronym>MVCC</acronym> behavior.  However, proper
    use of <acronym>MVCC</acronym> will generally provide better
    performance than locks.  In addition, application-defined advisory
    locks provide a mechanism for acquiring locks that are not tied
    to a single transaction.
   </para>
  </sect1>

  <sect1 id="transaction-iso">
   <title>Transaction Isolation</title>

   <indexterm>
    <primary>transaction isolation</primary>
   </indexterm>

   <para>
    The <acronym>SQL</acronym> standard defines four levels of
    transaction isolation in terms of three phenomena that must be
    prevented between concurrent transactions.  These undesirable
    phenomena are:

    <variablelist>
     <varlistentry>
      <term>
       dirty read
       <indexterm><primary>dirty read</primary></indexterm>
      </term>
     <listitem>
      <para>
        A transaction reads data written by a concurrent uncommitted transaction.
       </para>
      </listitem>
     </varlistentry>

     <varlistentry>
      <term>
       nonrepeatable read
       <indexterm><primary>nonrepeatable read</primary></indexterm>
      </term>
     <listitem>
      <para>
        A transaction re-reads data it has previously read and finds that data
        has been modified by another transaction (that committed since the
        initial read).
       </para>
      </listitem>
     </varlistentry>

     <varlistentry>
      <term>
       phantom read
       <indexterm><primary>phantom read</primary></indexterm>
      </term>
     <listitem>
      <para>
        A transaction re-executes a query returning a set of rows that satisfy a
        search condition and finds that the set of rows satisfying the condition
        has changed due to another recently-committed transaction.
       </para>
      </listitem>
     </varlistentry>
    </variablelist>
   </para>

   <para>
    <indexterm>
     <primary>transaction isolation level</primary>
    </indexterm>
    The four transaction isolation levels and the corresponding
    behaviors are described in <xref linkend="mvcc-isolevel-table">.
   </para>

    <table tocentry="1" id="mvcc-isolevel-table">
     <title><acronym>SQL</acronym> Transaction Isolation Levels</title>
     <tgroup cols="4">
      <thead>
       <row>
        <entry>
         Isolation Level
        </entry>
        <entry>
         Dirty Read
        </entry>
        <entry>
         Nonrepeatable Read
        </entry>
        <entry>
         Phantom Read
        </entry>
       </row>
      </thead>
      <tbody>
       <row>
        <entry>
         Read uncommitted
        </entry>
        <entry>
         Possible
        </entry>
        <entry>
         Possible
        </entry>
        <entry>
         Possible
        </entry>
       </row>

       <row>
        <entry>
         Read committed
        </entry>
        <entry>
         Not possible
        </entry>
        <entry>
         Possible
        </entry>
        <entry>
         Possible
        </entry>
       </row>

       <row>
        <entry>
         Repeatable read
        </entry>
        <entry>
         Not possible
        </entry>
        <entry>
         Not possible
        </entry>
        <entry>
         Possible
        </entry>
       </row>

       <row>
        <entry>
         Serializable
        </entry>
        <entry>
         Not possible
        </entry>
        <entry>
         Not possible
        </entry>
        <entry>
         Not possible
        </entry>
       </row>
      </tbody>
     </tgroup>
    </table>

   <para>
    In <productname>PostgreSQL</productname>, you can request any of the
    four standard transaction isolation levels.  But internally, there are
    only two distinct isolation levels, which correspond to the levels Read
    Committed and Serializable.  When you select the level Read
    Uncommitted you really get Read Committed, and when you select
    Repeatable Read you really get Serializable, so the actual
    isolation level might be stricter than what you select.  This is
    permitted by the SQL standard: the four isolation levels only
    define which phenomena must not happen, they do not define which
    phenomena must happen.  The reason that <productname>PostgreSQL</>
    only provides two isolation levels is that this is the only
    sensible way to map the standard isolation levels to the multiversion
    concurrency control architecture.  The behavior of the available
    isolation levels is detailed in the following subsections.
   </para>

   <para>
    To set the transaction isolation level of a transaction, use the
    command <xref linkend="sql-set-transaction">.
   </para>

  <sect2 id="xact-read-committed">
   <title>Read Committed Isolation Level</title>

   <indexterm>
    <primary>transaction isolation level</primary>
    <secondary>read committed</secondary>
   </indexterm>

   <para>
    <firstterm>Read Committed</firstterm> is the default isolation
    level in <productname>PostgreSQL</productname>.  When a transaction
    uses this isolation level, a <command>SELECT</command> query
    (without a <literal>FOR UPDATE/SHARE</> clause) sees only data
    committed before the query began; it never sees either uncommitted
    data or changes committed during query execution by concurrent
    transactions.  In effect, a <command>SELECT</command> query sees
    a snapshot of the database as of the instant the query begins to
    run.   However, <command>SELECT</command> does see the effects
    of previous updates executed within its own transaction, even
    though they are not yet committed.  Also note that two successive
    <command>SELECT</command> commands can see different data, even
    though they are within a single transaction, if other transactions
    commit changes during execution of the first <command>SELECT</command>.
   </para>

   <para>
    <command>UPDATE</command>, <command>DELETE</command>, <command>SELECT
    FOR UPDATE</command>, and <command>SELECT FOR SHARE</command> commands
    behave the same as <command>SELECT</command>
    in terms of searching for target rows: they will only find target rows
    that were committed as of the command start time.  However, such a target
    row might have already been updated (or deleted or locked) by
    another concurrent transaction by the time it is found.  In this case, the
    would-be updater will wait for the first updating transaction to commit or
    roll back (if it is still in progress).  If the first updater rolls back,
    then its effects are negated and the second updater can proceed with
    updating the originally found row.  If the first updater commits, the
    second updater will ignore the row if the first updater deleted it,
    otherwise it will attempt to apply its operation to the updated version of
    the row.  The search condition of the command (the <literal>WHERE</> clause) is
    re-evaluated to see if the updated version of the row still matches the
    search condition.  If so, the second updater proceeds with its operation
    using the updated version of the row.  In the case of
    <command>SELECT FOR UPDATE</command> and <command>SELECT FOR
    SHARE</command>, this means it is the updated version of the row that is
    locked and returned to the client.
   </para>

   <para>
    Because of the above rule, it is possible for an updating command to see an
    inconsistent snapshot: it can see the effects of concurrent updating
    commands on the same rows it is trying to update, but it
    does not see effects of those commands on other rows in the database.
    This behavior makes Read Committed mode unsuitable for commands that
    involve complex search conditions; however, it is just right for simpler
    cases.  For example, consider updating bank balances with transactions
    like:

<screen>
BEGIN;
UPDATE accounts SET balance = balance + 100.00 WHERE acctnum = 12345;
UPDATE accounts SET balance = balance - 100.00 WHERE acctnum = 7534;
COMMIT;
</screen>

    If two such transactions concurrently try to change the balance of account
    12345, we clearly want the second transaction to start with the updated
    version of the account's row.  Because each command is affecting only a
    predetermined row, letting it see the updated version of the row does
    not create any troublesome inconsistency.
   </para>

   <para>
    More complex usage can produce undesirable results in Read Committed
    mode.  For example, consider a <command>DELETE</command> command
    operating on data that is being both added and removed from its
    restriction criteria by another command, e.g., assume
    <literal>website</literal> is a two-row table with
    <literal>website.hits</literal> equaling <literal>9</literal> and
    <literal>10</literal>:

<screen>
BEGIN;
UPDATE website SET hits = hits + 1;
-- run from another session:  DELETE FROM website WHERE hits = 10;
COMMIT;
</screen>

    The <command>DELETE</command> will have no effect even though
    there is a <literal>website.hits = 10</literal> row before and
    after the <command>UPDATE</command>. This occurs because the
    pre-update row value <literal>9</> is skipped, and when the
    <command>UPDATE</command> completes and <command>DELETE</command>
    obtains a lock, the new row value is no longer <literal>10</> but
    <literal>11</>, which no longer matches the criteria.
   </para>

   <para>
    Because Read Committed mode starts each command with a new snapshot
    that includes all transactions committed up to that instant,
    subsequent commands in the same transaction will see the effects
    of the committed concurrent transaction in any case.  The point
    at issue above is whether or not a <emphasis>single</> command
    sees an absolutely consistent view of the database.
   </para>

   <para>
    The partial transaction isolation provided by Read Committed mode
    is adequate for many applications, and this mode is fast and simple
    to use;  however, it is not sufficient for all cases.  Applications
    that do complex queries and updates might require a more rigorously
    consistent view of the database than Read Committed mode provides.
   </para>
  </sect2>

  <sect2 id="xact-serializable">
   <title>Serializable Isolation Level</title>

   <indexterm>
    <primary>transaction isolation level</primary>
    <secondary>serializable</secondary>
   </indexterm>

   <para>
    The <firstterm>Serializable</firstterm> isolation level provides the strictest transaction
    isolation.  This level emulates serial transaction execution,
    as if transactions had been executed one after another, serially,
    rather than concurrently.  However, applications using this level must
    be prepared to retry transactions due to serialization failures.
   </para>

   <para>
    When a transaction is using the serializable level,
    a <command>SELECT</command> query only sees data committed before the
    transaction began; it never sees either uncommitted data or changes
    committed
    during transaction execution by concurrent transactions.  (However,
    the query does see the effects of previous updates
    executed within its own transaction, even though they are not yet
    committed.)  This is different from Read Committed in that
    a query in a serializable transaction
    sees a snapshot as of the start of the <emphasis>transaction</>,
    not as of the start
    of the current query within the transaction.  Thus, successive
    <command>SELECT</command> commands within a <emphasis>single</>
    transaction see the same data, i.e., they do not see changes made by
    other transactions that committed after their own transaction started.
    (This behavior can be ideal for reporting applications.)
   </para>

   <para>
    <command>UPDATE</command>, <command>DELETE</command>, <command>SELECT
    FOR UPDATE</command>, and <command>SELECT FOR SHARE</command> commands
    behave the same as <command>SELECT</command>
    in terms of searching for target rows: they will only find target rows
    that were committed as of the transaction start time.  However, such a
    target
    row might have already been updated (or deleted or locked) by
    another concurrent transaction by the time it is found.  In this case, the
    serializable transaction will wait for the first updating transaction to commit or
    roll back (if it is still in progress).  If the first updater rolls back,
    then its effects are negated and the serializable transaction can proceed
    with updating the originally found row.  But if the first updater commits
    (and actually updated or deleted the row, not just locked it)
    then the serializable transaction will be rolled back with the message

<screen>
ERROR:  could not serialize access due to concurrent update
</screen>

    because a serializable transaction cannot modify or lock rows changed by
    other transactions after the serializable transaction began.
   </para>

   <para>
    When an application receives this error message, it should abort
    the current transaction and retry the whole transaction from
    the beginning.  The second time through, the transaction will see the
    previously-committed change as part of its initial view of the database,
    so there is no logical conflict in using the new version of the row
    as the starting point for the new transaction's update.
   </para>

   <para>
    Note that only updating transactions might need to be retried; read-only
    transactions will never have serialization conflicts.
   </para>

   <para>
    The Serializable mode provides a rigorous guarantee that each
    transaction sees a wholly consistent view of the database.  However,
    the application has to be prepared to retry transactions when concurrent
    updates make it impossible to sustain the illusion of serial execution.
    Since the cost of redoing complex transactions can be significant,
    serializable mode is recommended only when updating transactions contain logic
    sufficiently complex that they might give wrong answers in Read
    Committed mode.  Most commonly, Serializable mode is necessary when
    a transaction executes several successive commands that must see
    identical views of the database.
   </para>

   <sect3 id="mvcc-serializability">
    <title>Serializable Isolation versus True Serializability</title>

   <indexterm>
    <primary>serializability</primary>
   </indexterm>

   <indexterm>
    <primary>predicate locking</primary>
   </indexterm>

   <para>
    The intuitive meaning (and mathematical definition) of
    <quote>serializable</> execution is that any two successfully committed
    concurrent transactions will appear to have executed strictly serially,
    one after the other &mdash; although which one appeared to occur first might
    not be predictable in advance.  It is important to realize that forbidding
    the undesirable behaviors listed in <xref linkend="mvcc-isolevel-table">
    is not sufficient to guarantee true serializability, and in fact
    <productname>PostgreSQL</productname>'s Serializable mode <emphasis>does
    not guarantee serializable execution in this sense</>.  As an example,
    consider a table <structname>mytab</>, initially containing:
<screen>
 class | value 
-------+-------
     1 |    10
     1 |    20
     2 |   100
     2 |   200
</screen>
    Suppose that serializable transaction A computes:
<screen>
SELECT SUM(value) FROM mytab WHERE class = 1;
</screen>
    and then inserts the result (30) as the <structfield>value</> in a
    new row with <structfield>class</><literal> = 2</>.  Concurrently, serializable
    transaction B computes:
<screen>
SELECT SUM(value) FROM mytab WHERE class = 2;
</screen>
    and obtains the result 300, which it inserts in a new row with
    <structfield>class</><literal> = 1</>.  Then both transactions commit.  None of
    the listed undesirable behaviors have occurred, yet we have a result
    that could not have occurred in either order serially.  If A had
    executed before B, B would have computed the sum 330, not 300, and
    similarly the other order would have resulted in a different sum
    computed by A.
   </para>

   <para>
    To guarantee true mathematical serializability, it is necessary for
    a database system to enforce <firstterm>predicate locking</>, which
    means that a transaction cannot insert or modify a row that would
    have matched the <literal>WHERE</> condition of a query in another concurrent
    transaction.  For example, once transaction A has executed the query
    <literal>SELECT ... WHERE class = 1</>, a predicate-locking system
    would forbid transaction B from inserting any new row with class 1
    until A has committed.
     <footnote>
      <para>
       Essentially, a predicate-locking system prevents phantom reads
       by restricting what is written, whereas MVCC prevents them by
       restricting what is read.
      </para>
     </footnote>
    Such a locking system is complex to
    implement and extremely expensive in execution, since every session must
    be aware of the details of every query executed by every concurrent
    transaction.  And this large expense is mostly wasted, since in
    practice most applications do not do the sorts of things that could
    result in problems.  (Certainly the example above is rather contrived
    and unlikely to represent real software.)  For these reasons,
    <productname>PostgreSQL</productname> does not implement predicate
    locking.
   </para>

   <para>
    In cases where the possibility of non-serializable execution
    is a real hazard, problems can be prevented by appropriate use of
    explicit locking.  Further discussion appears in the following
    sections.
   </para>
  </sect3>
  </sect2>
 </sect1>

  <sect1 id="explicit-locking">
   <title>Explicit Locking</title>

   <indexterm>
    <primary>lock</primary>
   </indexterm>

   <para>
    <productname>PostgreSQL</productname> provides various lock modes
    to control concurrent access to data in tables.  These modes can
    be used for application-controlled locking in situations where
    <acronym>MVCC</acronym> does not give the desired behavior.  Also,
    most <productname>PostgreSQL</productname> commands automatically
    acquire locks of appropriate modes to ensure that referenced
    tables are not dropped or modified in incompatible ways while the
    command executes.  (For example, <command>ALTER TABLE</> cannot safely be
    executed concurrently with other operations on the same table, so it
    obtains an exclusive lock on the table to enforce that.)
   </para>

   <para>
    To examine a list of the currently outstanding locks in a database
    server, use the
    <link linkend="view-pg-locks"><structname>pg_locks</structname></link>
    system view. For more information on monitoring the status of the lock
    manager subsystem, refer to <xref linkend="monitoring">.
   </para>

  <sect2 id="locking-tables">
   <title>Table-Level Locks</title>

   <indexterm zone="locking-tables">
    <primary>LOCK</primary>
   </indexterm>

   <para>
    The list below shows the available lock modes and the contexts in
    which they are used automatically by
    <productname>PostgreSQL</productname>.  You can also acquire any
    of these locks explicitly with the command <xref
    linkend="sql-lock">.
    Remember that all of these lock modes are table-level locks,
    even if the name contains the word
    <quote>row</quote>; the names of the lock modes are historical.
    To some extent the names reflect the typical usage of each lock
    mode &mdash; but the semantics are all the same.  The only real difference
    between one lock mode and another is the set of lock modes with
    which each conflicts (see <xref linkend="table-lock-compatibility">).
 .  Two transactions cannot hold locks of conflicting
    modes on the same table at the same time.  (However, a transaction
    never conflicts with itself.  For example, it might acquire
    <literal>ACCESS EXCLUSIVE</literal> lock and later acquire
    <literal>ACCESS SHARE</literal> lock on the same table.)  Non-conflicting
    lock modes can be held concurrently by many transactions.  Notice in
    particular that some lock modes are self-conflicting (for example,
    an <literal>ACCESS EXCLUSIVE</literal> lock cannot be held by more than one
    transaction at a time) while others are not self-conflicting (for example,
    an <literal>ACCESS SHARE</literal> lock can be held by multiple transactions).
   </para>

     <variablelist>
      <title>Table-level lock modes</title>
      <varlistentry>
       <term>
        <literal>ACCESS SHARE</literal>
       </term>
       <listitem>
        <para>
         Conflicts with the <literal>ACCESS EXCLUSIVE</literal> lock
         mode only.
        </para>

        <para>
         The <command>SELECT</command> command acquires a lock of this mode on
         referenced tables.  In general, any query that only <emphasis>reads</> a table
         and does not modify it will acquire this lock mode.
        </para>
       </listitem>
      </varlistentry>

      <varlistentry>
       <term>
        <literal>ROW SHARE</literal>
       </term>
       <listitem>
        <para>
         Conflicts with the <literal>EXCLUSIVE</literal> and
         <literal>ACCESS EXCLUSIVE</literal> lock modes.
        </para>

        <para>
         The <command>SELECT FOR UPDATE</command> and
         <command>SELECT FOR SHARE</command> commands acquire a
         lock of this mode on the target table(s) (in addition to
         <literal>ACCESS SHARE</literal> locks on any other tables
         that are referenced but not selected
         <option>FOR UPDATE/FOR SHARE</option>).
        </para>
       </listitem>
      </varlistentry>

      <varlistentry>
       <term>
        <literal>ROW EXCLUSIVE</literal>
       </term>
       <listitem>
        <para>
         Conflicts with the <literal>SHARE</literal>, <literal>SHARE ROW
         EXCLUSIVE</literal>, <literal>EXCLUSIVE</literal>, and
         <literal>ACCESS EXCLUSIVE</literal> lock modes.
        </para>

        <para>
         The commands <command>UPDATE</command>,
         <command>DELETE</command>, and <command>INSERT</command>
         acquire this lock mode on the target table (in addition to
         <literal>ACCESS SHARE</literal> locks on any other referenced
         tables).  In general, this lock mode will be acquired by any
         command that <emphasis>modifies data</> in a table.
        </para>
       </listitem>
      </varlistentry>

      <varlistentry>
       <term>
        <literal>SHARE UPDATE EXCLUSIVE</literal>
       </term>
       <listitem>
        <para>
         Conflicts with the <literal>SHARE UPDATE EXCLUSIVE</literal>,
         <literal>SHARE</literal>, <literal>SHARE ROW
         EXCLUSIVE</literal>, <literal>EXCLUSIVE</literal>, and
         <literal>ACCESS EXCLUSIVE</literal> lock modes.
         This mode protects a table against
         concurrent schema changes and <command>VACUUM</> runs.
        </para>

        <para>
         Acquired by <command>VACUUM</command> (without <option>FULL</option>),
         <command>ANALYZE</>, and <command>CREATE INDEX CONCURRENTLY</>.
        </para>
       </listitem>
      </varlistentry>

      <varlistentry>
       <term>
        <literal>SHARE</literal>
       </term>
       <listitem>
        <para>
         Conflicts with the <literal>ROW EXCLUSIVE</literal>,
         <literal>SHARE UPDATE EXCLUSIVE</literal>, <literal>SHARE ROW
         EXCLUSIVE</literal>, <literal>EXCLUSIVE</literal>, and
         <literal>ACCESS EXCLUSIVE</literal> lock modes.
         This mode protects a table against concurrent data changes.
        </para>

        <para>
         Acquired by <command>CREATE INDEX</command>
         (without <option>CONCURRENTLY</option>).
        </para>
       </listitem>
      </varlistentry>

      <varlistentry>
       <term>
        <literal>SHARE ROW EXCLUSIVE</literal>
       </term>
       <listitem>
        <para>
         Conflicts with the <literal>ROW EXCLUSIVE</literal>,
         <literal>SHARE UPDATE EXCLUSIVE</literal>,
         <literal>SHARE</literal>, <literal>SHARE ROW
         EXCLUSIVE</literal>, <literal>EXCLUSIVE</literal>, and
         <literal>ACCESS EXCLUSIVE</literal> lock modes.
        </para>

        <para>
         This lock mode is not automatically acquired by any
         <productname>PostgreSQL</productname> command.
        </para>
       </listitem>
      </varlistentry>

      <varlistentry>
       <term>
        <literal>EXCLUSIVE</literal>
       </term>
       <listitem>
        <para>
         Conflicts with the <literal>ROW SHARE</literal>, <literal>ROW
         EXCLUSIVE</literal>, <literal>SHARE UPDATE
         EXCLUSIVE</literal>, <literal>SHARE</literal>, <literal>SHARE
         ROW EXCLUSIVE</literal>, <literal>EXCLUSIVE</literal>, and
         <literal>ACCESS EXCLUSIVE</literal> lock modes.
         This mode allows only concurrent <literal>ACCESS SHARE</literal> locks,
         i.e., only reads from the table can proceed in parallel with a
         transaction holding this lock mode.
        </para>

        <para>
         This lock mode is not automatically acquired on user tables by any
         <productname>PostgreSQL</productname> command.  However it is
         acquired on certain system catalogs in some operations.
        </para>
       </listitem>
      </varlistentry>

      <varlistentry>
       <term>
        <literal>ACCESS EXCLUSIVE</literal>
       </term>
       <listitem>
        <para>
         Conflicts with locks of all modes (<literal>ACCESS
         SHARE</literal>, <literal>ROW SHARE</literal>, <literal>ROW
         EXCLUSIVE</literal>, <literal>SHARE UPDATE
         EXCLUSIVE</literal>, <literal>SHARE</literal>, <literal>SHARE
         ROW EXCLUSIVE</literal>, <literal>EXCLUSIVE</literal>, and
         <literal>ACCESS EXCLUSIVE</literal>).
         This mode guarantees that the
         holder is the only transaction accessing the table in any way.
        </para>

        <para>
         Acquired by the <command>ALTER TABLE</command>, <command>DROP
         TABLE</command>, <command>TRUNCATE</command>, <command>REINDEX</command>,
         <command>CLUSTER</command>, and <command>VACUUM FULL</command>
         commands.  This is also the default lock mode for <command>LOCK
         TABLE</command> statements that do not specify a mode explicitly.
        </para>
       </listitem>
      </varlistentry>
     </variablelist>

     <tip>
      <para>
       Only an <literal>ACCESS EXCLUSIVE</literal> lock blocks a
       <command>SELECT</command> (without <option>FOR UPDATE/SHARE</option>)
       statement.
      </para>
     </tip>

   <para>
    Once acquired, a lock is normally held till end of transaction.  But if a
    lock is acquired after establishing a savepoint, the lock is released
    immediately if the savepoint is rolled back to.  This is consistent with
    the principle that <command>ROLLBACK</> cancels all effects of the
    commands since the savepoint.  The same holds for locks acquired within a
    <application>PL/pgSQL</> exception block: an error escape from the block
    releases locks acquired within it.
   </para>


    <table tocentry="1" id="table-lock-compatibility">
     <title> Conflicting lock modes</title>
     <tgroup cols="9">
      <colspec colnum="2" colname="lockst">
      <colspec colnum="9" colname="lockend">
      <spanspec namest="lockst" nameend="lockend" spanname="lockreq">
      <thead>
       <row>
        <entry morerows="1">Requested Lock Mode</entry>
        <entry spanname="lockreq">Current Lock Mode</entry>
       </row>
       <row>
        <entry>ACCESS SHARE</entry>
        <entry>ROW SHARE</entry>
        <entry>ROW EXCLUSIVE</entry>
        <entry>SHARE UPDATE EXCLUSIVE</entry>
        <entry>SHARE</entry>
        <entry>SHARE ROW EXCLUSIVE</entry>
        <entry>EXCLUSIVE</entry>
        <entry>ACCESS EXCLUSIVE</entry>
       </row>
      </thead>
      <tbody>
       <row>
        <entry>ACCESS SHARE</entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center">X</entry>
       </row>
       <row>
        <entry>ROW SHARE</entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
       </row>
       <row>
        <entry>ROW EXCLUSIVE</entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
       </row>
       <row>
        <entry>SHARE UPDATE EXCLUSIVE</entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
       </row>
       <row>
        <entry>SHARE</entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center"></entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
       </row>
       <row>
        <entry>SHARE ROW EXCLUSIVE</entry>
        <entry align="center"></entry>
        <entry align="center"></entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
       </row>
       <row>
        <entry>EXCLUSIVE</entry>
        <entry align="center"></entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
       </row>
       <row>
        <entry>ACCESS EXCLUSIVE</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
        <entry align="center">X</entry>
       </row>
      </tbody>
     </tgroup>
    </table>
   </sect2>

   <sect2 id="locking-rows">
    <title>Row-Level Locks</title>

    <para>
     In addition to table-level locks, there are row-level locks, which
     can be exclusive or shared locks.  An exclusive row-level lock on a
     specific row is automatically acquired when the row is updated or
     deleted.  The lock is held until the transaction commits or rolls
     back, just like table-level locks.  Row-level locks do
     not affect data querying; they block only <emphasis>writers to the same
     row</emphasis>.
    </para>

    <para>
     To acquire an exclusive row-level lock on a row without actually
     modifying the row, select the row with <command>SELECT FOR
     UPDATE</command>.  Note that once the row-level lock is acquired,
     the transaction can update the row multiple times without
     fear of conflicts.
    </para>

    <para>
     To acquire a shared row-level lock on a row, select the row with
     <command>SELECT FOR SHARE</command>.  A shared lock does not prevent
     other transactions from acquiring the same shared lock.  However,
     no transaction is allowed to update, delete, or exclusively lock a
     row on which any other transaction holds a shared lock.  Any attempt
     to do so will block until the shared lock(s) have been released.
    </para>

    <para>
     <productname>PostgreSQL</productname> doesn't remember any
     information about modified rows in memory, so there is no limit on
     the number of rows locked at one time.  However, locking a row
     might cause a disk write, e.g., <command>SELECT FOR
     UPDATE</command> modifies selected rows to mark them locked, and so
     will result in disk writes.
    </para>

    <para>
     In addition to table and row locks, page-level share/exclusive locks are
     used to control read/write access to table pages in the shared buffer
     pool.  These locks are released immediately after a row is fetched or
     updated.  Application developers normally need not be concerned with
     page-level locks, but they are mentioned here for completeness.
    </para>

   </sect2>

   <sect2 id="locking-deadlocks">
    <title>Deadlocks</title>

    <indexterm zone="locking-deadlocks">
     <primary>deadlock</primary>
    </indexterm>

    <para>
     The use of explicit locking can increase the likelihood of
     <firstterm>deadlocks</>, wherein two (or more) transactions each
     hold locks that the other wants.  For example, if transaction 1
     acquires an exclusive lock on table A and then tries to acquire
     an exclusive lock on table B, while transaction 2 has already
     exclusive-locked table B and now wants an exclusive lock on table
     A, then neither one can proceed.
     <productname>PostgreSQL</productname> automatically detects
     deadlock situations and resolves them by aborting one of the
     transactions involved, allowing the other(s) to complete.
     (Exactly which transaction will be aborted is difficult to
     predict and should not be relied upon.)
    </para>

    <para>
     Note that deadlocks can also occur as the result of row-level
     locks (and thus, they can occur even if explicit locking is not
     used). Consider the case in which two concurrent
     transactions modify a table. The first transaction executes:

<screen>
UPDATE accounts SET balance = balance + 100.00 WHERE acctnum = 11111;
</screen>

     This acquires a row-level lock on the row with the specified
     account number. Then, the second transaction executes:

<screen>
UPDATE accounts SET balance = balance + 100.00 WHERE acctnum = 22222;
UPDATE accounts SET balance = balance - 100.00 WHERE acctnum = 11111;
</screen>

     The first <command>UPDATE</command> statement successfully
     acquires a row-level lock on the specified row, so it succeeds in
     updating that row. However, the second <command>UPDATE</command>
     statement finds that the row it is attempting to update has
     already been locked, so it waits for the transaction that
     acquired the lock to complete. Transaction two is now waiting on
     transaction one to complete before it continues execution. Now,
     transaction one executes:

<screen>
UPDATE accounts SET balance = balance - 100.00 WHERE acctnum = 22222;
</screen>

     Transaction one attempts to acquire a row-level lock on the
     specified row, but it cannot: transaction two already holds such
     a lock. So it waits for transaction two to complete. Thus,
     transaction one is blocked on transaction two, and transaction
     two is blocked on transaction one: a deadlock
     condition. <productname>PostgreSQL</productname> will detect this
     situation and abort one of the transactions.
    </para>

    <para>
     The best defense against deadlocks is generally to avoid them by
     being certain that all applications using a database acquire
     locks on multiple objects in a consistent order. In the example
     above, if both transactions
     had updated the rows in the same order, no deadlock would have
     occurred. One should also ensure that the first lock acquired on
     an object in a transaction is the most restrictive mode that will be
     needed for that object.  If it is not feasible to verify this in
     advance, then deadlocks can be handled on-the-fly by retrying
     transactions that abort due to deadlocks.
    </para>

    <para>
     So long as no deadlock situation is detected, a transaction seeking
     either a table-level or row-level lock will wait indefinitely for
     conflicting locks to be released.  This means it is a bad idea for
     applications to hold transactions open for long periods of time
     (e.g., while waiting for user input).
    </para>
   </sect2>

   <sect2 id="advisory-locks">
    <title>Advisory Locks</title>

    <indexterm zone="advisory-locks">
     <primary>lock</primary>
     <secondary>advisory</secondary>
    </indexterm>

    <para>
     <productname>PostgreSQL</productname> provides a means for
     creating locks that have application-defined meanings.  These are
     called <firstterm>advisory locks</>, because the system does not
     enforce their use &mdash; it is up to the application to use them
     correctly.  Advisory locks can be useful for locking strategies
     that are an awkward fit for the MVCC model.  Once acquired, an
     advisory lock is held until explicitly released or the session ends.
     Unlike standard locks, advisory locks do not
     honor transaction semantics: a lock acquired during a
     transaction that is later rolled back will still be held following the
     rollback, and likewise an unlock is effective even if the calling
     transaction fails later.  The same lock can be acquired multiple times by
     its owning process: for each lock request there must be a corresponding
     unlock request before the lock is actually released.  (If a session
     already holds a given lock, additional requests will always succeed, even
     if other sessions are awaiting the lock.)  Like all locks in
     <productname>PostgreSQL</productname>, a complete list of advisory
     locks currently held by any session can be found in the
     <link linkend="view-pg-locks"><structname>pg_locks</structname></link>
     system view.
    </para>

    <para>
     Advisory locks are allocated out of a shared memory pool whose size
     is defined by the configuration variables
     <xref linkend="guc-max-locks-per-transaction"> and
     <xref linkend="guc-max-connections">.
     Care must be taken not to exhaust this
     memory or the server will be unable to grant any locks at all.
     This imposes an upper limit on the number of advisory locks
     grantable by the server, typically in the tens to hundreds of thousands
     depending on how the server is configured.
    </para>

    <para>
     A common use of advisory locks is to emulate pessimistic locking
     strategies typical of so called <quote>flat file</> data management
     systems.
     While a flag stored in a table could be used for the same purpose,
     advisory locks are faster, avoid MVCC bloat, and are automatically
     cleaned up by the server at the end of the session.
     In certain cases using this advisory locking method, especially in queries
     involving explicit ordering and <literal>LIMIT</> clauses, care must be
     taken to control the locks acquired because of the order in which SQL
     expressions are evaluated.  For example:
<screen>
SELECT pg_advisory_lock(id) FROM foo WHERE id = 12345; -- ok
SELECT pg_advisory_lock(id) FROM foo WHERE id &gt; 12345 LIMIT 100; -- danger!
SELECT pg_advisory_lock(q.id) FROM
(
  SELECT id FROM foo WHERE id &gt; 12345 LIMIT 100
) q; -- ok
</screen>
     In the above queries, the second form is dangerous because the
     <literal>LIMIT</> is not guaranteed to be applied before the locking
     function is executed.  This might cause some locks to be acquired
     that the application was not expecting, and hence would fail to release
     (until it ends the session).
     From the point of view of the application, such locks
     would be dangling, although still viewable in
     <structname>pg_locks</structname>.
    </para>

    <para>
     The functions provided to manipulate advisory locks are described in
     <xref linkend="functions-advisory-locks">.
    </para>
   </sect2>

  </sect1>

  <sect1 id="applevel-consistency">
   <title>Data Consistency Checks at the Application Level</title>

   <para>
    Because readers in <productname>PostgreSQL</productname>
    do not lock data, regardless of
    transaction isolation level, data read by one transaction can be
    overwritten by another concurrent transaction. In other words,
    if a row is returned by <command>SELECT</command> it doesn't mean that
    the row is still current at the instant it is returned (i.e., sometime
    after the current query began).  The row might have been modified or
    deleted by an already-committed transaction that committed after
    the <command>SELECT</command> started.
    Even if the row is still valid <quote>now</>, it could be changed or
    deleted
    before the current transaction does a commit or rollback.
   </para>

   <para>
    Another way to think about it is that each
    transaction sees a snapshot of the database contents, and concurrently
    executing transactions might very well see different snapshots.  So the
    whole concept of <quote>now</quote> is somewhat ill-defined anyway.
    This is not normally
    a big problem if the client applications are isolated from each other,
    but if the clients can communicate via channels outside the database
    then serious confusion might ensue.
   </para>

   <para>
    To ensure the current validity of a row and protect it against
    concurrent updates one must use <command>SELECT FOR UPDATE</command>,
    <command>SELECT FOR SHARE</command>, or an appropriate <command>LOCK
    TABLE</command> statement.  (<command>SELECT FOR UPDATE</command>
    and <command>SELECT FOR SHARE</command> lock just the
    returned rows against concurrent updates, while <command>LOCK
    TABLE</command> locks the whole table.)  This should be taken into
    account when porting applications to
    <productname>PostgreSQL</productname> from other environments.
   </para>

   <para>
    Global validity checks require extra thought under <acronym>MVCC</acronym>.
    For example, a banking application might wish to check that the sum of
    all credits in one table equals the sum of debits in another table,
    when both tables are being actively updated.  Comparing the results of two
    successive <literal>SELECT sum(...)</literal> commands will not work reliably in
    Read Committed mode, since the second query will likely include the results
    of transactions not counted by the first.  Doing the two sums in a
    single serializable transaction will give an accurate picture of only the
    effects of transactions that committed before the serializable transaction
    started &mdash; but one might legitimately wonder whether the answer is still
    relevant by the time it is delivered.  If the serializable transaction
    itself applied some changes before trying to make the consistency check,
    the usefulness of the check becomes even more debatable, since now it
    includes some but not all post-transaction-start changes.  In such cases
    a careful person might wish to lock all tables needed for the check,
    in order to get an indisputable picture of current reality.  A
    <literal>SHARE</> mode (or higher) lock guarantees that there are no
    uncommitted changes in the locked table, other than those of the current
    transaction.
   </para>

   <para>
    Note also that if one is relying on explicit locking to prevent concurrent
    changes, one should either use Read Committed mode, or in Serializable
    mode be careful to obtain
    locks before performing queries.  A lock obtained by a
    serializable transaction guarantees that no other transactions modifying
    the table are still running, but if the snapshot seen by the
    transaction predates obtaining the lock, it might predate some now-committed
    changes in the table.  A serializable transaction's snapshot is actually
    frozen at the start of its first query or data-modification command
    (<literal>SELECT</>, <literal>INSERT</>,
    <literal>UPDATE</>, or <literal>DELETE</>), so
    it is possible to obtain locks explicitly before the snapshot is
    frozen.
   </para>
  </sect1>

  <sect1 id="locking-indexes">
   <title>Locking and Indexes</title>

   <indexterm zone="locking-indexes">
    <primary>index</primary>
    <secondary>locks</secondary>
   </indexterm>

   <para>
    Though <productname>PostgreSQL</productname>
    provides nonblocking read/write access to table
    data, nonblocking read/write access is not currently offered for every
    index access method implemented
    in <productname>PostgreSQL</productname>.
    The various index types are handled as follows:

    <variablelist>
     <varlistentry>
      <term>
       B-tree and <acronym>GiST</acronym> indexes
      </term>
      <listitem>
       <para>
        Short-term share/exclusive page-level locks are used for
        read/write access. Locks are released immediately after each
        index row is fetched or inserted.  These index types provide
        the highest concurrency without deadlock conditions.
       </para>
      </listitem>
     </varlistentry>

     <varlistentry>
      <term>
       Hash indexes
      </term>
      <listitem>
       <para>
        Share/exclusive hash-bucket-level locks are used for read/write
        access.  Locks are released after the whole bucket is processed.
        Bucket-level locks provide better concurrency than index-level
        ones, but deadlock is possible since the locks are held longer
        than one index operation.
       </para>
      </listitem>
     </varlistentry>

     <varlistentry>
      <term>
       <acronym>GIN</acronym> indexes
      </term>
      <listitem>
       <para>
        Short-term share/exclusive page-level locks are used for
        read/write access. Locks are released immediately after each
        index row is fetched or inserted. But note that insertion of a
        GIN-indexed value usually produces several index key insertions
        per row, so GIN might do substantial work for a single value's
        insertion.
       </para>
      </listitem>
     </varlistentry>
    </variablelist>
   </para>

   <para>
    Currently, B-tree indexes offer the best performance for concurrent
    applications; since they also have more features than hash
    indexes, they are the recommended index type for concurrent
    applications that need to index scalar data. When dealing with
    non-scalar data, B-trees are not useful, and GiST or GIN indexes should
    be used instead.
   </para>
  </sect1>
 </chapter>