Skip to content

Commit fe9ab5c

Browse files
authored
Force truncation checkpoint if WAL becomes runaway (#473)
1 parent d02ba97 commit fe9ab5c

File tree

1 file changed

+53
-33
lines changed

1 file changed

+53
-33
lines changed

db.go

Lines changed: 53 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@ const (
3131
DefaultCheckpointInterval = 1 * time.Minute
3232
DefaultMinCheckpointPageN = 1000
3333
DefaultMaxCheckpointPageN = 10000
34+
DefaultTruncatePageN = 500000
3435
)
3536

3637
// MaxIndex is the maximum possible WAL index.
@@ -85,6 +86,16 @@ type DB struct {
8586
// unbounded if there are always read transactions occurring.
8687
MaxCheckpointPageN int
8788

89+
// Threshold of WAL size, in pages, before a forced truncation checkpoint.
90+
// A forced truncation checkpoint will block new transactions and wait for
91+
// existing transactions to finish before issuing a checkpoint and
92+
// truncating the WAL.
93+
//
94+
// If zero, no truncates are forced. This can cause the WAL to grow
95+
// unbounded if there's a sudden spike of changes between other
96+
// checkpoints.
97+
TruncatePageN int
98+
8899
// Time between automatic checkpoints in the WAL. This is done to allow
89100
// more fine-grained WAL files so that restores can be performed with
90101
// better precision.
@@ -112,6 +123,7 @@ func NewDB(path string) *DB {
112123

113124
MinCheckpointPageN: DefaultMinCheckpointPageN,
114125
MaxCheckpointPageN: DefaultMaxCheckpointPageN,
126+
TruncatePageN: DefaultTruncatePageN,
115127
CheckpointInterval: DefaultCheckpointInterval,
116128
MonitorInterval: DefaultMonitorInterval,
117129
Logger: slog.With("db", path),
@@ -750,7 +762,7 @@ func (db *DB) Sync(ctx context.Context) (err error) {
750762
}
751763

752764
// Synchronize real WAL with current shadow WAL.
753-
newWALSize, err := db.syncWAL(info)
765+
origWALSize, newWALSize, err := db.syncWAL(info)
754766
if err != nil {
755767
return fmt.Errorf("sync wal: %w", err)
756768
}
@@ -759,7 +771,9 @@ func (db *DB) Sync(ctx context.Context) (err error) {
759771
// If WAL size is greater than min threshold, attempt checkpoint.
760772
var checkpoint bool
761773
checkpointMode := CheckpointModePassive
762-
if db.MaxCheckpointPageN > 0 && newWALSize >= calcWALSize(db.pageSize, db.MaxCheckpointPageN) {
774+
if db.TruncatePageN > 0 && origWALSize >= calcWALSize(db.pageSize, db.TruncatePageN) {
775+
checkpoint, checkpointMode = true, CheckpointModeTruncate
776+
} else if db.MaxCheckpointPageN > 0 && newWALSize >= calcWALSize(db.pageSize, db.MaxCheckpointPageN) {
763777
checkpoint, checkpointMode = true, CheckpointModeRestart
764778
} else if newWALSize >= calcWALSize(db.pageSize, db.MinCheckpointPageN) {
765779
checkpoint = true
@@ -918,29 +932,29 @@ type syncInfo struct {
918932
}
919933

920934
// syncWAL copies pending bytes from the real WAL to the shadow WAL.
921-
func (db *DB) syncWAL(info syncInfo) (newSize int64, err error) {
935+
func (db *DB) syncWAL(info syncInfo) (origSize int64, newSize int64, err error) {
922936
// Copy WAL starting from end of shadow WAL. Exit if no new shadow WAL needed.
923-
newSize, err = db.copyToShadowWAL(info.shadowWALPath)
937+
origSize, newSize, err = db.copyToShadowWAL(info.shadowWALPath)
924938
if err != nil {
925-
return newSize, fmt.Errorf("cannot copy to shadow wal: %w", err)
939+
return origSize, newSize, fmt.Errorf("cannot copy to shadow wal: %w", err)
926940
} else if !info.restart {
927-
return newSize, nil // If no restart required, exit.
941+
return origSize, newSize, nil // If no restart required, exit.
928942
}
929943

930944
// Parse index of current shadow WAL file.
931945
dir, base := filepath.Split(info.shadowWALPath)
932946
index, err := ParseWALPath(base)
933947
if err != nil {
934-
return 0, fmt.Errorf("cannot parse shadow wal filename: %s", base)
948+
return 0, 0, fmt.Errorf("cannot parse shadow wal filename: %s", base)
935949
}
936950

937951
// Start a new shadow WAL file with next index.
938952
newShadowWALPath := filepath.Join(dir, FormatWALPath(index+1))
939953
newSize, err = db.initShadowWALFile(newShadowWALPath)
940954
if err != nil {
941-
return 0, fmt.Errorf("cannot init shadow wal file: name=%s err=%w", newShadowWALPath, err)
955+
return 0, 0, fmt.Errorf("cannot init shadow wal file: name=%s err=%w", newShadowWALPath, err)
942956
}
943-
return newSize, nil
957+
return origSize, newSize, nil
944958
}
945959

946960
func (db *DB) initShadowWALFile(filename string) (int64, error) {
@@ -976,52 +990,58 @@ func (db *DB) initShadowWALFile(filename string) (int64, error) {
976990
_ = os.Chown(filename, uid, gid)
977991

978992
// Copy as much shadow WAL as available.
979-
newSize, err := db.copyToShadowWAL(filename)
993+
_, newSize, err := db.copyToShadowWAL(filename)
980994
if err != nil {
981995
return 0, fmt.Errorf("cannot copy to new shadow wal: %w", err)
982996
}
983997
return newSize, nil
984998
}
985999

986-
func (db *DB) copyToShadowWAL(filename string) (newSize int64, err error) {
1000+
func (db *DB) copyToShadowWAL(filename string) (origWalSize int64, newSize int64, err error) {
9871001
logger := db.Logger.With("filename", filename)
9881002
logger.Debug("copy-shadow")
9891003

9901004
r, err := os.Open(db.WALPath())
9911005
if err != nil {
992-
return 0, err
1006+
return 0, 0, err
9931007
}
9941008
defer r.Close()
9951009

1010+
fi, err := r.Stat()
1011+
if err != nil {
1012+
return 0, 0, err
1013+
}
1014+
origWalSize = frameAlign(fi.Size(), db.pageSize)
1015+
9961016
w, err := os.OpenFile(filename, os.O_RDWR, 0666)
9971017
if err != nil {
998-
return 0, err
1018+
return 0, 0, err
9991019
}
10001020
defer w.Close()
10011021

1002-
fi, err := w.Stat()
1022+
fi, err = w.Stat()
10031023
if err != nil {
1004-
return 0, err
1024+
return 0, 0, err
10051025
}
10061026
origSize := frameAlign(fi.Size(), db.pageSize)
10071027

10081028
// Read shadow WAL header to determine byte order for checksum & salt.
10091029
hdr := make([]byte, WALHeaderSize)
10101030
if _, err := io.ReadFull(w, hdr); err != nil {
1011-
return 0, fmt.Errorf("read header: %w", err)
1031+
return 0, 0, fmt.Errorf("read header: %w", err)
10121032
}
10131033
hsalt0 := binary.BigEndian.Uint32(hdr[16:])
10141034
hsalt1 := binary.BigEndian.Uint32(hdr[20:])
10151035

10161036
bo, err := headerByteOrder(hdr)
10171037
if err != nil {
1018-
return 0, err
1038+
return 0, 0, err
10191039
}
10201040

10211041
// Read previous checksum.
10221042
chksum0, chksum1, err := readLastChecksumFrom(w, db.pageSize)
10231043
if err != nil {
1024-
return 0, fmt.Errorf("last checksum: %w", err)
1044+
return 0, 0, fmt.Errorf("last checksum: %w", err)
10251045
}
10261046

10271047
// Write to a temporary shadow file.
@@ -1030,15 +1050,15 @@ func (db *DB) copyToShadowWAL(filename string) (newSize int64, err error) {
10301050

10311051
f, err := internal.CreateFile(tempFilename, db.fileInfo)
10321052
if err != nil {
1033-
return 0, fmt.Errorf("create temp file: %w", err)
1053+
return 0, 0, fmt.Errorf("create temp file: %w", err)
10341054
}
10351055
defer f.Close()
10361056

10371057
// Seek to correct position on real wal.
10381058
if _, err := r.Seek(origSize, io.SeekStart); err != nil {
1039-
return 0, fmt.Errorf("real wal seek: %w", err)
1059+
return 0, 0, fmt.Errorf("real wal seek: %w", err)
10401060
} else if _, err := w.Seek(origSize, io.SeekStart); err != nil {
1041-
return 0, fmt.Errorf("shadow wal seek: %w", err)
1061+
return 0, 0, fmt.Errorf("shadow wal seek: %w", err)
10421062
}
10431063

10441064
// Read through WAL from last position to find the page of the last
@@ -1052,7 +1072,7 @@ func (db *DB) copyToShadowWAL(filename string) (newSize int64, err error) {
10521072
logger.Debug("copy-shadow: break", "offset", offset, "error", err)
10531073
break // end of file or partial page
10541074
} else if err != nil {
1055-
return 0, fmt.Errorf("read wal: %w", err)
1075+
return 0, 0, fmt.Errorf("read wal: %w", err)
10561076
}
10571077

10581078
// Read frame salt & compare to header salt. Stop reading on mismatch.
@@ -1075,7 +1095,7 @@ func (db *DB) copyToShadowWAL(filename string) (newSize int64, err error) {
10751095

10761096
// Write page to temporary WAL file.
10771097
if _, err := f.Write(frame); err != nil {
1078-
return 0, fmt.Errorf("write temp shadow wal: %w", err)
1098+
return 0, 0, fmt.Errorf("write temp shadow wal: %w", err)
10791099
}
10801100

10811101
logger.Debug("copy-shadow: ok", "offset", offset, "salt", fmt.Sprintf("%x %x", salt0, salt1))
@@ -1090,39 +1110,39 @@ func (db *DB) copyToShadowWAL(filename string) (newSize int64, err error) {
10901110

10911111
// If no WAL writes found, exit.
10921112
if origSize == lastCommitSize {
1093-
return origSize, nil
1113+
return origSize, lastCommitSize, nil
10941114
}
10951115

10961116
walByteN := lastCommitSize - origSize
10971117

10981118
// Move to beginning of temporary file.
10991119
if _, err := f.Seek(0, io.SeekStart); err != nil {
1100-
return 0, fmt.Errorf("temp file seek: %w", err)
1120+
return 0, 0, fmt.Errorf("temp file seek: %w", err)
11011121
}
11021122

11031123
// Copy from temporary file to shadow WAL.
11041124
if _, err := io.Copy(w, &io.LimitedReader{R: f, N: walByteN}); err != nil {
1105-
return 0, fmt.Errorf("write shadow file: %w", err)
1125+
return 0, 0, fmt.Errorf("write shadow file: %w", err)
11061126
}
11071127

11081128
// Close & remove temporary file.
11091129
if err := f.Close(); err != nil {
1110-
return 0, err
1130+
return 0, 0, err
11111131
} else if err := os.Remove(tempFilename); err != nil {
1112-
return 0, err
1132+
return 0, 0, err
11131133
}
11141134

11151135
// Sync & close shadow WAL.
11161136
if err := w.Sync(); err != nil {
1117-
return 0, err
1137+
return 0, 0, err
11181138
} else if err := w.Close(); err != nil {
1119-
return 0, err
1139+
return 0, 0, err
11201140
}
11211141

11221142
// Track total number of bytes written to WAL.
11231143
db.totalWALBytesCounter.Add(float64(walByteN))
11241144

1125-
return lastCommitSize, nil
1145+
return origWalSize, lastCommitSize, nil
11261146
}
11271147

11281148
// ShadowWALReader opens a reader for a shadow WAL file at a given position.
@@ -1297,7 +1317,7 @@ func (db *DB) checkpoint(ctx context.Context, generation, mode string) error {
12971317
}
12981318

12991319
// Copy shadow WAL before checkpoint to copy as much as possible.
1300-
if _, err := db.copyToShadowWAL(shadowWALPath); err != nil {
1320+
if _, _, err := db.copyToShadowWAL(shadowWALPath); err != nil {
13011321
return fmt.Errorf("cannot copy to end of shadow wal before checkpoint: %w", err)
13021322
}
13031323

@@ -1332,7 +1352,7 @@ func (db *DB) checkpoint(ctx context.Context, generation, mode string) error {
13321352
}
13331353

13341354
// Copy the end of the previous WAL before starting a new shadow WAL.
1335-
if _, err := db.copyToShadowWAL(shadowWALPath); err != nil {
1355+
if _, _, err := db.copyToShadowWAL(shadowWALPath); err != nil {
13361356
return fmt.Errorf("cannot copy to end of shadow wal: %w", err)
13371357
}
13381358

0 commit comments

Comments
 (0)