@@ -31,6 +31,7 @@ const (
3131 DefaultCheckpointInterval = 1 * time .Minute
3232 DefaultMinCheckpointPageN = 1000
3333 DefaultMaxCheckpointPageN = 10000
34+ DefaultTruncatePageN = 500000
3435)
3536
3637// MaxIndex is the maximum possible WAL index.
@@ -85,6 +86,16 @@ type DB struct {
8586 // unbounded if there are always read transactions occurring.
8687 MaxCheckpointPageN int
8788
89+ // Threshold of WAL size, in pages, before a forced truncation checkpoint.
90+ // A forced truncation checkpoint will block new transactions and wait for
91+ // existing transactions to finish before issuing a checkpoint and
92+ // truncating the WAL.
93+ //
94+ // If zero, no truncates are forced. This can cause the WAL to grow
95+ // unbounded if there's a sudden spike of changes between other
96+ // checkpoints.
97+ TruncatePageN int
98+
8899 // Time between automatic checkpoints in the WAL. This is done to allow
89100 // more fine-grained WAL files so that restores can be performed with
90101 // better precision.
@@ -112,6 +123,7 @@ func NewDB(path string) *DB {
112123
113124 MinCheckpointPageN : DefaultMinCheckpointPageN ,
114125 MaxCheckpointPageN : DefaultMaxCheckpointPageN ,
126+ TruncatePageN : DefaultTruncatePageN ,
115127 CheckpointInterval : DefaultCheckpointInterval ,
116128 MonitorInterval : DefaultMonitorInterval ,
117129 Logger : slog .With ("db" , path ),
@@ -750,7 +762,7 @@ func (db *DB) Sync(ctx context.Context) (err error) {
750762 }
751763
752764 // Synchronize real WAL with current shadow WAL.
753- newWALSize , err := db .syncWAL (info )
765+ origWALSize , newWALSize , err := db .syncWAL (info )
754766 if err != nil {
755767 return fmt .Errorf ("sync wal: %w" , err )
756768 }
@@ -759,7 +771,9 @@ func (db *DB) Sync(ctx context.Context) (err error) {
759771 // Choose a checkpoint by WAL size: forced truncate at TruncatePageN, else restart at MaxCheckpointPageN, else passive at MinCheckpointPageN.
760772 var checkpoint bool
761773 checkpointMode := CheckpointModePassive
762- if db .MaxCheckpointPageN > 0 && newWALSize >= calcWALSize (db .pageSize , db .MaxCheckpointPageN ) {
774+ if db .TruncatePageN > 0 && origWALSize >= calcWALSize (db .pageSize , db .TruncatePageN ) {
775+ checkpoint , checkpointMode = true , CheckpointModeTruncate
776+ } else if db .MaxCheckpointPageN > 0 && newWALSize >= calcWALSize (db .pageSize , db .MaxCheckpointPageN ) {
763777 checkpoint , checkpointMode = true , CheckpointModeRestart
764778 } else if newWALSize >= calcWALSize (db .pageSize , db .MinCheckpointPageN ) {
765779 checkpoint = true
@@ -918,29 +932,29 @@ type syncInfo struct {
918932}
919933
920934// syncWAL copies pending bytes from the real WAL to the shadow WAL.
921- func (db * DB ) syncWAL (info syncInfo ) (newSize int64 , err error ) {
935+ func (db * DB ) syncWAL (info syncInfo ) (origSize int64 , newSize int64 , err error ) {
922936 // Copy WAL starting from end of shadow WAL. Exit if no new shadow WAL needed.
923- newSize , err = db .copyToShadowWAL (info .shadowWALPath )
937+ origSize , newSize , err = db .copyToShadowWAL (info .shadowWALPath )
924938 if err != nil {
925- return newSize , fmt .Errorf ("cannot copy to shadow wal: %w" , err )
939+ return origSize , newSize , fmt .Errorf ("cannot copy to shadow wal: %w" , err )
926940 } else if ! info .restart {
927- return newSize , nil // If no restart required, exit.
941+ return origSize , newSize , nil // If no restart required, exit.
928942 }
929943
930944 // Parse index of current shadow WAL file.
931945 dir , base := filepath .Split (info .shadowWALPath )
932946 index , err := ParseWALPath (base )
933947 if err != nil {
934- return 0 , fmt .Errorf ("cannot parse shadow wal filename: %s" , base )
948+ return 0 , 0 , fmt .Errorf ("cannot parse shadow wal filename: %s" , base )
935949 }
936950
937951 // Start a new shadow WAL file with next index.
938952 newShadowWALPath := filepath .Join (dir , FormatWALPath (index + 1 ))
939953 newSize , err = db .initShadowWALFile (newShadowWALPath )
940954 if err != nil {
941- return 0 , fmt .Errorf ("cannot init shadow wal file: name=%s err=%w" , newShadowWALPath , err )
955+ return 0 , 0 , fmt .Errorf ("cannot init shadow wal file: name=%s err=%w" , newShadowWALPath , err )
942956 }
943- return newSize , nil
957+ return origSize , newSize , nil
944958}
945959
946960func (db * DB ) initShadowWALFile (filename string ) (int64 , error ) {
@@ -976,52 +990,58 @@ func (db *DB) initShadowWALFile(filename string) (int64, error) {
976990 _ = os .Chown (filename , uid , gid )
977991
978992 // Copy as much shadow WAL as available.
979- newSize , err := db .copyToShadowWAL (filename )
993+ _ , newSize , err := db .copyToShadowWAL (filename )
980994 if err != nil {
981995 return 0 , fmt .Errorf ("cannot copy to new shadow wal: %w" , err )
982996 }
983997 return newSize , nil
984998}
985999
986- func (db * DB ) copyToShadowWAL (filename string ) (newSize int64 , err error ) {
1000+ func (db * DB ) copyToShadowWAL (filename string ) (origWalSize int64 , newSize int64 , err error ) {
9871001 logger := db .Logger .With ("filename" , filename )
9881002 logger .Debug ("copy-shadow" )
9891003
9901004 r , err := os .Open (db .WALPath ())
9911005 if err != nil {
992- return 0 , err
1006+ return 0 , 0 , err
9931007 }
9941008 defer r .Close ()
9951009
1010+ fi , err := r .Stat ()
1011+ if err != nil {
1012+ return 0 , 0 , err
1013+ }
1014+ origWalSize = frameAlign (fi .Size (), db .pageSize )
1015+
9961016 w , err := os .OpenFile (filename , os .O_RDWR , 0666 )
9971017 if err != nil {
998- return 0 , err
1018+ return 0 , 0 , err
9991019 }
10001020 defer w .Close ()
10011021
1002- fi , err : = w .Stat ()
1022+ fi , err = w .Stat ()
10031023 if err != nil {
1004- return 0 , err
1024+ return 0 , 0 , err
10051025 }
10061026 origSize := frameAlign (fi .Size (), db .pageSize )
10071027
10081028 // Read shadow WAL header to determine byte order for checksum & salt.
10091029 hdr := make ([]byte , WALHeaderSize )
10101030 if _ , err := io .ReadFull (w , hdr ); err != nil {
1011- return 0 , fmt .Errorf ("read header: %w" , err )
1031+ return 0 , 0 , fmt .Errorf ("read header: %w" , err )
10121032 }
10131033 hsalt0 := binary .BigEndian .Uint32 (hdr [16 :])
10141034 hsalt1 := binary .BigEndian .Uint32 (hdr [20 :])
10151035
10161036 bo , err := headerByteOrder (hdr )
10171037 if err != nil {
1018- return 0 , err
1038+ return 0 , 0 , err
10191039 }
10201040
10211041 // Read previous checksum.
10221042 chksum0 , chksum1 , err := readLastChecksumFrom (w , db .pageSize )
10231043 if err != nil {
1024- return 0 , fmt .Errorf ("last checksum: %w" , err )
1044+ return 0 , 0 , fmt .Errorf ("last checksum: %w" , err )
10251045 }
10261046
10271047 // Write to a temporary shadow file.
@@ -1030,15 +1050,15 @@ func (db *DB) copyToShadowWAL(filename string) (newSize int64, err error) {
10301050
10311051 f , err := internal .CreateFile (tempFilename , db .fileInfo )
10321052 if err != nil {
1033- return 0 , fmt .Errorf ("create temp file: %w" , err )
1053+ return 0 , 0 , fmt .Errorf ("create temp file: %w" , err )
10341054 }
10351055 defer f .Close ()
10361056
10371057 // Seek to correct position on real wal.
10381058 if _ , err := r .Seek (origSize , io .SeekStart ); err != nil {
1039- return 0 , fmt .Errorf ("real wal seek: %w" , err )
1059+ return 0 , 0 , fmt .Errorf ("real wal seek: %w" , err )
10401060 } else if _ , err := w .Seek (origSize , io .SeekStart ); err != nil {
1041- return 0 , fmt .Errorf ("shadow wal seek: %w" , err )
1061+ return 0 , 0 , fmt .Errorf ("shadow wal seek: %w" , err )
10421062 }
10431063
10441064 // Read through WAL from last position to find the page of the last
@@ -1052,7 +1072,7 @@ func (db *DB) copyToShadowWAL(filename string) (newSize int64, err error) {
10521072 logger .Debug ("copy-shadow: break" , "offset" , offset , "error" , err )
10531073 break // end of file or partial page
10541074 } else if err != nil {
1055- return 0 , fmt .Errorf ("read wal: %w" , err )
1075+ return 0 , 0 , fmt .Errorf ("read wal: %w" , err )
10561076 }
10571077
10581078 // Read frame salt & compare to header salt. Stop reading on mismatch.
@@ -1075,7 +1095,7 @@ func (db *DB) copyToShadowWAL(filename string) (newSize int64, err error) {
10751095
10761096 // Write page to temporary WAL file.
10771097 if _ , err := f .Write (frame ); err != nil {
1078- return 0 , fmt .Errorf ("write temp shadow wal: %w" , err )
1098+ return 0 , 0 , fmt .Errorf ("write temp shadow wal: %w" , err )
10791099 }
10801100
10811101 logger .Debug ("copy-shadow: ok" , "offset" , offset , "salt" , fmt .Sprintf ("%x %x" , salt0 , salt1 ))
@@ -1090,39 +1110,39 @@ func (db *DB) copyToShadowWAL(filename string) (newSize int64, err error) {
10901110
10911111 // If no WAL writes found, exit.
10921112 if origSize == lastCommitSize {
1093- return origSize , nil
1113+ return origSize , lastCommitSize , nil
10941114 }
10951115
10961116 walByteN := lastCommitSize - origSize
10971117
10981118 // Move to beginning of temporary file.
10991119 if _ , err := f .Seek (0 , io .SeekStart ); err != nil {
1100- return 0 , fmt .Errorf ("temp file seek: %w" , err )
1120+ return 0 , 0 , fmt .Errorf ("temp file seek: %w" , err )
11011121 }
11021122
11031123 // Copy from temporary file to shadow WAL.
11041124 if _ , err := io .Copy (w , & io.LimitedReader {R : f , N : walByteN }); err != nil {
1105- return 0 , fmt .Errorf ("write shadow file: %w" , err )
1125+ return 0 , 0 , fmt .Errorf ("write shadow file: %w" , err )
11061126 }
11071127
11081128 // Close & remove temporary file.
11091129 if err := f .Close (); err != nil {
1110- return 0 , err
1130+ return 0 , 0 , err
11111131 } else if err := os .Remove (tempFilename ); err != nil {
1112- return 0 , err
1132+ return 0 , 0 , err
11131133 }
11141134
11151135 // Sync & close shadow WAL.
11161136 if err := w .Sync (); err != nil {
1117- return 0 , err
1137+ return 0 , 0 , err
11181138 } else if err := w .Close (); err != nil {
1119- return 0 , err
1139+ return 0 , 0 , err
11201140 }
11211141
11221142 // Track total number of bytes written to WAL.
11231143 db .totalWALBytesCounter .Add (float64 (walByteN ))
11241144
1125- return lastCommitSize , nil
1145+ return origWalSize , lastCommitSize , nil
11261146}
11271147
11281148// ShadowWALReader opens a reader for a shadow WAL file at a given position.
@@ -1297,7 +1317,7 @@ func (db *DB) checkpoint(ctx context.Context, generation, mode string) error {
12971317 }
12981318
12991319 // Copy shadow WAL before checkpoint to copy as much as possible.
1300- if _ , err := db .copyToShadowWAL (shadowWALPath ); err != nil {
1320+ if _ , _ , err := db .copyToShadowWAL (shadowWALPath ); err != nil {
13011321 return fmt .Errorf ("cannot copy to end of shadow wal before checkpoint: %w" , err )
13021322 }
13031323
@@ -1332,7 +1352,7 @@ func (db *DB) checkpoint(ctx context.Context, generation, mode string) error {
13321352 }
13331353
13341354 // Copy the end of the previous WAL before starting a new shadow WAL.
1335- if _ , err := db .copyToShadowWAL (shadowWALPath ); err != nil {
1355+ if _ , _ , err := db .copyToShadowWAL (shadowWALPath ); err != nil {
13361356 return fmt .Errorf ("cannot copy to end of shadow wal: %w" , err )
13371357 }
13381358
0 commit comments