Skip to content

Commit 71bf7e7

Browse files
committed
Fixed memory leak
1 parent 0118adc commit 71bf7e7

File tree

5 files changed

+218
-50
lines changed

5 files changed

+218
-50
lines changed

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
module github.com/dolthub/go-icu-regex
22

3-
go 1.19
3+
go 1.23
44

55
require (
66
github.com/stretchr/testify v1.8.2
7-
github.com/tetratelabs/wazero v1.1.0
7+
github.com/tetratelabs/wazero v1.8.2
88
gopkg.in/src-d/go-errors.v1 v1.0.0
99
)
1010

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
1212
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
1313
github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8=
1414
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
15-
github.com/tetratelabs/wazero v1.1.0 h1:EByoAhC+QcYpwSZJSs/aV0uokxPwBgKxfiokSUwAknQ=
16-
github.com/tetratelabs/wazero v1.1.0/go.mod h1:wYx2gNRg8/WihJfSDxA1TIL8H+GkfLYm+bIfbblu9VQ=
15+
github.com/tetratelabs/wazero v1.8.2 h1:yIgLR/b2bN31bjxwXHD8a3d+BogigR952csSDdLYEv4=
16+
github.com/tetratelabs/wazero v1.8.2/go.mod h1:yAI0XTsMBhREkM/YDAK/zNou3GoiAce1P6+rp/wQhjs=
1717
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
1818
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
1919
gopkg.in/src-d/go-errors.v1 v1.0.0 h1:cooGdZnCjYbeS1zb1s6pVAAimTdKceRrpn7aKOnNIfc=

initialization.go

Lines changed: 1 addition & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -15,57 +15,13 @@
1515
package regex
1616

1717
import (
18-
"context"
1918
_ "embed"
20-
"runtime"
21-
"sync"
22-
2319
"github.com/tetratelabs/wazero"
24-
"github.com/tetratelabs/wazero/api"
25-
"github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1"
2620
)
2721

2822
// Embedded data that will be loaded into our WASM runtime
2923
var (
3024
//go:embed icu/wasm/icu.wasm
3125
icuWasm []byte // This is generated using the "build.sh" script in the "icu" folder
26+
icuConfig = wazero.NewModuleConfig()
3227
)
33-
34-
var r wazero.Runtime
35-
var modulePool = sync.Pool{
36-
New: func() any {
37-
ctx := context.Background()
38-
39-
// Load the ICU library
40-
mod, err := r.Instantiate(ctx, icuWasm)
41-
if err != nil {
42-
panic(err)
43-
}
44-
45-
// We set a finalizer here, as the pool will periodically empty itself, and we need to close the module during
46-
// that time.
47-
runtime.SetFinalizer(mod, func(mod api.Module) {
48-
_ = mod.Close(context.Background())
49-
})
50-
return mod
51-
},
52-
}
53-
54-
func init() {
55-
ctx := context.Background()
56-
57-
// Create the WASM runtime
58-
r = wazero.NewRuntime(ctx)
59-
wasi_snapshot_preview1.MustInstantiate(ctx, r)
60-
envBuilder := r.NewHostModuleBuilder("env")
61-
noop_two := func(int32, int32) int32 { return -1 }
62-
noop_four := func(int32, int32, int32, int32) int32 { return -1 }
63-
envBuilder.NewFunctionBuilder().WithFunc(noop_two).Export("__syscall_stat64")
64-
envBuilder.NewFunctionBuilder().WithFunc(noop_two).Export("__syscall_lstat64")
65-
envBuilder.NewFunctionBuilder().WithFunc(noop_two).Export("__syscall_fstat64")
66-
envBuilder.NewFunctionBuilder().WithFunc(noop_four).Export("__syscall_newfstatat")
67-
_, err := envBuilder.Instantiate(ctx)
68-
if err != nil {
69-
panic(err)
70-
}
71-
}

pool.go

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
// Copyright 2024 Dolthub, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package regex
16+
17+
import (
18+
"context"
19+
"github.com/tetratelabs/wazero"
20+
"github.com/tetratelabs/wazero/api"
21+
"github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1"
22+
"reflect"
23+
"runtime"
24+
"sync"
25+
)
26+
27+
// modulePool is the pool that is used internally by the project.
28+
var modulePool = NewPool()
29+
30+
// RuntimeTracker tracks all relevant information that the Pool needs regarding a runtime.
31+
type RuntimeTracker struct {
32+
id uint64
33+
r wazero.Runtime
34+
compiled wazero.CompiledModule
35+
modules []api.Module
36+
max uint64
37+
fetches uint64
38+
}
39+
40+
// Pool is a special pool object for handling ICU regex modules. The cause isn't quite clear, but runtimes continue to
41+
// hold onto memory even when their owned modules are closed, so this special pool type will also recycle the runtimes
42+
// once a certain number of modules have been fetched.
43+
type Pool struct {
44+
mutex *sync.Mutex
45+
runtimes []*RuntimeTracker
46+
returnedModules map[uintptr]uint64
47+
nextId uint64
48+
maxFetch uint64
49+
}
50+
51+
// NewPool creates a new *Pool.
52+
func NewPool() *Pool {
53+
r, compiled := createRuntime(context.Background())
54+
pool := &Pool{
55+
mutex: &sync.Mutex{},
56+
runtimes: []*RuntimeTracker{{
57+
id: 1,
58+
r: r,
59+
compiled: compiled,
60+
modules: make([]api.Module, 0, 16),
61+
max: 0,
62+
fetches: 0,
63+
}},
64+
returnedModules: make(map[uintptr]uint64),
65+
nextId: 2,
66+
maxFetch: 128,
67+
}
68+
return pool
69+
}
70+
71+
// Get returns a new module from the pool.
72+
func (pool *Pool) Get() api.Module {
73+
pool.mutex.Lock()
74+
defer pool.mutex.Unlock()
75+
76+
ctx := context.Background()
77+
rtracker := pool.runtimes[len(pool.runtimes)-1]
78+
rtracker.fetches++
79+
// If we've used up the number of fetches allowed in this runtime, then we'll create a new one
80+
if rtracker.fetches >= pool.maxFetch {
81+
r, compiled := createRuntime(ctx)
82+
rtracker = &RuntimeTracker{
83+
id: pool.nextId,
84+
r: r,
85+
compiled: compiled,
86+
modules: make([]api.Module, 0, 16),
87+
max: 0,
88+
fetches: 0,
89+
}
90+
pool.runtimes = append(pool.runtimes, rtracker)
91+
pool.nextId++
92+
}
93+
var module api.Module
94+
// If the runtime has no modules remaining, then we need to create a new module
95+
if len(rtracker.modules) == 0 {
96+
rtracker.max++
97+
var err error
98+
module, err = rtracker.r.InstantiateModule(ctx, rtracker.compiled, icuConfig)
99+
if err != nil {
100+
panic(err)
101+
}
102+
} else {
103+
// Pop the last module from the slice
104+
module = rtracker.modules[len(rtracker.modules)-1]
105+
rtracker.modules = rtracker.modules[:len(rtracker.modules)-1]
106+
}
107+
// Now we need to track that this module is being returned
108+
pool.returnedModules[reflect.ValueOf(module).Pointer()] = rtracker.id
109+
runtime.SetFinalizer(module, func(module api.Module) {
110+
pool.finalized(module)
111+
})
112+
return module
113+
}
114+
115+
// Put returns the module to the pool.
116+
func (pool *Pool) Put(module api.Module) {
117+
pool.mutex.Lock()
118+
defer pool.mutex.Unlock()
119+
pool.receivedModule(module, true)
120+
}
121+
122+
// finalized is called by the finalizer, and only exists to catch orphaned modules.
123+
func (pool *Pool) finalized(module api.Module) {
124+
pool.mutex.Lock()
125+
defer pool.mutex.Unlock()
126+
pool.receivedModule(module, false)
127+
}
128+
129+
// receivedModule is called when either the module is returned through Put, or the finalizer catches an orphaned module
130+
// through finalized.
131+
func (pool *Pool) receivedModule(module api.Module, isPut bool) {
132+
// Remove the finalizer that was set when the object was fetched.
133+
// This is only called from Put, as the finalizer is being called so we don't want to remove it.
134+
if isPut {
135+
runtime.SetFinalizer(module, nil)
136+
}
137+
// Grab the runtime ID and remove the module from the tracking map
138+
ptr := reflect.ValueOf(module).Pointer()
139+
runtimeId := pool.returnedModules[ptr]
140+
delete(pool.returnedModules, ptr)
141+
for rtrackerIdx := 0; rtrackerIdx < len(pool.runtimes); rtrackerIdx++ {
142+
ctx := context.Background()
143+
rtracker := pool.runtimes[rtrackerIdx]
144+
// If this is a different runtime, then we still need to check whether it should be removed
145+
if rtracker.id != runtimeId {
146+
if rtracker.fetches >= pool.maxFetch && uint64(len(rtracker.modules)) >= rtracker.max {
147+
pool.closeRuntime(ctx, rtrackerIdx, rtracker)
148+
rtrackerIdx--
149+
}
150+
continue
151+
}
152+
if isPut {
153+
// Add the module back to the runtime when called from Put
154+
rtracker.modules = append(rtracker.modules, module)
155+
} else {
156+
// We remove the module from the runtime altogether when called from the finalizer
157+
rtracker.max--
158+
_ = module.Close(ctx)
159+
}
160+
// If this runtime has run out of fetches and all of its modules are back, then we need to close and remove it
161+
if rtracker.fetches >= pool.maxFetch && uint64(len(rtracker.modules)) >= rtracker.max {
162+
pool.closeRuntime(ctx, rtrackerIdx, rtracker)
163+
}
164+
return
165+
}
166+
// We could not find the runtime ID (or the module was not in the map), which should never happen
167+
panic("go-icu-regex pool found orphaned module")
168+
}
169+
170+
// closeRuntime closes the given runtime, as well as removing it from the list of runtimes.
171+
func (pool *Pool) closeRuntime(ctx context.Context, rtrackerIdx int, rtracker *RuntimeTracker) {
172+
// First we'll close all the modules, then we'll close the runtime itself
173+
for _, mod := range rtracker.modules {
174+
_ = mod.Close(ctx)
175+
}
176+
_ = rtracker.r.Close(ctx)
177+
// We then remove the runtime from the slice
178+
newSlice := make([]*RuntimeTracker, len(pool.runtimes)-1)
179+
copy(newSlice, pool.runtimes[:rtrackerIdx])
180+
copy(newSlice, pool.runtimes[rtrackerIdx+1:])
181+
pool.runtimes = newSlice
182+
}
183+
184+
// createRuntime creates a new runtime, as well as compiling the ICU module. The compiled module is only valid with the
185+
// runtime that compiled it.
186+
func createRuntime(ctx context.Context) (wazero.Runtime, wazero.CompiledModule) {
187+
r := wazero.NewRuntime(ctx)
188+
wasi_snapshot_preview1.MustInstantiate(ctx, r)
189+
envBuilder := r.NewHostModuleBuilder("env")
190+
noop_two := func(int32, int32) int32 { return -1 }
191+
noop_four := func(int32, int32, int32, int32) int32 { return -1 }
192+
envBuilder.NewFunctionBuilder().WithFunc(noop_two).Export("__syscall_stat64")
193+
envBuilder.NewFunctionBuilder().WithFunc(noop_two).Export("__syscall_lstat64")
194+
envBuilder.NewFunctionBuilder().WithFunc(noop_two).Export("__syscall_fstat64")
195+
envBuilder.NewFunctionBuilder().WithFunc(noop_four).Export("__syscall_newfstatat")
196+
_, err := envBuilder.Instantiate(ctx)
197+
if err != nil {
198+
panic(err)
199+
}
200+
compiledIcuWasm, err := r.CompileModule(ctx, icuWasm)
201+
if err != nil {
202+
panic(err)
203+
}
204+
return r, compiledIcuWasm
205+
}
206+
207+
// SetPoolFetchMax determines how many fetches are allowed from the internal Pool before a runtime is recycled.
208+
func SetPoolFetchMax(maxFetch uint64) {
209+
modulePool.mutex.Lock()
210+
defer modulePool.mutex.Unlock()
211+
modulePool.maxFetch = maxFetch
212+
}

regex.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ const (
108108
// to call Close. This Regex is intended for single-threaded use only, therefore it is advised for each thread to use
109109
// its own Regex when one is needed.
110110
func CreateRegex(stringBufferInBytes uint32) Regex {
111-
mod := modulePool.Get().(api.Module)
111+
mod := modulePool.Get()
112112
pr := &privateRegex{
113113
mod: mod,
114114
regexPtr: 0,

0 commit comments

Comments
 (0)