Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion initialization.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@ package regex

import (
_ "embed"

"github.com/tetratelabs/wazero"
)

// Embedded data that will be loaded into our WASM runtime
// NOTE(review): the icuWasm declaration appears twice below; this looks like the removed and added sides of a
// whitespace-only diff line, so only one of the two should exist in the real file — confirm against the repository.
var (
//go:embed icu/wasm/icu.wasm
icuWasm []byte // This is generated using the "build.sh" script in the "icu" folder
icuWasm []byte // This is generated using the "build.sh" script in the "icu" folder
// icuConfig is the wazero module configuration, created with wazero.NewModuleConfig and not customized here.
icuConfig = wazero.NewModuleConfig()
)
7 changes: 4 additions & 3 deletions pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@ package regex

import (
"context"
"github.com/tetratelabs/wazero"
"github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1"
"reflect"
"runtime"
"sync"

"github.com/tetratelabs/wazero"
"github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1"
)

// modulePool is the pool that is used internally by the project.
Expand Down
105 changes: 105 additions & 0 deletions regex.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,19 @@ type Regex interface {
// SetMatchString sets the string that we will either be matching against, or executing the replacements on. This
// must be called after SetRegexString, but before any other calls.
SetMatchString(ctx context.Context, matchStr string) error
// IndexOf returns the index of the previously-set regex matching the previously-set match string. Must call
// SetRegexString and SetMatchString before this function. `endIndex` determines whether the returned index is at
// the beginning or end of the match. `start` and `occurrence` start at 1, not 0. Returns 0 if the index was not found.
IndexOf(ctx context.Context, start int, occurrence int, endIndex bool) (int, error)
// Matches returns whether the previously-set regex matches the previously-set match string. Must call
// SetRegexString and SetMatchString before this function.
Matches(ctx context.Context, start int, occurrence int) (bool, error)
// Replace returns a new string with the replacement string occupying the matched portions of the match string,
// based on the regex. Position starts at 1, not 0. Must call SetRegexString and SetMatchString before this function.
Replace(ctx context.Context, replacementStr string, position int, occurrence int) (string, error)
// Substring returns the match of the previously-set match string, using the previously-set regex. Must call
// SetRegexString and SetMatchString before this function. `start` and `occurrence` start at 1, not 0.
Substring(ctx context.Context, start int, occurrence int) (string, bool, error)
// StringBufferSize returns the size of the string buffers, in bytes. If the string buffer is not being used, then
// this returns zero.
StringBufferSize() uint32
Expand Down Expand Up @@ -280,6 +287,56 @@ func (pr *privateRegex) SetMatchString(ctx context.Context, matchStr string) (er
return nil
}

// IndexOf implements the interface Regex.
//
// The search begins at the 1-based position `start` and advances until the 1-based `occurrence` has been reached.
// The returned index is 1-based as well: it is the value reported by uregex_start (or by uregex_end when `endIndex`
// is true) plus one. A result of 0 with a nil error means the requested occurrence was not found.
func (pr *privateRegex) IndexOf(ctx context.Context, start int, occurrence int, endIndex bool) (int, error) {
	// Both the regex and the match string must be in place; the regex is validated first.
	switch {
	case pr.regexPtr == 0:
		return 0, ErrRegexNotYetSet.New()
	case pr.matchStrUPtr == 0:
		return 0, ErrMatchNotYetSet.New()
	}

	// Locate the first match at or after the (zero-based) starting position, then step forward to the occurrence
	// that was asked for. The stepping stops early as soon as no further match exists.
	var status UErrorCode
	found, err := pr.uregex_find(ctx, pr.regexPtr, start-1, &status)
	if err != nil {
		return 0, err
	}
	for seen := 1; found && seen < occurrence; seen++ {
		if found, err = pr.uregex_findNext(ctx, pr.regexPtr, &status); err != nil {
			return 0, err
		}
	}
	if !found {
		return 0, nil
	}

	// Fetch the requested boundary of the whole match (group 0)
	var zeroBased int
	if !endIndex {
		startPos, err := pr.uregex_start(ctx, pr.regexPtr, 0, &status)
		if err != nil {
			return 0, err
		}
		zeroBased = int(startPos)
	} else {
		endPos, err := pr.uregex_end(ctx, pr.regexPtr, 0, &status)
		if err != nil {
			return 0, err
		}
		zeroBased = int(endPos)
	}

	// The status code is shared by every call above, so it is only inspected once everything has run.
	if status > 0 {
		return 0, fmt.Errorf("unexpected UErrorCode from uregex_find/uregex_findNext: %d", status)
	}

	// Callers expect 1-based indexes
	return zeroBased + 1, nil
}

// Matches implements the interface Regex.
func (pr *privateRegex) Matches(ctx context.Context, start int, occurrence int) (ok bool, err error) {
// Check for the regex pointer first
Expand Down Expand Up @@ -354,6 +411,54 @@ func (pr *privateRegex) Replace(ctx context.Context, replacementStr string, star
return fromUTF16(returnStrBytes), nil
}

// Substring implements the interface Regex.
//
// The search begins at the 1-based position `start` and advances until the 1-based `occurrence` has been reached.
// On success, the matched text is read back from the module's memory and returned along with true. When the
// requested occurrence does not exist, an empty string, false, and a nil error are returned.
func (pr *privateRegex) Substring(ctx context.Context, start int, occurrence int) (string, bool, error) {
	// Both the regex and the match string must be in place; the regex is validated first.
	switch {
	case pr.regexPtr == 0:
		return "", false, ErrRegexNotYetSet.New()
	case pr.matchStrUPtr == 0:
		return "", false, ErrMatchNotYetSet.New()
	}

	// Locate the first match at or after the (zero-based) starting position, then step forward to the occurrence
	// that was asked for. The stepping stops early as soon as no further match exists.
	var status UErrorCode
	found, err := pr.uregex_find(ctx, pr.regexPtr, start-1, &status)
	if err != nil {
		return "", false, err
	}
	for seen := 1; found && seen < occurrence; seen++ {
		if found, err = pr.uregex_findNext(ctx, pr.regexPtr, &status); err != nil {
			return "", false, err
		}
	}
	if !found {
		return "", false, nil
	}

	// Fetch both boundaries of the whole match (group 0)
	matchBegin, err := pr.uregex_start(ctx, pr.regexPtr, 0, &status)
	if err != nil {
		return "", false, err
	}
	matchEnd, err := pr.uregex_end(ctx, pr.regexPtr, 0, &status)
	if err != nil {
		return "", false, err
	}

	// The status code is shared by every call above, so it is only inspected once everything has run.
	if status > 0 {
		return "", false, fmt.Errorf("unexpected UErrorCode from uregex_find/uregex_findNext: %d", status)
	}

	// The boundaries are doubled to obtain byte offsets relative to the match string pointer, presumably because
	// the match string is stored as UTF-16 (the bytes are decoded with fromUTF16 below).
	offset := uint32(pr.matchStrUPtr) + uint32(matchBegin*2)
	length := uint32((matchEnd - matchBegin) * 2)
	raw, readOk := pr.mod.Memory().Read(offset, length)
	if !readOk {
		return "", false, fmt.Errorf("somehow failed when retrieving the substring")
	}
	return fromUTF16(raw), true, nil
}

// StringBufferSize implements the interface Regex.
func (pr *privateRegex) StringBufferSize() uint32 {
return pr.bufferSize
Expand Down
90 changes: 90 additions & 0 deletions regex_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,93 @@ func TestRegexReplace(t *testing.T) {
require.Equal(t, "X X X", replacedStr)
require.NoError(t, regex.Close())
}

// TestRegexIndexOf exercises IndexOf with varying start positions and occurrences, checking both the
// start-of-match and end-of-match indexes, as well as the zero result for an occurrence that does not exist.
func TestRegexIndexOf(t *testing.T) {
	ctx := context.Background()
	re := CreateRegex(1024)
	require.NoError(t, re.SetRegexString(ctx, `[a-j]+`, RegexFlags_None))

	// expectIndex performs a single IndexOf call and compares the result against the wanted index.
	expectIndex := func(start int, occurrence int, endIndex bool, want int) {
		t.Helper()
		got, err := re.IndexOf(ctx, start, occurrence, endIndex)
		require.NoError(t, err)
		require.Equal(t, want, got)
	}

	require.NoError(t, re.SetMatchString(ctx, "abc def ghi"))

	// Index at the beginning of the match
	expectIndex(1, 1, false, 1)
	expectIndex(4, 1, false, 5)
	expectIndex(8, 1, false, 9)
	expectIndex(1, 2, false, 5)
	expectIndex(1, 3, false, 9)
	expectIndex(1, 4, false, 0)

	// Index at the end of the match
	expectIndex(1, 1, true, 4)
	expectIndex(4, 1, true, 8)
	expectIndex(8, 1, true, 12)
	expectIndex(1, 2, true, 8)
	expectIndex(1, 3, true, 12)
	expectIndex(1, 4, true, 0)

	// A new match string whose first word contains no characters from the regex's range
	require.NoError(t, re.SetMatchString(ctx, "klmno fghij abcde"))
	expectIndex(1, 1, false, 7)
	expectIndex(1, 1, true, 12)

	require.NoError(t, re.Close())
}

// TestRegexSubstring exercises Substring with varying start positions and occurrences, including an occurrence
// that does not exist, and verifies that a replaced match string is picked up by later calls.
func TestRegexSubstring(t *testing.T) {
	ctx := context.Background()
	re := CreateRegex(1024)
	require.NoError(t, re.SetRegexString(ctx, `[a-z]+`, RegexFlags_None))

	// expectSubstring performs a single Substring call and compares both the found flag and the returned text.
	expectSubstring := func(start int, occurrence int, wantFound bool, want string) {
		t.Helper()
		got, found, err := re.Substring(ctx, start, occurrence)
		require.NoError(t, err)
		if wantFound {
			require.True(t, found)
		} else {
			require.False(t, found)
		}
		require.Equal(t, want, got)
	}

	require.NoError(t, re.SetMatchString(ctx, "abc def ghi"))
	expectSubstring(1, 1, true, "abc")
	expectSubstring(4, 1, true, "def")
	expectSubstring(8, 1, true, "ghi")
	expectSubstring(1, 2, true, "def")
	expectSubstring(1, 3, true, "ghi")
	// Only three words exist, so a fourth occurrence cannot be found
	expectSubstring(1, 4, false, "")

	// A new match string must replace the previous one
	require.NoError(t, re.SetMatchString(ctx, "ghx dey abz"))
	expectSubstring(1, 1, true, "ghx")

	require.NoError(t, re.Close())
}