Skip to content

Commit ac30239

Browse files
Refactor: Split out schemafiltering into its own package (#3220)
Alternative approach for #3210. [Maintaining a whole repo for a single function](pulumi/provider-schema-filter#1) comes with some overhead. Consider also that the logic in the filter package is tightly coupled with expectations set elsewhere in the bridge. - **Create schemafilter package** - **Use schemafilter package function** - **Add README** - **cleanups**
1 parent 6357a18 commit ac30239

16 files changed

+187
-148
lines changed

pkg/tfgen/generate.go

Lines changed: 2 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ import (
2626
"os/exec"
2727
"path"
2828
"path/filepath"
29-
"regexp"
3029
"sort"
3130
"strings"
3231
"unicode"
@@ -48,6 +47,7 @@ import (
4847
"github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tf2pulumi/il"
4948
"github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfbridge"
5049
"github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfgen/internal/paths"
50+
"github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfgen/schemafilter"
5151
shim "github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfshim"
5252
"github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfshim/schema"
5353
"github.com/pulumi/pulumi-terraform-bridge/v3/unstable/metadata"
@@ -1043,64 +1043,6 @@ type GenerateOptions struct {
10431043
ModuleFormat string
10441044
}
10451045

1046-
func (g *Generator) FilterSchemaByLanguage(schemaBytes []byte) []byte {
1047-
// The span string stems from g.fixUpPropertyReference in docsgen and looks as follows:
1048-
// <span pulumi-lang-nodejs="firstProperty" pulumi-lang-go="FirstProperty" ...>first_property</span>
1049-
// When rendered in schema it uses escapes and unicode chars for the angle brackets:
1050-
// \u003cspan pulumi-lang-nodejs=\"`random.RandomBytes`\" pulumi-lang-dotnet=\"`random.RandomBytes`\" ... \u003e ...
1051-
spanRegex := regexp.MustCompile(`\\u003cspan pulumi-lang-nodejs=.*?\\u003c/span\\u003e`)
1052-
1053-
// Extract the language-specific inflection for the found inflection span
1054-
schemaBytes = spanRegex.ReplaceAllFunc(schemaBytes, func(match []byte) []byte {
1055-
languageKey := []byte(fmt.Sprintf(`pulumi-lang-%s=\"`, g.language))
1056-
_, startLanguageValue, _ := bytes.Cut(match, languageKey)
1057-
var languageValue []byte
1058-
1059-
// Sometimes we have double quotes in our language span. Handle this case so that we return the quotes.
1060-
doubleEscapedQuotes := []byte(`\"\"`)
1061-
singleEscapedQuotes := []byte(`\"`)
1062-
if loc := bytes.Index(startLanguageValue, doubleEscapedQuotes); loc > 0 {
1063-
// Cut after the first quote to include it in the result
1064-
languageValue = startLanguageValue[:loc+(len(singleEscapedQuotes))]
1065-
} else {
1066-
languageValue, _, _ = bytes.Cut(startLanguageValue, singleEscapedQuotes)
1067-
}
1068-
return languageValue
1069-
})
1070-
1071-
// Find code chooser blocks and filter to only keep the current language
1072-
codeChooserRegex := regexp.MustCompile(
1073-
`\\u003c!--Start PulumiCodeChooser --\\u003e.*?\\u003c!--End PulumiCodeChooser --\\u003e`,
1074-
)
1075-
1076-
schemaBytes = codeChooserRegex.ReplaceAllFunc(schemaBytes, func(match []byte) []byte {
1077-
content := string(match)
1078-
1079-
// In code choosers for registry docsgen, "nodejs" is "typescript"
1080-
codeLang := g.language
1081-
if g.language == "nodejs" {
1082-
codeLang = "typescript"
1083-
}
1084-
// In code choosers, "dotnet" is "csharp"
1085-
if g.language == "dotnet" {
1086-
codeLang = "csharp"
1087-
}
1088-
// Extract language-specific example only
1089-
_, after, found := strings.Cut(content, fmt.Sprintf("```%s", codeLang))
1090-
if !found {
1091-
return []byte("")
1092-
}
1093-
codeForLanguage, _, found := strings.Cut(after, "```")
1094-
if !found {
1095-
return []byte("")
1096-
}
1097-
codeForLanguage = fmt.Sprintf("```%s", codeLang) + codeForLanguage + "```"
1098-
1099-
return []byte(codeForLanguage)
1100-
})
1101-
return schemaBytes
1102-
}
1103-
11041046
// Generate creates Pulumi packages from the information it was initialized with.
11051047
func (g *Generator) Generate() (*GenerateSchemaResult, error) {
11061048
if g.language == "schema" || g.language == "registry-docs" || g.language == "pulumi" {
@@ -1117,7 +1059,7 @@ func (g *Generator) Generate() (*GenerateSchemaResult, error) {
11171059
return nil, err
11181060
}
11191061
// Generate the language-specific bytes
1120-
languageSchemaBytes := g.FilterSchemaByLanguage(schemaBytes)
1062+
languageSchemaBytes := schemafilter.FilterSchemaByLanguage(schemaBytes, string(g.language))
11211063

11221064
// Parse the filtered schema bytes back into PackageSpec
11231065
var languagePackageSpec pschema.PackageSpec

pkg/tfgen/generate_test.go

Lines changed: 0 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
package tfgen
1616

1717
import (
18-
"bytes"
1918
"fmt"
2019
"io"
2120
"os"
@@ -24,7 +23,6 @@ import (
2423
"testing"
2524

2625
"github.com/hashicorp/terraform-plugin-sdk/helper/schema"
27-
"github.com/hexops/autogold/v2"
2826
pschema "github.com/pulumi/pulumi/pkg/v3/codegen/schema"
2927
"github.com/pulumi/pulumi/sdk/v3/go/common/diag"
3028
"github.com/pulumi/pulumi/sdk/v3/go/common/diag/colors"
@@ -891,89 +889,3 @@ func TestExtraMappingError(t *testing.T) {
891889
})
892890
}
893891
}
894-
895-
func TestFilterSchemaByLanguage(t *testing.T) {
896-
t.Parallel()
897-
testCases := []struct {
898-
name string
899-
inputSchema []byte
900-
expectedLanguageSchemaBytes []byte
901-
generator *Generator
902-
}{
903-
{
904-
name: "Generates nodejs schema",
905-
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema.json")),
906-
generator: &Generator{
907-
version: "1.2.3-test",
908-
language: "nodejs",
909-
},
910-
},
911-
{
912-
name: "Generates python schema",
913-
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema.json")),
914-
generator: &Generator{
915-
version: "1.2.3-test",
916-
language: "python",
917-
},
918-
},
919-
{
920-
name: "Generates dotnet schema",
921-
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema.json")),
922-
generator: &Generator{
923-
version: "1.2.3-test",
924-
language: "dotnet",
925-
},
926-
},
927-
{
928-
name: "Generates go schema",
929-
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema.json")),
930-
generator: &Generator{
931-
version: "1.2.3-test",
932-
language: "go",
933-
},
934-
},
935-
{
936-
name: "Generates yaml schema",
937-
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema.json")),
938-
generator: &Generator{
939-
version: "1.2.3-test",
940-
language: "yaml",
941-
},
942-
},
943-
{
944-
name: "Generates java schema",
945-
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema.json")),
946-
generator: &Generator{
947-
version: "1.2.3-test",
948-
language: "java",
949-
},
950-
},
951-
{
952-
name: "Handles property names that are not surrounded by back ticks",
953-
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema-no-backticks.json")),
954-
generator: &Generator{
955-
version: "1.2.3-test",
956-
language: "nodejs",
957-
},
958-
},
959-
{
960-
name: "Handles property names that are surrounded by back ticks AND double quotes",
961-
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema-backticks-and-quotes.json")),
962-
generator: &Generator{
963-
version: "1.2.3-test",
964-
language: "nodejs",
965-
},
966-
},
967-
}
968-
969-
for _, tc := range testCases {
970-
t.Run(tc.name, func(t *testing.T) {
971-
actual := tc.generator.FilterSchemaByLanguage(tc.inputSchema)
972-
hasSpan := bytes.Contains(actual, []byte("span"))
973-
require.False(t, hasSpan, "there should be no spans in the filtered schema")
974-
hasCodeChoosers := bytes.Contains(actual, []byte("PulumiCodeChooser"))
975-
require.False(t, hasCodeChoosers)
976-
autogold.ExpectFile(t, autogold.Raw(actual))
977-
})
978-
}
979-
}

pkg/tfgen/schemafilter/README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# provider-schema-filter
2+
A small library for provider schema processing
3+
4+
> [NOTE] This library is meant for Pulumi-internal use. It is experimental and subject to change.
5+
6+
## Purpose
7+
8+
Prepare Pulumi provider schemas to be passed to the pulumi package gen-sdk command.
9+
10+
Pulumi provider schemas (traditionally located at `provider/cmd/pulumi-resource-foo/schema.json`) are our source for generating registry documentation, provider binaries, and language SDKs, including in-line documentation with examples.
11+
The schema contains language-specific translations of examples, as well as language-specific inflections of code strings.
12+
13+
Because the schema contains translations for _all_ Pulumi-supported languages, we need to filter the schema by its language before we pass it along to the Pulumi CLI's SDK generator.
14+
This library is meant for that purpose.
15+
The filter expects a certain schema format, which is consistent for all bridged providers, detailed below.
16+
17+
## Use
18+
19+
`import "github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfgen/schemafilter"`
20+
21+
## Schema markups
22+
23+
The Pulumi schema may contain the following markups:
24+
25+
### Pulumi Code Chooser
26+
27+
The content between the code chooser tags contains the examples that we are rendering in the registry via the language tab selector.
28+
The outline is as follows:
29+
`<!--Start PulumiCodeChooser -->```typescript {example code}```\n```python {example code}```\n```csharp {example code}```\n```go {example code}```\n```java {example code}```\n```yaml {example code}```\n<!--End PulumiCodeChooser -->`
30+
For each SDK, we want to display only the example relevant to that SDK's language.
31+
32+
### Variable inflection
33+
34+
The documentation contains property or resource names that should be inflected by language.
35+
The precise inflection format depends on whether we are inflecting a resource, a function, or a property name, but the basic format looks like this:
36+
`<span pulumi-lang-nodejs="exampleProperty" pulumi-lang-dotnet="ExampleProperty" pulumi-lang-go="exampleProperty" pulumi-lang-python="example_property" pulumi-lang-yaml="exampleProperty" pulumi-lang-java="exampleProperty">%s</span>`
37+

pkg/tfgen/schemafilter/filter.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
package schemafilter
2+
3+
import (
4+
"bytes"
5+
"fmt"
6+
"regexp"
7+
"strings"
8+
)
9+
10+
// spanRegex matches one inflection span as it appears in the serialized schema, from the opening
// "<span pulumi-lang-nodejs=" through the closing "</span>". The span stems from the Terraform
// bridge generator's fixUpPropertyReference method in docsgen and looks as follows:
//
//	<span pulumi-lang-nodejs="firstProperty" pulumi-lang-go="FirstProperty" ...>first_property</span>
//
// When rendered in schema it uses escapes and unicode chars for the angle brackets:
//
//	\u003cspan pulumi-lang-nodejs=\"`random.RandomBytes`\" pulumi-lang-dotnet=\"`random.RandomBytes`\" ... \u003e ...
//
// It is compiled once at package scope rather than on every call.
var spanRegex = regexp.MustCompile(`\\u003cspan pulumi-lang-nodejs=.*?\\u003c/span\\u003e`)

// codeChooserRegex matches one code chooser block, including its start and end marker comments,
// in the same escaped form. It is compiled once at package scope rather than on every call.
var codeChooserRegex = regexp.MustCompile(
	`\\u003c!--Start PulumiCodeChooser --\\u003e.*?\\u003c!--End PulumiCodeChooser --\\u003e`,
)

// FilterSchemaByLanguage reduces the language-specific markup in a serialized schema to the
// content for a single language and returns the resulting bytes.
//
// Every inflection span is replaced by the value of its pulumi-lang-<language> attribute, and
// every code chooser block is replaced by the fenced example for that language. A span without
// an attribute for language, or a code chooser without a complete example for it, is replaced by
// nothing.
//
// language is the name used in the span attributes (for example "nodejs", "python", "dotnet",
// "go", "yaml" or "java"). In code choosers "nodejs" is looked up as "typescript" and "dotnet" as
// "csharp".
func FilterSchemaByLanguage(schemaBytes []byte, language string) []byte {
	// The key ends with the escaped opening quote, so whatever follows it is the attribute value.
	languageKey := []byte(fmt.Sprintf(`pulumi-lang-%s=\"`, language))
	schemaBytes = spanRegex.ReplaceAllFunc(schemaBytes, func(match []byte) []byte {
		return inflectionForLanguage(match, languageKey)
	})

	codeLang := codeChooserLanguage(language)
	return codeChooserRegex.ReplaceAllFunc(schemaBytes, func(match []byte) []byte {
		return exampleForLanguage(match, codeLang)
	})
}

// inflectionForLanguage extracts, from one matched inflection span, the attribute value that
// follows languageKey. It returns nil when the span does not contain languageKey.
func inflectionForLanguage(span, languageKey []byte) []byte {
	// The "found" results of bytes.Cut are deliberately ignored: a missing key or a missing
	// closing quote yields an empty value, which removes the span from the output.
	_, rest, _ := bytes.Cut(span, languageKey)

	escapedQuote := []byte(`\"`)
	doubledEscapedQuote := []byte(`\"\"`)

	// Sometimes the value itself is wrapped in double quotes, so its closing quote is directly
	// followed by the quote that closes the attribute. Cut after the first of the two so that
	// the value keeps its own closing quote.
	if loc := bytes.Index(rest, doubledEscapedQuote); loc > 0 {
		return rest[:loc+len(escapedQuote)]
	}

	value, _, _ := bytes.Cut(rest, escapedQuote)
	return value
}

// codeChooserLanguage maps a span language name to the fence name used in code choosers.
func codeChooserLanguage(language string) string {
	switch language {
	case "nodejs":
		// In code choosers for registry docsgen, "nodejs" is "typescript".
		return "typescript"
	case "dotnet":
		// In code choosers, "dotnet" is "csharp".
		return "csharp"
	default:
		return language
	}
}

// exampleForLanguage extracts, from one matched code chooser block, the fenced example whose
// opening fence is followed by codeLang. It returns an empty slice when the block has no such
// fence or the fence is never closed.
func exampleForLanguage(chooser []byte, codeLang string) []byte {
	openingFence := fmt.Sprintf("```%s", codeLang)

	_, after, found := strings.Cut(string(chooser), openingFence)
	if !found {
		return []byte("")
	}
	code, _, found := strings.Cut(after, "```")
	if !found {
		return []byte("")
	}

	// Re-attach the fences that the two cuts consumed.
	return []byte(openingFence + code + "```")
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package schemafilter
2+
3+
import (
4+
"bytes"
5+
"os"
6+
"testing"
7+
8+
"github.com/hexops/autogold/v2"
9+
"github.com/stretchr/testify/require"
10+
)
11+
12+
func TestFilterSchemaByLanguage(t *testing.T) {
13+
t.Parallel()
14+
testCases := []struct {
15+
name string
16+
inputSchema []byte
17+
expectedLanguageSchemaBytes []byte
18+
language string
19+
// generator *Generator
20+
}{
21+
{
22+
name: "Generates nodejs schema",
23+
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema.json")),
24+
language: "nodejs",
25+
},
26+
{
27+
name: "Generates python schema",
28+
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema.json")),
29+
language: "python",
30+
},
31+
{
32+
name: "Generates dotnet schema",
33+
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema.json")),
34+
language: "dotnet",
35+
},
36+
{
37+
name: "Generates go schema",
38+
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema.json")),
39+
language: "go",
40+
},
41+
{
42+
name: "Generates yaml schema",
43+
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema.json")),
44+
language: "yaml",
45+
},
46+
{
47+
name: "Generates java schema",
48+
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema.json")),
49+
language: "java",
50+
},
51+
{
52+
name: "Handles property names that are not surrounded by back ticks",
53+
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema-no-backticks.json")),
54+
language: "nodejs",
55+
},
56+
{
57+
name: "Handles property names that are surrounded by back ticks AND double quotes",
58+
inputSchema: []byte(readfile(t, "testdata/TestFilterSchemaByLanguage/schema-backticks-and-quotes.json")),
59+
language: "nodejs",
60+
},
61+
}
62+
63+
for _, tc := range testCases {
64+
t.Run(tc.name, func(t *testing.T) {
65+
actual := FilterSchemaByLanguage(tc.inputSchema, tc.language)
66+
hasSpan := bytes.Contains(actual, []byte("span"))
67+
require.False(t, hasSpan, "there should be no spans in the filtered schema")
68+
hasCodeChoosers := bytes.Contains(actual, []byte("PulumiCodeChooser"))
69+
require.False(t, hasCodeChoosers)
70+
71+
autogold.ExpectFile(t, autogold.Raw(actual))
72+
})
73+
}
74+
}
75+
76+
func readfile(t *testing.T, file string) string {
77+
t.Helper()
78+
bytes, err := os.ReadFile(file)
79+
require.NoError(t, err)
80+
return string(bytes)
81+
}

0 commit comments

Comments
 (0)