Skip to content

Commit 3a4ad6f

Browse files
authored
Allow Unicode characters in Selectors (#510)
* Allow Unicode characters in `Selector`s Resolves #454 * Refactor internal `ParserSettings` to use static or const members Refactored internal `ParserSettings` to convert instance-level properties and methods to static or const members. * feat: Filter `Selector` chars by allowlist or blocklist (#511) * Change enum `FilterType` to `SelectorFilterType` Implement proposals from review: * SelectorFilterType.Alphanumeric: alphanumeric characters (upper and lower case), plus '_' and '-' * SelectorFilterType.VisualUnicodeChars: All Unicode characters are allowed in a selector, except 68 non-visual characters: Control Characters (U+0000–U+001F, U+007F), Format Characters (Category: Cf), Directional Formatting (Category: Cf), Invisible Separator, Common Combining Marks (Category: Mn), Whitespace Characters (non-glyph spacing). * Make `NonVisualUnicodeCharacters` read-only
1 parent 058f615 commit 3a4ad6f

File tree

12 files changed

+782
-180
lines changed

12 files changed

+782
-180
lines changed
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
using System;
2+
using System.Linq;
3+
using NUnit.Framework;
4+
using SmartFormat.Core.Parsing;
5+
6+
namespace SmartFormat.Tests.Core;
7+
8+
[TestFixture]
9+
internal class CharSetTests
10+
{
11+
[Test]
12+
public void CharSet_Add_Remove()
13+
{
14+
char[] asciiChars = ['A', 'B', 'C'];
15+
char[] nonAsciiChars = ['Ā', 'Б', '中'];
16+
var charSet = new CharSet();
17+
charSet.AddRange(asciiChars.AsEnumerable());
18+
charSet.AddRange(nonAsciiChars.AsSpan());
19+
var countBeforeRemoval = charSet.Count;
20+
var existingRemoved = charSet.Remove('C');
21+
charSet.Remove('中');
22+
// trying to remove a not existing char returns false
23+
var nonExistingRemoved = charSet.Remove('?');
24+
var count = charSet.Count;
25+
26+
Assert.Multiple(() =>
27+
{
28+
Assert.That(countBeforeRemoval, Is.EqualTo(asciiChars.Length + nonAsciiChars.Length));
29+
Assert.That(count, Is.EqualTo(countBeforeRemoval - 2));
30+
Assert.That(existingRemoved, Is.True);
31+
Assert.That(nonExistingRemoved, Is.False);
32+
});
33+
}
34+
35+
[Test]
36+
public void CharSet_CreateFromSpan_GetCharacters_Contains()
37+
{
38+
char[] asciiAndNonAscii = ['\0', 'A', 'B', 'C', 'Ā', 'Б', '中'];
39+
var charSet = new CharSet(asciiAndNonAscii.AsSpan());
40+
41+
Assert.Multiple(() =>
42+
{
43+
Assert.That(charSet, Has.Count.EqualTo(7));
44+
Assert.That(charSet.Contains('A'), Is.True); // ASCII
45+
Assert.That(charSet.Contains('\0'), Is.True); // control character
46+
Assert.That(charSet.Contains('中'), Is.True); // non-ASCII
47+
Assert.That(charSet.Contains('?'), Is.False);
48+
Assert.That(charSet.GetCharacters(), Is.EquivalentTo(asciiAndNonAscii));
49+
charSet.Clear();
50+
Assert.That(charSet, Has.Count.EqualTo(0));
51+
Assert.That(charSet.GetCharacters(), Is.Empty);
52+
});
53+
}
54+
}

src/SmartFormat.Tests/Core/ParserTests.cs

Lines changed: 139 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
using NUnit.Framework;
2-
using SmartFormat.Core.Parsing;
3-
using SmartFormat.Core.Settings;
4-
using SmartFormat.Tests.TestUtils;
5-
using System;
1+
using System;
2+
using System.Collections.Generic;
63
using System.IO;
74
using System.Linq;
85
using System.Text.RegularExpressions;
6+
using NUnit.Framework;
7+
using SmartFormat.Core.Parsing;
8+
using SmartFormat.Core.Settings;
9+
using SmartFormat.Tests.TestUtils;
910

1011
namespace SmartFormat.Tests.Core;
1112

@@ -66,9 +67,9 @@ public void Parser_Throws_Exceptions(string format)
6667
Assert.Throws<ParsingErrors>(() => formatter.Test(format, args, "Error"));
6768
}
6869

69-
[TestCase("{V(LU)}")] // braces are illegal
70-
[TestCase("{V LU }")] // blanks are illegal
71-
[TestCase("{VĀLUĒ}")] // 0x100 and 0x112 are illegal chars
70+
[TestCase("{V(LU)}")] // braces are not allowed
71+
[TestCase("{V LU\\}")] // escape char is not allowed
72+
[TestCase("{V?LU,}")] // ? and , are allowed chars
7273
public void Parser_Throws_On_Illegal_Selector_Chars(string format)
7374
{
7475
var parser = GetRegularParser();
@@ -81,9 +82,9 @@ public void Parser_Throws_On_Illegal_Selector_Chars(string format)
8182
{
8283
Assert.Multiple(() =>
8384
{
84-
// Throws, because selector contains 2 illegal characters
85+
// Throws, because selector contains disallowed characters
8586
Assert.That(e, Is.InstanceOf<ParsingErrors>());
86-
Assert.That(((ParsingErrors) e).Issues, Has.Count.EqualTo(2));
87+
Assert.That(((ParsingErrors) e).Issues, Has.Count.GreaterThanOrEqualTo(1));
8788
});
8889
}
8990
}
@@ -154,6 +155,7 @@ public void Parser_Error_Action_Ignore()
154155
// | Literal | Erroneous | | Okay |
155156
var invalidTemplate = "Hello, I'm {Name from {City} {Street}";
156157

158+
// settings must be set before parser instantiation
157159
var parser = GetRegularParser(new SmartSettings {Parser = new ParserSettings {ErrorAction = ParseErrorAction.Ignore}});
158160
using var parsed = parser.ParseFormat(invalidTemplate);
159161

@@ -176,6 +178,7 @@ public void Parser_Error_Action_Ignore()
176178
[TestCase("Hello, I'm {Name from {City} {Street", false)]
177179
public void Parser_Error_Action_MaintainTokens(string invalidTemplate, bool lastItemIsPlaceholder)
178180
{
181+
// settings must be set before parser instantiation
179182
var parser = GetRegularParser(new SmartSettings {Parser = new ParserSettings {ErrorAction = ParseErrorAction.MaintainTokens}});
180183
using var parsed = parser.ParseFormat(invalidTemplate);
181184

@@ -203,8 +206,16 @@ public void Parser_Error_Action_OutputErrorInResult()
203206
{
204207
// | Literal | Erroneous |
205208
var invalidTemplate = "Hello, I'm {Name from {City}";
206-
207-
var parser = GetRegularParser(new SmartSettings {Parser = new ParserSettings {ErrorAction = ParseErrorAction.OutputErrorInResult}});
209+
210+
var parser = GetRegularParser(new SmartSettings
211+
{
212+
Parser = new ParserSettings
213+
{
214+
SelectorCharFilter = SelectorFilterType.Alphanumeric, // default
215+
ErrorAction = ParseErrorAction.OutputErrorInResult
216+
}
217+
});
218+
208219
using var parsed = parser.ParseFormat(invalidTemplate);
209220

210221
Assert.That(parsed.Items, Has.Count.EqualTo(1));
@@ -412,11 +423,11 @@ public void Parser_NotifyParsingError()
412423
});
413424

414425
formatter.Parser.OnParsingFailure += (o, args) => parsingError = args.Errors;
415-
var res = formatter.Format("{NoName {Other} {Same", default(object)!);
426+
var res = formatter.Format("{NoName {Other} {Same");
416427
Assert.Multiple(() =>
417428
{
418429
Assert.That(parsingError!.Issues, Has.Count.EqualTo(3));
419-
Assert.That(parsingError.Issues[2].Issue, Is.EqualTo(new Parser.ParsingErrorText()[SmartFormat.Core.Parsing.Parser.ParsingError.MissingClosingBrace]));
430+
Assert.That(parsingError.Issues[2].Issue, Is.EqualTo(new Parser.ParsingErrorText()[Parser.ParsingError.MissingClosingBrace]));
420431
});
421432
}
422433

@@ -457,6 +468,18 @@ public void Escaping_TheEscapingCharacter_ShouldWork()
457468
Assert.That(result, Is.EqualTo(@"\\aaa\{}bbb ccc\x{}ddd\\"));
458469
}
459470

471+
[Test]
472+
public void Parsing_Selector_With_CharFromBlocklist_ShouldThrow()
473+
{
474+
var settings = new SmartSettings { Parser = new ParserSettings { SelectorCharFilter = SelectorFilterType.VisualUnicodeChars } };
475+
var parser = GetRegularParser(settings);
476+
477+
// The newline character is in the default blocklist of disallowed characters
478+
Assert.That(() => parser.ParseFormat("{A\nB}"),
479+
Throws.Exception.InstanceOf<ParsingErrors>().And.Message
480+
.Contains(new Parser.ParsingErrorText()[Parser.ParsingError.InvalidCharactersInSelector]));
481+
}
482+
460483
[Test]
461484
public void StringFormat_Escaping_In_Literal()
462485
{
@@ -534,8 +557,10 @@ public void Parse_Unicode(string formatString, string unicodeLiteral, int itemIn
534557
[TestCase("{%C}", '%')]
535558
public void Selector_With_Custom_Selector_Character(string formatString, char customChar)
536559
{
560+
// settings must be set before parser instantiation
537561
var settings = new SmartSettings();
538-
settings.Parser.AddCustomSelectorChars(new[]{customChar});
562+
settings.Parser.AddCustomSelectorChars([customChar]);
563+
var x = settings.Parser.GetSelectorChars();
539564
var parser = GetRegularParser(settings);
540565
var result = parser.ParseFormat(formatString);
541566

@@ -544,7 +569,7 @@ public void Selector_With_Custom_Selector_Character(string formatString, char cu
544569
Assert.That(placeholder!.Selectors, Has.Count.EqualTo(1));
545570
Assert.Multiple(() =>
546571
{
547-
Assert.That(placeholder!.Selectors, Has.Count.EqualTo(placeholder!.GetSelectors().Count));
572+
Assert.That(placeholder.Selectors, Has.Count.EqualTo(placeholder.GetSelectors().Count));
548573
Assert.That(placeholder.Selectors[0].ToString(), Is.EqualTo(formatString.Substring(1, 2)));
549574
});
550575
}
@@ -553,8 +578,10 @@ public void Selector_With_Custom_Selector_Character(string formatString, char cu
553578
[TestCase("{a°b}", '°')]
554579
public void Selectors_With_Custom_Operator_Character(string formatString, char customChar)
555580
{
556-
var parser = GetRegularParser();
557-
parser.Settings.Parser.AddCustomOperatorChars(new[]{customChar});
581+
// settings must be set before parser instantiation
582+
var settings = new SmartSettings();
583+
settings.Parser.AddCustomOperatorChars([customChar]);
584+
var parser = GetRegularParser(settings);
558585
var result = parser.ParseFormat(formatString);
559586

560587
var placeholder = result.Items[0] as Placeholder;
@@ -568,6 +595,31 @@ public void Selectors_With_Custom_Operator_Character(string formatString, char c
568595
});
569596
}
570597

598+
[TestCase("German |öäüßÖÄÜ!")]
599+
[TestCase("Russian абвгдеёжзийклмн")]
600+
[TestCase("French >éèêëçàùâîô")]
601+
[TestCase("Spanish <áéíóúñü¡¿")]
602+
[TestCase("Portuguese !ãõáâêéíóúç")]
603+
[TestCase("Chinese 汉字测试")]
604+
[TestCase("Arabic مرحبا بالعالم")]
605+
[TestCase("Turkish çğöşüİı")]
606+
[TestCase("Hindi नमस्ते दुनिया")]
607+
public void Selector_WorksWithAllUnicodeChars(string selector)
608+
{
609+
// See https://github.com/axuno/SmartFormat/issues/454
610+
611+
// settings must be set before parser instantiation
612+
var settings = new SmartSettings { Parser = { SelectorCharFilter = SelectorFilterType.VisualUnicodeChars } };
613+
const string expected = "The Value";
614+
// The default formatter with the VisualUnicodeChars selector filter should be able to handle any
615+
// Unicode characters in selectors except the "magic" disallowed ones
616+
var formatter = Smart.CreateDefaultSmartFormat(settings);
617+
// Use the Unicode string as a selector of the placeholder
618+
var template = $"{{{selector}}}";
619+
var result = formatter.Format(template, new Dictionary<string, string> { { selector, expected } });
620+
Assert.That(result, Is.EqualTo(expected));
621+
}
622+
571623
[TestCase("{A?.B}")]
572624
[TestCase("{Selector0?.Selector1}")]
573625
[TestCase("{A?[1].B}")]
@@ -622,10 +674,11 @@ public void Selector_With_Nullable_Operator_Character(string formatString)
622674
public void Selector_With_Other_Contiguous_Operator_Characters(string formatString, char customChar)
623675
{
624676
// contiguous operator characters are parsed as "ONE operator string"
625-
626-
var parser = GetRegularParser();
677+
var settings = new SmartSettings();
678+
settings.Parser.AddCustomOperatorChars([customChar]);
679+
var parser = GetRegularParser(settings);
627680
// adding '.' is ignored, as it's a standard operator
628-
parser.Settings.Parser.AddCustomOperatorChars(new[]{customChar});
681+
parser.Settings.Parser.AddCustomOperatorChars([customChar]);
629682
var result = parser.ParseFormat(formatString);
630683

631684
var placeholder = result.Items[0] as Placeholder;
@@ -681,6 +734,41 @@ public void ParseInputAsHtml(string input)
681734
Assert.That(literalText!.RawText, Is.EqualTo(input));
682735
}
683736

737+
#region * Parse HTML input without ParserSetting 'IsHtml'
738+
739+
/// <summary>
740+
/// <see cref="ParserSettings.SelectorCharFilter"/> is <see cref="SelectorFilterType.VisualUnicodeChars"/>:
741+
/// all Unicode characters except the non-visual ones are allowed in selectors
742+
/// </summary>
743+
[TestCase("<script>{Placeholder}</script>", "{Placeholder}")]
744+
[TestCase("<style>{Placeholder}</style>", "{Placeholder}")]
745+
[TestCase("Something <style>h1 { color : #000; }</style>! nice", "{ color : #000; }")]
746+
[TestCase("Something <script>{const a = '</script>';}</script>! nice", "{const a = '</script>';}")]
747+
public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, string selector)
748+
{
749+
var parser = GetRegularParser(new SmartSettings
750+
{
751+
StringFormatCompatibility = false,
752+
Parser = new ParserSettings
753+
{
754+
SelectorCharFilter = SelectorFilterType.VisualUnicodeChars,
755+
ErrorAction = ParseErrorAction.ThrowError,
756+
ParseInputAsHtml = false
757+
}
758+
});
759+
760+
var result = parser.ParseFormat(input);
761+
Assert.Multiple(() =>
762+
{
763+
Assert.That(result.Items, Has.Count.EqualTo(3));
764+
Assert.That(((Placeholder) result.Items[1]).RawText, Is.EqualTo(selector));
765+
});
766+
}
767+
768+
/// <summary>
769+
/// <see cref="ParserSettings.SelectorCharFilter"/> is <see cref="SelectorFilterType.Alphanumeric"/>:
770+
/// Predefined set of allowed characters in selectors
771+
/// </summary>
684772
[TestCase("<script>{Placeholder}</script>", false)] // should parse a placeholder
685773
[TestCase("<style>{Placeholder}</style>", false)] // should parse a placeholder
686774
[TestCase("Something <style>h1 { color : #000; }</style>! nice", true)] // illegal selector chars
@@ -690,7 +778,12 @@ public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, bool shoul
690778
var parser = GetRegularParser(new SmartSettings
691779
{
692780
StringFormatCompatibility = false,
693-
Parser = new ParserSettings { ErrorAction = ParseErrorAction.ThrowError, ParseInputAsHtml = false }
781+
Parser = new ParserSettings
782+
{
783+
SelectorCharFilter = SelectorFilterType.Alphanumeric,
784+
ErrorAction = ParseErrorAction.ThrowError,
785+
ParseInputAsHtml = false
786+
}
694787
});
695788

696789
switch (shouldThrow)
@@ -707,6 +800,8 @@ public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, bool shoul
707800
}
708801
}
709802

803+
#endregion
804+
710805
/// <summary>
711806
/// SmartFormat is able to parse script tags, if <see cref="ParserSettings.ParseInputAsHtml"/> is <see langword="true"/>
712807
/// </summary>
@@ -807,29 +902,31 @@ function interpolationSearch(sortedArray, seekIndex) {
807902
[TestCase(true, false)]
808903
public void StyleTags_Can_Be_Parsed_Without_Failure(bool inputIsHtml, bool shouldFail)
809904
{
810-
var styles = @"
811-
<style type='text/css'>
812-
.media {
813-
display: grid;
814-
grid-template-columns: 1fr 3fr;
815-
}
905+
var styles = """
816906
817-
.media .content {
818-
font-size: .8rem;
819-
}
907+
<style type='text/css'>
908+
.media {
909+
display: grid;
910+
grid-template-columns: 1fr 3fr;
911+
}
820912
821-
.comment img {
822-
border: 1px solid grey;
823-
anything: 'xyz'
824-
}
913+
.media .content {
914+
font-size: .8rem;
915+
}
825916
826-
.list-item {
827-
border-bottom: 1px solid grey;
828-
}
829-
/* Comment: { which mixes up the parser without ParserSettings.ParseInputAsHtml = true */
830-
</style>
831-
<p>############### {TheVariable} ###############</p>
832-
";
917+
.comment img {
918+
border: 1px solid grey;
919+
anything: 'xyz'
920+
}
921+
922+
.list-item {
923+
border-bottom: 1px solid grey;
924+
}
925+
/* Comment: { which mixes up the parser without ParserSettings.ParseInputAsHtml = true */
926+
</style>
927+
<p>############### {TheVariable} ###############</p>
928+
929+
""";
833930
var parsingFailures = 0;
834931
var parser = GetRegularParser(new SmartSettings
835932
{

0 commit comments

Comments
 (0)