From 7a33e23a7b5a94500ed62dc7cc9487c873ac4bf6 Mon Sep 17 00:00:00 2001 From: "shubh@DOE" Date: Wed, 3 Dec 2025 00:44:15 -0800 Subject: [PATCH 1/5] init --- libc/src/stdio/printf_core/CMakeLists.txt | 5 ++ libc/src/stdio/printf_core/char_converter.h | 29 ++++++- libc/src/stdio/printf_core/parser.h | 13 ++- .../test/src/stdio/printf_core/CMakeLists.txt | 2 + .../src/stdio/printf_core/converter_test.cpp | 82 ++++++++++++++++++- .../src/stdio/printf_core/parser_test.cpp | 19 +++++ 6 files changed, 142 insertions(+), 8 deletions(-) diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt index 624129b2b36e7..798e8706b5331 100644 --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -67,6 +67,7 @@ add_header_library( parser.h DEPENDS .core_structs + libc.hdr.types.wint_t libc.src.__support.arg_list libc.src.__support.ctype_utils libc.src.__support.str_to_integer @@ -111,6 +112,8 @@ add_header_library( .printf_config .writer libc.include.inttypes + libc.hdr.types.wchar_t + libc.hdr.types.wint_t libc.src.__support.big_int libc.src.__support.common libc.src.__support.CPP.limits @@ -123,6 +126,8 @@ add_header_library( libc.src.__support.integer_to_string libc.src.__support.libc_assert libc.src.__support.uint128 + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.wcrtomb libc.src.__support.StringUtil.error_to_string libc.src.string.memory_utils.inline_memcpy ) diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h index fd2eb2553887a..43d3ebb70e708 100644 --- a/libc/src/stdio/printf_core/char_converter.h +++ b/libc/src/stdio/printf_core/char_converter.h @@ -1,4 +1,4 @@ -//===-- String Converter for printf -----------------------------*- C++ -*-===// +//===-- Character Converter for printf --------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. @@ -9,7 +9,11 @@ #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H +#include "hdr/types/wchar_t.h" +#include "hdr/types/wint_t.h" #include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/wcrtomb.h" #include "src/stdio/printf_core/converter_utils.h" #include "src/stdio/printf_core/core_structs.h" #include "src/stdio/printf_core/writer.h" @@ -21,7 +25,6 @@ template LIBC_INLINE int convert_char(Writer *writer, const FormatSection &to_conv) { char c = static_cast(to_conv.conv_val_raw); - constexpr int STRING_LEN = 1; size_t padding_spaces = @@ -33,7 +36,27 @@ LIBC_INLINE int convert_char(Writer *writer, RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces)); } - RET_IF_RESULT_NEGATIVE(writer->write(c)); + if (to_conv.length_modifier == LengthModifier::l) { + wint_t wi = static_cast(to_conv.conv_val_raw); + + if (wi == WEOF) { + return -1; + } + + char mb_str[MB_LEN_MAX]; + internal::mbstate mbstate; + wchar_t wc = static_cast(wi); + + auto ret = internal::wcrtomb(mb_str, wc, &internal_mbstate); + if (!ret.has_value()) { + return -1; + } + + RET_IF_RESULT_NEGATIVE(writer->write({mb_str, ret.value()})); + + } else { + RET_IF_RESULT_NEGATIVE(writer->write(c)); + } // If the padding is on the right side, write the spaces last. 
if (padding_spaces > 0 && diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h index cef9b1ae58fa0..dea1ff966b6bb 100644 --- a/libc/src/stdio/printf_core/parser.h +++ b/libc/src/stdio/printf_core/parser.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H +#include "hdr/types/wint_t.h" #include "include/llvm-libc-macros/stdfix-macros.h" #include "src/__support/CPP/algorithm.h" // max #include "src/__support/CPP/limits.h" @@ -73,9 +74,9 @@ template class Parser { ArgProvider args_cur; #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE - // args_start stores the start of the va_args, which is allows getting the - // value of arguments that have already been passed. args_index is tracked so - // that we know which argument args_cur is on. + // args_start stores the start of the va_args, which allows getting the + // value of arguments that have already been passed. args_index is tracked so + // that we know which argument args_cur is on. 
ArgProvider args_start; size_t args_index = 1; @@ -173,7 +174,11 @@ template class Parser { section.has_conv = true; break; case ('c'): - WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index); + if (section.length_modifier == LengthModifier::l) { + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, wint_t, conv_index); + } else { + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index); + } break; case ('d'): case ('i'): diff --git a/libc/test/src/stdio/printf_core/CMakeLists.txt b/libc/test/src/stdio/printf_core/CMakeLists.txt index ff7ebbc4f5fd0..f65d2baef41c7 100644 --- a/libc/test/src/stdio/printf_core/CMakeLists.txt +++ b/libc/test/src/stdio/printf_core/CMakeLists.txt @@ -7,6 +7,7 @@ add_libc_unittest( LINK_LIBRARIES LibcPrintfHelpers DEPENDS + libc.hdr.types.wchar_t libc.src.stdio.printf_core.parser libc.src.stdio.printf_core.core_structs libc.src.__support.CPP.string_view @@ -32,6 +33,7 @@ add_libc_unittest( SRCS converter_test.cpp DEPENDS + libc.hdr.types.wchar_t libc.src.stdio.printf_core.converter libc.src.stdio.printf_core.writer libc.src.stdio.printf_core.core_structs diff --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp index 2dae2a22c864c..296df9d9e6fa5 100644 --- a/libc/test/src/stdio/printf_core/converter_test.cpp +++ b/libc/test/src/stdio/printf_core/converter_test.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// +#include "hdr/types/wchar_t.h" #include "src/stdio/printf_core/converter.h" #include "src/stdio/printf_core/core_structs.h" #include "src/stdio/printf_core/writer.h" - #include "test/UnitTest/Test.h" class LlvmLibcPrintfConverterTest : public LIBC_NAMESPACE::testing::Test { @@ -255,3 +255,83 @@ TEST_F(LlvmLibcPrintfConverterTest, OctConversion) { ASSERT_STREQ(str, "1234"); ASSERT_EQ(writer.get_chars_written(), size_t{4}); } + +TEST_F(LlvmLibcPrintfConverterTest, WideCharConversion) { + 
LIBC_NAMESPACE::printf_core::FormatSection section; + section.has_conv = true; + section.raw_string = "%lc"; + section.conv_name = 'c'; + section.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::l; + section.conv_val_raw = static_cast(L'€'); + + LIBC_NAMESPACE::printf_core::convert(&writer, section); + + wb.buff[wb.buff_cur] = '\0'; + + ASSERT_STREQ(str, "€"); + ASSERT_EQ(writer.get_chars_written(), size_t{1}); +} + +TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionLeftJustified) { + LIBC_NAMESPACE::printf_core::FormatSection left_justified_conv; + left_justified_conv.has_conv = true; + left_justified_conv.raw_string = "%-4lc"; + left_justified_conv.conv_name = 'c'; + left_justified_conv.length_modifier = + LIBC_NAMESPACE::printf_core::LengthModifier::l; + left_justified_conv.flags = + LIBC_NAMESPACE::printf_core::FormatFlags::LEFT_JUSTIFIED; + left_justified_conv.min_width = 4; + left_justified_conv.conv_val_raw = static_cast(L'€'); + + LIBC_NAMESPACE::printf_core::convert(&writer, left_justified_conv); + wb.buff[wb.buff_cur] = '\0'; + + ASSERT_STREQ(str, "€ "); + ASSERT_EQ(writer.get_chars_written(), size_t{4}); +} + +TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionRightJustified) { + LIBC_NAMESPACE::printf_core::FormatSection right_justified_conv; + right_justified_conv.has_conv = true; + right_justified_conv.raw_string = "%4lc"; + right_justified_conv.conv_name = 'c'; + right_justified_conv.length_modifier = + LIBC_NAMESPACE::printf_core::LengthModifier::l; + right_justified_conv.min_width = 4; + right_justified_conv.conv_val_raw = static_cast(L'€'); + + LIBC_NAMESPACE::printf_core::convert(&writer, right_justified_conv); + wb.buff[wb.buff_cur] = '\0'; + + ASSERT_STREQ(str, " €"); + ASSERT_EQ(writer.get_chars_written(), size_t{4}); +} + +TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionInvalid) { + LIBC_NAMESPACE::printf_core::FormatSection section; + section.has_conv = true; + section.raw_string = "%lc"; + section.conv_name = 'c'; 
+ section.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::l; + // An invalid wide character. + section.conv_val_raw = static_cast(0xFFFFFFFF); + + int ret = LIBC_NAMESPACE::printf_core::convert(&writer, section); + + ASSERT_EQ(ret, -1); +} + +TEST_F(LlvmLibcPrintfConverterTest, WideCharWEOFConversion) { + LIBC_NAMESPACE::printf_core::FormatSection section; + section.has_conv = true; + section.raw_string = "%lc"; + section.conv_name = 'c'; + section.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::l; + // WEOF value. + section.conv_val_raw = static_cast(WEOF); + + int ret = LIBC_NAMESPACE::printf_core::convert(&writer, section); + + ASSERT_EQ(ret, -1); +} diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp index 9d192828860f7..b2edf2b61d992 100644 --- a/libc/test/src/stdio/printf_core/parser_test.cpp +++ b/libc/test/src/stdio/printf_core/parser_test.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "hdr/types/wchar_t.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/string_view.h" #include "src/__support/arg_list.h" @@ -370,6 +371,24 @@ TEST(LlvmLibcPrintfParserTest, ASSERT_PFORMAT_EQ(expected, format_arr[0]); } +TEST(LlvmLibcPrintfParserTest, EvalOneArgWithWideCharacter) { + LIBC_NAMESPACE::printf_core::FormatSection format_arr[2]; + const char *str = "%lc"; + wchar_t arg1 = L'€'; + evaluate(format_arr, str, arg1); + + LIBC_NAMESPACE::printf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = {str, 3}; + expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::l; + expected.conv_val_raw = + static_cast::StorageType>(arg1); + expected.conv_name = 'c'; + + ASSERT_PFORMAT_EQ(expected, format_arr[0]); +} + #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE TEST(LlvmLibcPrintfParserTest, IndexModeOneArg) { From a9c58577c6e98e1dcd3068202fcdfee532eff12e Mon Sep 17 
00:00:00 2001 From: "shubh@DOE" Date: Wed, 3 Dec 2025 00:48:59 -0800 Subject: [PATCH 2/5] fix build --- libc/src/stdio/printf_core/char_converter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h index 43d3ebb70e708..a05e9eb6f1a40 100644 --- a/libc/src/stdio/printf_core/char_converter.h +++ b/libc/src/stdio/printf_core/char_converter.h @@ -47,7 +47,7 @@ LIBC_INLINE int convert_char(Writer *writer, internal::mbstate mbstate; wchar_t wc = static_cast(wi); - auto ret = internal::wcrtomb(mb_str, wc, &internal_mbstate); + auto ret = internal::wcrtomb(mb_str, wc, &mbstate); if (!ret.has_value()) { return -1; } From cef5d5bbf34b10da45ba8edf9c617b54144b2891 Mon Sep 17 00:00:00 2001 From: "shubh@DOE" Date: Thu, 4 Dec 2025 10:39:43 -0800 Subject: [PATCH 3/5] weof fix --- libc/src/stdio/printf_core/CMakeLists.txt | 1 + libc/src/stdio/printf_core/char_converter.h | 1 + 2 files changed, 2 insertions(+) diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt index 798e8706b5331..6d1866ec755f2 100644 --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -114,6 +114,7 @@ add_header_library( libc.include.inttypes libc.hdr.types.wchar_t libc.hdr.types.wint_t + libc.hdr.wchar_macros libc.src.__support.big_int libc.src.__support.common libc.src.__support.CPP.limits diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h index a05e9eb6f1a40..e46507fb6d154 100644 --- a/libc/src/stdio/printf_core/char_converter.h +++ b/libc/src/stdio/printf_core/char_converter.h @@ -11,6 +11,7 @@ #include "hdr/types/wchar_t.h" #include "hdr/types/wint_t.h" +#include "hdr/wchar_macros.h" #include "src/__support/macros/config.h" #include "src/__support/wchar/mbstate.h" #include "src/__support/wchar/wcrtomb.h" From a9e0814ee0d10ca905ce19b3a49bb7326c2a295f Mon Sep 17 
00:00:00 2001 From: "shubh@DOE" Date: Thu, 4 Dec 2025 11:14:18 -0800 Subject: [PATCH 4/5] fix test errors --- libc/test/src/stdio/printf_core/CMakeLists.txt | 1 + libc/test/src/stdio/printf_core/converter_test.cpp | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/libc/test/src/stdio/printf_core/CMakeLists.txt b/libc/test/src/stdio/printf_core/CMakeLists.txt index f65d2baef41c7..d3be991f9f226 100644 --- a/libc/test/src/stdio/printf_core/CMakeLists.txt +++ b/libc/test/src/stdio/printf_core/CMakeLists.txt @@ -34,6 +34,7 @@ add_libc_unittest( converter_test.cpp DEPENDS libc.hdr.types.wchar_t + libc.hdr.wchar_macros libc.src.stdio.printf_core.converter libc.src.stdio.printf_core.writer libc.src.stdio.printf_core.core_structs diff --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp index 296df9d9e6fa5..9476b799c9be3 100644 --- a/libc/test/src/stdio/printf_core/converter_test.cpp +++ b/libc/test/src/stdio/printf_core/converter_test.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "hdr/types/wchar_t.h" +#include "hdr/wchar_macros.h" #include "src/stdio/printf_core/converter.h" #include "src/stdio/printf_core/core_structs.h" #include "src/stdio/printf_core/writer.h" @@ -269,7 +270,7 @@ TEST_F(LlvmLibcPrintfConverterTest, WideCharConversion) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "€"); - ASSERT_EQ(writer.get_chars_written(), size_t{1}); + ASSERT_EQ(writer.get_chars_written(), size_t{3}); } TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionLeftJustified) { @@ -288,7 +289,7 @@ TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionLeftJustified) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, "€ "); - ASSERT_EQ(writer.get_chars_written(), size_t{4}); + ASSERT_EQ(writer.get_chars_written(), size_t{6}); } TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionRightJustified) { @@ -305,7 +306,7 @@ 
TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionRightJustified) { wb.buff[wb.buff_cur] = '\0'; ASSERT_STREQ(str, " €"); - ASSERT_EQ(writer.get_chars_written(), size_t{4}); + ASSERT_EQ(writer.get_chars_written(), size_t{6}); } TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionInvalid) { From 15a3e256a53bc30f6796d66bb2277e55e1e3edcb Mon Sep 17 00:00:00 2001 From: "shubh@DOE" Date: Sat, 6 Dec 2025 16:25:36 -0800 Subject: [PATCH 5/5] add snprintf test, flag for windows --- libc/docs/dev/printf_behavior.rst | 6 +++ libc/src/stdio/printf_core/char_converter.h | 4 ++ libc/test/src/stdio/CMakeLists.txt | 2 + libc/test/src/stdio/snprintf_test.cpp | 44 +++++++++++++++++++++ 4 files changed, 56 insertions(+) diff --git a/libc/docs/dev/printf_behavior.rst b/libc/docs/dev/printf_behavior.rst index 01ab128a1f238..ec72113eff972 100644 --- a/libc/docs/dev/printf_behavior.rst +++ b/libc/docs/dev/printf_behavior.rst @@ -71,6 +71,12 @@ conversions (%r, %k); any fixed point number conversion will be treated as invalid. This reduces code size. This has no effect if the current compiler does not support fixed point numbers. +LIBC_COPT_PRINTF_DISABLE_WCHAR_T +-------------------------------- +When set, this flag disables support for wide characters (%lc). Such conversions +will be ignored. This reduces code size. This will be set by default on Windows +platforms, as printf does not currently support UTF-16 wide characters. + LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS ---------------------------------- When set, this flag disables the nullptr checks in %n and %s. 
diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h index e46507fb6d154..4db6d44d94d7d 100644 --- a/libc/src/stdio/printf_core/char_converter.h +++ b/libc/src/stdio/printf_core/char_converter.h @@ -37,6 +37,7 @@ LIBC_INLINE int convert_char(Writer *writer, RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces)); } +#ifndef LIBC_TARGET_OS_IS_WINDOWS if (to_conv.length_modifier == LengthModifier::l) { wint_t wi = static_cast(to_conv.conv_val_raw); @@ -56,8 +57,11 @@ LIBC_INLINE int convert_char(Writer *writer, RET_IF_RESULT_NEGATIVE(writer->write({mb_str, ret.value()})); } else { + #endif // !LIBC_TARGET_OS_IS_WINDOWS RET_IF_RESULT_NEGATIVE(writer->write(c)); + #ifndef LIBC_TARGET_OS_IS_WINDOWS } + #endif // !LIBC_TARGET_OS_IS_WINDOWS // If the padding is on the right side, write the spaces last. if (padding_spaces > 0 && diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index a39428fb8d16c..22d593a682960 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -159,6 +159,8 @@ add_libc_test( SRCS snprintf_test.cpp DEPENDS + libc.hdr.types.wchar_t + libc.hdr.wchar_macros libc.src.stdio.snprintf ) diff --git a/libc/test/src/stdio/snprintf_test.cpp b/libc/test/src/stdio/snprintf_test.cpp index 95507e0885dbf..43f44b521a119 100644 --- a/libc/test/src/stdio/snprintf_test.cpp +++ b/libc/test/src/stdio/snprintf_test.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include "hdr/types/wchar_t.h" +#include "hdr/wchar_macros.h" #include "src/stdio/snprintf.h" #include "test/UnitTest/ErrnoCheckingTest.h" @@ -74,3 +76,45 @@ TEST(LlvmLibcSNPrintfTest, CharsWrittenOverflow) { EXPECT_LT(written, 0); ASSERT_ERRNO_FAILURE(); } + +#ifndef LIBC_TARGET_OS_IS_WINDOWS +TEST(LlvmLibcSNPrintfTest, WideCharConversion) { + char buff[16]; + int written; + + written = LIBC_NAMESPACE::snprintf(buff, sizeof(buff), 
"%lc", + static_cast(L'€')); + EXPECT_EQ(written, 3); + ASSERT_STREQ(buff, "€"); +} + +TEST(LlvmLibcSNPrintfTest, WideCharConversionLeftJustified) { + char buff[16]; + int written; + + written = LIBC_NAMESPACE::snprintf(buff, sizeof(buff), "%-4lc", + static_cast(L'€')); + EXPECT_EQ(written, 6); + ASSERT_STREQ(buff, "€ "); +} + +TEST(LlvmLibcSNPrintfTest, WideCharConversionRightJustified) { + char buff[16]; + int written; + + written = LIBC_NAMESPACE::snprintf(buff, sizeof(buff), "%4lc", + static_cast(L'€')); + EXPECT_EQ(written, 6); + ASSERT_STREQ(buff, " €"); +} + +TEST(LlvmLibcSNPrintfTest, WideCharWEOFConversion) { + char buff[16]; + int written; + + written = LIBC_NAMESPACE::snprintf(buff, sizeof(buff), "%lc", + static_cast(WEOF)); + EXPECT_EQ(written, -1); + ASSERT_ERRNO_FAILURE(); +} +#endif // !LIBC_TARGET_OS_IS_WINDOWS \ No newline at end of file