-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[libc] Support %lc in printf #169983
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[libc] Support %lc in printf #169983
Conversation
|
@llvm/pr-subscribers-libc Author: Shubh Pachchigar (shubhe25p) ChangesFull diff: https://github.com/llvm/llvm-project/pull/169983.diff 4 Files Affected:
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 624129b2b36e7..f35a65de1f110 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -111,6 +111,7 @@ add_header_library(
.printf_config
.writer
libc.include.inttypes
+ libc.hdr.types.wchar_t
libc.src.__support.big_int
libc.src.__support.common
libc.src.__support.CPP.limits
@@ -123,6 +124,8 @@ add_header_library(
libc.src.__support.integer_to_string
libc.src.__support.libc_assert
libc.src.__support.uint128
+ libc.src.__support.wchar.mbstate
+ libc.src.__support.wchar.wcrtomb
libc.src.__support.StringUtil.error_to_string
libc.src.string.memory_utils.inline_memcpy
)
diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h
index fd2eb2553887a..31dd34fe7a797 100644
--- a/libc/src/stdio/printf_core/char_converter.h
+++ b/libc/src/stdio/printf_core/char_converter.h
@@ -1,4 +1,4 @@
-//===-- String Converter for printf -----------------------------*- C++ -*-===//
+//===-- Character Converter for printf -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -9,7 +9,10 @@
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
+#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcrtomb.h"
#include "src/stdio/printf_core/converter_utils.h"
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"
@@ -20,8 +23,11 @@ namespace printf_core {
template <WriteMode write_mode>
LIBC_INLINE int convert_char(Writer<write_mode> *writer,
const FormatSection &to_conv) {
- char c = static_cast<char>(to_conv.conv_val_raw);
-
+ char c;
+ wchar_t wc;
+ char mb_str[MB_LEN_MAX];
+ internal::mbstate internal_mbstate = {0};
+ int ret = 0;
constexpr int STRING_LEN = 1;
size_t padding_spaces =
@@ -33,7 +39,21 @@ LIBC_INLINE int convert_char(Writer<write_mode> *writer,
RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces));
}
- RET_IF_RESULT_NEGATIVE(writer->write(c));
+ if (to_conv.length_modifier == LengthModifier::l) {
+ wc = static_cast<wchar_t>(to_conv.conv_val_raw);
+ ret = internal::wcrtomb(mb_str, wc, &internal_mbstate);
+ if (ret <= 0) {
+ return -1;
+ }
+
+ for (int i = 0; i < ret; i++) {
+ RET_IF_RESULT_NEGATIVE(writer->write(mb_str[i]));
+ }
+
+ } else {
+ c = static_cast<char>(to_conv.conv_val_raw);
+ RET_IF_RESULT_NEGATIVE(writer->write(c));
+ }
// If the padding is on the right side, write the spaces last.
if (padding_spaces > 0 &&
diff --git a/libc/test/src/stdio/printf_core/CMakeLists.txt b/libc/test/src/stdio/printf_core/CMakeLists.txt
index ff7ebbc4f5fd0..a4c919420777d 100644
--- a/libc/test/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/test/src/stdio/printf_core/CMakeLists.txt
@@ -35,4 +35,5 @@ add_libc_unittest(
libc.src.stdio.printf_core.converter
libc.src.stdio.printf_core.writer
libc.src.stdio.printf_core.core_structs
+ libc.hdr.types.wchar_t
)
diff --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp
index 2dae2a22c864c..026e36747d4df 100644
--- a/libc/test/src/stdio/printf_core/converter_test.cpp
+++ b/libc/test/src/stdio/printf_core/converter_test.cpp
@@ -9,7 +9,7 @@
#include "src/stdio/printf_core/converter.h"
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"
-
+#include "hdr/types/wchar_t.h"
#include "test/UnitTest/Test.h"
class LlvmLibcPrintfConverterTest : public LIBC_NAMESPACE::testing::Test {
@@ -255,3 +255,56 @@ TEST_F(LlvmLibcPrintfConverterTest, OctConversion) {
ASSERT_STREQ(str, "1234");
ASSERT_EQ(writer.get_chars_written(), size_t{4});
}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversion) {
+
+ LIBC_NAMESPACE::printf_core::FormatSection section;
+ section.has_conv = true;
+ section.raw_string = "%c";
+ section.conv_name = 'c';
+ section.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::l;
+ section.conv_val_raw = static_cast<wchar_t>(L'S');
+
+ LIBC_NAMESPACE::printf_core::convert(&writer, section);
+
+ wb.buff[wb.buff_cur] = '\0';
+
+ ASSERT_STREQ(str, "S");
+ ASSERT_EQ(writer.get_chars_written(), size_t{1});
+}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionLeftJustified) {
+ LIBC_NAMESPACE::printf_core::FormatSection left_justified_conv;
+ left_justified_conv.has_conv = true;
+ left_justified_conv.raw_string = "%-4c";
+ left_justified_conv.conv_name = 'c';
+ left_justified_conv.length_modifier =
+ LIBC_NAMESPACE::printf_core::LengthModifier::l;
+ left_justified_conv.flags =
+ LIBC_NAMESPACE::printf_core::FormatFlags::LEFT_JUSTIFIED;
+ left_justified_conv.min_width = 4;
+ left_justified_conv.conv_val_raw = static_cast<wchar_t>(L'S');
+
+ LIBC_NAMESPACE::printf_core::convert(&writer, left_justified_conv);
+ wb.buff[wb.buff_cur] = '\0';
+
+ ASSERT_STREQ(str, "S ");
+ ASSERT_EQ(writer.get_chars_written(), size_t{4});
+}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionRightJustified) {
+ LIBC_NAMESPACE::printf_core::FormatSection right_justified_conv;
+ right_justified_conv.has_conv = true;
+ right_justified_conv.raw_string = "%4c";
+ right_justified_conv.conv_name = 'c';
+ right_justified_conv.length_modifier =
+ LIBC_NAMESPACE::printf_core::LengthModifier::l;
+ right_justified_conv.min_width = 4;
+ right_justified_conv.conv_val_raw = static_cast<wchar_t>(L'S');
+
+ LIBC_NAMESPACE::printf_core::convert(&writer, right_justified_conv);
+ wb.buff[wb.buff_cur] = '\0';
+
+ ASSERT_STREQ(str, " S");
+ ASSERT_EQ(writer.get_chars_written(), size_t{4});
+}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
This PR has spun up three new interesting directions, each of which seems feasible with some guidance. I am not sure which one can be pulled into this PR and if it is even an issue. Let me know your thoughts @michaelrj-google:
|
38f014a to
c2a7797
Compare
| const FormatSection &to_conv) { | ||
| char c = static_cast<char>(to_conv.conv_val_raw); | ||
|
|
||
| char c; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please declare local variables in the scope where they're needed / combine this with initialization when possible.
That is, retain char c = static_cast<char>(...) in the else-block below, only declare local arrays / mbstate in the block for ell-specifier etc.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you so much for the feedback, I will update it.
| RET_IF_RESULT_NEGATIVE(writer->write(c)); | ||
| if (to_conv.length_modifier == LengthModifier::l) { | ||
| wc = static_cast<wchar_t>(static_cast<unsigned int>(to_conv.conv_val_raw)); | ||
| ret = internal::wcrtomb(mb_str, wc, &internal_mbstate); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How is this even working, given that internal::wcrtomb returns ErrorOr<size_t> ? Is it possible that your
int ret = internal::wcrtomb(...) works by round-tripping through operator bool conversion for ErrorOr, and thus will always return 0 or 1?
I don't see any tests with wint_t that would be represented as multiple bytes, or wint_t which can't represent a valid Unicode character (e.g. 0x12ffff), they should probably be added.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes exactly, I never realized this, thank you so much. My test was only using a wint_t which is only a single byte long so it worked. I will add more tests.
| return -1; | ||
| } | ||
|
|
||
| for (int i = 0; i < ret; i++) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Writer has an API to write a buffer (cpp::string_view), I believe you can call it as writer->write({mb_str, mb_len})
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
True it does, I will update the code.
| } | ||
|
|
||
| TEST_F(LlvmLibcPrintfConverterTest, WideCharConversion) { | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove empty line
|
|
||
| LIBC_NAMESPACE::printf_core::FormatSection section; | ||
| section.has_conv = true; | ||
| section.raw_string = "%c"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Make your raw_string consistent with its broken down into - e.g. here it must be "%lc". Same for test cases below..
| libc.src.stdio.printf_core.converter | ||
| libc.src.stdio.printf_core.writer | ||
| libc.src.stdio.printf_core.core_structs | ||
| libc.hdr.types.wchar_t |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please move this above libc.src. dependencies
| section.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::l; | ||
| section.conv_val_raw = static_cast<wchar_t>(L'S'); | ||
|
|
||
| LIBC_NAMESPACE::printf_core::convert(&writer, section); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you should also consider adding end-to-end test of this functionality - e.g. a test of snprintf invocation which prints converted wide characters to a multi-byte sting, and a test case in libc/test/src/stdio/printf_core/parser_test.cpp to ensure that %lc is parsed correctly.
For example, the Parser should accept the arguments of type wint_t in this case, according to specification - so probably you'd need to update the parsing code.
| char c; | ||
| wchar_t wc; | ||
| char mb_str[MB_LEN_MAX]; | ||
| internal::mbstate internal_mbstate = {0}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: it can be just "mbstate" variable, there's nothing particularly internal about it. Also, its default constructor would do the right thing already.
|
|
||
| RET_IF_RESULT_NEGATIVE(writer->write(c)); | ||
| if (to_conv.length_modifier == LengthModifier::l) { | ||
| wc = static_cast<wchar_t>(static_cast<unsigned int>(to_conv.conv_val_raw)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The argument type would be wint_t per standard, so you might need to handle WEOF and then static_cast.
You mentioned BigInt problems in the review thread, could you elaborate how are these relevant?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks this makes sense, what I was referring to was that naively casting from conv_val_raw (BigInt) to wchar_t leads to an error in BigInt to(), as the casting has a guard clause that essentially checks:
struct BigInt {
...
template <typename T>
LIBC_INLINE constexpr cpp::enable_if_t<
cpp::is_integral_v<T> && !cpp::is_same_v<T, bool>, T>
to() const {
... }
is_integral_v will return false if the type is not one of:
char, signed char, unsigned char, short, unsigned short, int,
unsigned int, long, unsigned long, long long, unsigned long long, bool
That is the reason why I cast to unsigned int first. Let me know your thoughts
|
Thank you for starting to work on this!
See comment in CL - I think you need to roundtrip the input arguments through
Could you figure out why is our UTF-32-only wcrtomb gets pulled in by this change? I.e. is there a particular function that is enabled on Windows and depends on the printf functionality?
That sounds like a natural follow-up! |
c2a7797 to
7a33e23
Compare
🐧 Linux x64 Test Results✅ The build succeeded and no tests ran. This is expected in some build configurations. |
| @@ -33,7 +37,27 @@ LIBC_INLINE int convert_char(Writer<write_mode> *writer, | |||
| RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces)); | |||
| } | |||
|
|
|||
| RET_IF_RESULT_NEGATIVE(writer->write(c)); | |||
| if (to_conv.length_modifier == LengthModifier::l) { | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this looks good overall, but we'll want to have a compile flag to disable wide characters to save code size. Could you add something similar to LIBC_COPT_PRINTF_DISABLE_STRERROR that just disables the wide character portion? You'll also need to guard the includes at the top of this file and the tests.
| @@ -255,3 +256,83 @@ TEST_F(LlvmLibcPrintfConverterTest, OctConversion) { | |||
| ASSERT_STREQ(str, "1234"); | |||
| ASSERT_EQ(writer.get_chars_written(), size_t{4}); | |||
| } | |||
|
|
|||
| TEST_F(LlvmLibcPrintfConverterTest, WideCharConversion) { | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For simplicity most of the functionality testing of the printf conversions is actually done in sprintf_test.cpp. Doing it that way makes the tests less verbose and more accurate to their actual use. I would recommend removing the tests from converter_test.cpp and only having the wide character tests in sprintf_test.cpp.
Add %lc support to libc printf by utilizing wcrtomb internal function, also added relevant unit tests. The work has spun up new issues stated below. Resolves #166598