Skip to content

Commit 6209c9b

Browse files
committed
Allow base32 decoder to auto-pad truncated blocks
Introduce PadResult, trim/pad incomplete base32 chunks, emit decoded prefixes, and still return "error: invalid input", in line with GNU basenc.
1 parent 5b3cd54 commit 6209c9b

File tree

2 files changed

+47
-20
lines changed

2 files changed

+47
-20
lines changed

src/uu/base32/src/base_common.rs

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
use clap::{Arg, ArgAction, Command};
99
use std::ffi::OsString;
1010
use std::fs::File;
11-
use std::io::{self, ErrorKind, Read, Seek};
11+
use std::io::{self, ErrorKind, Read, Seek, Write};
1212
use std::path::{Path, PathBuf};
1313
use uucore::display::Quotable;
1414
use uucore::encoding::{
@@ -193,7 +193,7 @@ pub fn handle_input<R: Read + Seek>(input: &mut R, format: Format, config: Confi
193193

194194
let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref();
195195
let mut stdout_lock = io::stdout().lock();
196-
if config.decode {
196+
let result = if config.decode {
197197
fast_decode::fast_decode(
198198
read,
199199
&mut stdout_lock,
@@ -207,6 +207,14 @@ pub fn handle_input<R: Read + Seek>(input: &mut R, format: Format, config: Confi
207207
supports_fast_decode_and_encode_ref,
208208
config.wrap_cols,
209209
)
210+
};
211+
212+
// Ensure any pending stdout buffer is flushed even if decoding failed; GNU basenc
213+
// keeps already-decoded bytes visible before reporting the error.
214+
match (result, stdout_lock.flush()) {
215+
(res, Ok(())) => res,
216+
(Ok(_), Err(err)) => Err(err.into()),
217+
(Err(original), Err(_)) => Err(original),
210218
}
211219
}
212220

@@ -531,6 +539,7 @@ pub mod fast_decode {
531539
fn write_to_output(decoded_buffer: &mut Vec<u8>, output: &mut dyn Write) -> io::Result<()> {
532540
// Write all data in `decoded_buffer` to `output`
533541
output.write_all(decoded_buffer.as_slice())?;
542+
output.flush()?;
534543

535544
decoded_buffer.clear();
536545

@@ -584,16 +593,6 @@ pub mod fast_decode {
584593
assert!(decode_in_chunks_of_size > 0);
585594
assert!(valid_multiple > 0);
586595

587-
if !ignore_garbage {
588-
// Match GNU basenc: fail fast when any non alphabet/non newline slips through without -i.
589-
if input
590-
.iter()
591-
.any(|&byte| byte != b'\n' && byte != b'\r' && !alphabet_table[usize::from(byte)])
592-
{
593-
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
594-
}
595-
}
596-
597596
// Start of buffers
598597

599598
// Decoded data that needs to be written to `output`
@@ -650,11 +649,22 @@ pub mod fast_decode {
650649
}
651650

652651
if !buffer.is_empty() {
653-
let padded = supports_fast_decode_and_encode.pad_remainder(&buffer);
654-
let final_chunk = padded.as_deref().unwrap_or(&buffer);
652+
let mut owned_chunk: Option<Vec<u8>> = None;
653+
let mut had_invalid_tail = false;
654+
655+
if let Some(pad_result) = supports_fast_decode_and_encode.pad_remainder(&buffer) {
656+
had_invalid_tail = pad_result.had_invalid_tail;
657+
owned_chunk = Some(pad_result.chunk);
658+
}
659+
660+
let final_chunk = owned_chunk.as_deref().unwrap_or(&buffer);
655661

656662
supports_fast_decode_and_encode.decode_into_vec(final_chunk, &mut decoded_buffer)?;
657663
write_to_output(&mut decoded_buffer, output)?;
664+
665+
if had_invalid_tail {
666+
return Err(USimpleError::new(1, "error: invalid input".to_owned()));
667+
}
658668
}
659669

660670
Ok(())

src/uucore/src/lib/features/encoding.rs

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,11 @@ impl EncodingWrapper {
214214
}
215215
}
216216

217+
pub struct PadResult {
218+
pub chunk: Vec<u8>,
219+
pub had_invalid_tail: bool,
220+
}
221+
217222
pub trait SupportsFastDecodeAndEncode {
218223
/// Returns the list of characters used by this encoding
219224
fn alphabet(&self) -> &'static [u8];
@@ -255,7 +260,7 @@ pub trait SupportsFastDecodeAndEncode {
255260

256261
/// Gives encoding-specific logic a chance to pad a trailing, non-empty remainder
257262
/// before the final decode attempt. The default implementation opts out.
258-
fn pad_remainder(&self, _remainder: &[u8]) -> Option<Vec<u8>> {
263+
fn pad_remainder(&self, _remainder: &[u8]) -> Option<PadResult> {
259264
None
260265
}
261266
}
@@ -561,21 +566,33 @@ impl SupportsFastDecodeAndEncode for Base32Wrapper {
561566
self.inner.valid_decoding_multiple()
562567
}
563568

564-
fn pad_remainder(&self, remainder: &[u8]) -> Option<Vec<u8>> {
569+
fn pad_remainder(&self, remainder: &[u8]) -> Option<PadResult> {
565570
if remainder.is_empty() || remainder.contains(&b'=') {
566571
return None;
567572
}
568573

569574
const VALID_REMAINDERS: [usize; 4] = [2, 4, 5, 7];
570575

571-
if !VALID_REMAINDERS.contains(&remainder.len()) {
576+
let mut len = remainder.len();
577+
let mut trimmed = false;
578+
579+
while len > 0 && !VALID_REMAINDERS.contains(&len) {
580+
len -= 1;
581+
trimmed = true;
582+
}
583+
584+
if len == 0 {
572585
return None;
573586
}
574587

575-
let mut padded = remainder.to_vec();
576-
let missing = self.valid_decoding_multiple() - remainder.len();
588+
let mut padded = remainder[..len].to_vec();
589+
let missing = self.valid_decoding_multiple() - padded.len();
577590
padded.extend(std::iter::repeat_n(b'=', missing));
578-
Some(padded)
591+
592+
Some(PadResult {
593+
chunk: padded,
594+
had_invalid_tail: trimmed,
595+
})
579596
}
580597

581598
fn supports_partial_decode(&self) -> bool {

0 commit comments

Comments
 (0)