88use clap:: { Arg , ArgAction , Command } ;
99use std:: ffi:: OsString ;
1010use std:: fs:: File ;
11- use std:: io:: { self , ErrorKind , Read , Seek } ;
11+ use std:: io:: { self , ErrorKind , Read , Seek , Write } ;
1212use std:: path:: { Path , PathBuf } ;
1313use uucore:: display:: Quotable ;
1414use uucore:: encoding:: {
15- BASE2LSBF , BASE2MSBF , Base58Wrapper , Base64SimdWrapper , EncodingWrapper , Format ,
15+ BASE2LSBF , BASE2MSBF , Base32Wrapper , Base58Wrapper , Base64SimdWrapper , EncodingWrapper , Format ,
1616 SupportsFastDecodeAndEncode , Z85Wrapper ,
1717 for_base_common:: { BASE32 , BASE32HEX , BASE64URL , HEXUPPER_PERMISSIVE } ,
1818} ;
@@ -193,7 +193,7 @@ pub fn handle_input<R: Read + Seek>(input: &mut R, format: Format, config: Confi
193193
194194 let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode. as_ref ( ) ;
195195 let mut stdout_lock = io:: stdout ( ) . lock ( ) ;
196- if config. decode {
196+ let result = if config. decode {
197197 fast_decode:: fast_decode (
198198 read,
199199 & mut stdout_lock,
@@ -207,6 +207,14 @@ pub fn handle_input<R: Read + Seek>(input: &mut R, format: Format, config: Confi
207207 supports_fast_decode_and_encode_ref,
208208 config. wrap_cols ,
209209 )
210+ } ;
211+
212+ // Ensure any pending stdout buffer is flushed even if decoding failed; GNU basenc
213+ // keeps already-decoded bytes visible before reporting the error.
214+ match ( result, stdout_lock. flush ( ) ) {
215+ ( res, Ok ( ( ) ) ) => res,
216+ ( Ok ( _) , Err ( err) ) => Err ( err. into ( ) ) ,
217+ ( Err ( original) , Err ( _) ) => Err ( original) ,
210218 }
211219}
212220
@@ -247,14 +255,14 @@ pub fn get_supports_fast_decode_and_encode(
247255 // spell-checker:disable-next-line
248256 b"01" ,
249257 ) ) ,
250- Format :: Base32 => Box :: from ( EncodingWrapper :: new (
258+ Format :: Base32 => Box :: from ( Base32Wrapper :: new (
251259 BASE32 ,
252260 BASE32_VALID_DECODING_MULTIPLE ,
253261 BASE32_UNPADDED_MULTIPLE ,
254262 // spell-checker:disable-next-line
255263 b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=" ,
256264 ) ) ,
257- Format :: Base32Hex => Box :: from ( EncodingWrapper :: new (
265+ Format :: Base32Hex => Box :: from ( Base32Wrapper :: new (
258266 BASE32HEX ,
259267 BASE32_VALID_DECODING_MULTIPLE ,
260268 BASE32_UNPADDED_MULTIPLE ,
@@ -502,43 +510,21 @@ pub mod fast_encode {
502510
503511pub mod fast_decode {
504512 use std:: io:: { self , Write } ;
505- use uucore:: { encoding:: SupportsFastDecodeAndEncode , error:: UResult } ;
513+ use uucore:: {
514+ encoding:: SupportsFastDecodeAndEncode ,
515+ error:: { UResult , USimpleError } ,
516+ } ;
506517
507518 // Start of helper functions
/// Builds a 256-entry membership table for `alphabet`.
///
/// `table[usize::from(b)]` is `true` exactly when byte `b` occurs in
/// `alphabet`. Repeated bytes in `alphabet` are harmless: the entry is
/// simply set to `true` again.
///
/// The table lets callers test each input byte with a single index
/// operation instead of scanning the alphabet every time.
fn alphabet_lookup(alphabet: &[u8]) -> [bool; 256] {
    // Start with "nothing is a member"; only alphabet bytes get switched on.
    let mut table = [false; 256];

    for &byte in alphabet {
        // A `u8` always fits the 256-entry table, so this index cannot go out of bounds.
        table[usize::from(byte)] = true;
    }

    table
}
543529
544530 fn decode_in_chunks_to_buffer (
@@ -553,11 +539,44 @@ pub mod fast_decode {
/// Writes the whole of `decoded_buffer` to `output`, flushes `output`, and
/// then empties `decoded_buffer` so it can be reused for the next chunk.
///
/// # Errors
///
/// Returns the underlying I/O error if writing or flushing fails. In that
/// case `decoded_buffer` is left untouched.
fn write_to_output(decoded_buffer: &mut Vec<u8>, output: &mut dyn Write) -> io::Result<()> {
    // Write all data in `decoded_buffer` to `output`
    output.write_all(decoded_buffer.as_slice())?;
    // Push the bytes out right away so that data written so far does not sit
    // in an intermediate buffer if a later step fails.
    output.flush()?;

    // Keep the allocation, drop the contents.
    decoded_buffer.clear();

    Ok(())
}
548+
549+ fn flush_ready_chunks (
550+ buffer : & mut Vec < u8 > ,
551+ block_limit : usize ,
552+ valid_multiple : usize ,
553+ supports_fast_decode_and_encode : & dyn SupportsFastDecodeAndEncode ,
554+ decoded_buffer : & mut Vec < u8 > ,
555+ output : & mut dyn Write ,
556+ ) -> UResult < ( ) > {
557+ // While at least one full decode block is buffered, keep draining
558+ // it and never yield more than block_limit per chunk.
559+ while buffer. len ( ) >= valid_multiple {
560+ let take = buffer. len ( ) . min ( block_limit) ;
561+ let aligned_take = take - ( take % valid_multiple) ;
562+
563+ if aligned_take < valid_multiple {
564+ break ;
565+ }
566+
567+ decode_in_chunks_to_buffer (
568+ supports_fast_decode_and_encode,
569+ & buffer[ ..aligned_take] ,
570+ decoded_buffer,
571+ ) ?;
572+
573+ write_to_output ( decoded_buffer, output) ?;
574+
575+ buffer. drain ( ..aligned_take) ;
576+ }
577+
578+ Ok ( ( ) )
579+ }
561580 // End of helper functions
562581
563582 pub fn fast_decode (
@@ -569,22 +588,12 @@ pub mod fast_decode {
569588 const DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE : usize = 1_024 ;
570589
571590 let alphabet = supports_fast_decode_and_encode. alphabet ( ) ;
572- let decode_in_chunks_of_size = supports_fast_decode_and_encode. valid_decoding_multiple ( )
573- * DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE ;
591+ let alphabet_table = alphabet_lookup ( alphabet) ;
592+ let valid_multiple = supports_fast_decode_and_encode. valid_decoding_multiple ( ) ;
593+ let decode_in_chunks_of_size = valid_multiple * DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE ;
574594
575595 assert ! ( decode_in_chunks_of_size > 0 ) ;
576-
577- // Note that it's not worth using "data-encoding"'s ignore functionality if `ignore_garbage` is true, because
578- // "data-encoding"'s ignore functionality cannot discard non-ASCII bytes. The data has to be filtered before
579- // passing it to "data-encoding", so there is no point in doing any filtering in "data-encoding". This also
580- // allows execution to stay on the happy path in "data-encoding":
581- // https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L754-L756
582- // It is also not worth using "data-encoding"'s ignore functionality when `ignore_garbage` is
583- // false.
584- // Note that the alphabet constants above already include the padding characters
585- // TODO
586- // Precompute this
587- let table = alphabet_to_table ( alphabet, ignore_garbage) ;
596+ assert ! ( valid_multiple > 0 ) ;
588597
589598 // Start of buffers
590599
@@ -595,35 +604,69 @@ pub mod fast_decode {
595604
596605 let mut buffer = Vec :: with_capacity ( decode_in_chunks_of_size) ;
597606
598- input
599- . iter ( )
600- . filter ( |ch| table[ usize:: from ( * * ch) ] )
601- . for_each ( |ch| {
602- buffer. push ( * ch) ;
603- // How many bytes to steal from `read_buffer` to get
604- // `leftover_buffer` to the right size
605- if buffer. len ( ) == decode_in_chunks_of_size {
606- assert_eq ! ( decode_in_chunks_of_size, buffer. len( ) ) ;
607- // Decode data in chunks, then place it in `decoded_buffer`
608- decode_in_chunks_to_buffer (
609- supports_fast_decode_and_encode,
610- & buffer,
611- & mut decoded_buffer,
612- )
613- . unwrap ( ) ;
614- // Write all data in `decoded_buffer` to `output`
615- write_to_output ( & mut decoded_buffer, output) . unwrap ( ) ;
616- buffer. clear ( ) ;
617- }
618- } ) ;
619- // Cleanup
620- // `input` has finished producing data, so the data remaining in the buffers needs to be decoded and printed
621- {
622- // Decode all remaining encoded bytes, placing them in `decoded_buffer`
623- supports_fast_decode_and_encode. decode_into_vec ( & buffer, & mut decoded_buffer) ?;
607+ let supports_partial_decode = supports_fast_decode_and_encode. supports_partial_decode ( ) ;
624608
625- // Write all data in `decoded_buffer` to `output`
609+ for & byte in & input {
610+ if byte == b'\n' || byte == b'\r' {
611+ continue ;
612+ }
613+
614+ if alphabet_table[ usize:: from ( byte) ] {
615+ buffer. push ( byte) ;
616+ } else if ignore_garbage {
617+ continue ;
618+ } else {
619+ return Err ( USimpleError :: new ( 1 , "error: invalid input" . to_owned ( ) ) ) ;
620+ }
621+
622+ if supports_partial_decode {
623+ flush_ready_chunks (
624+ & mut buffer,
625+ decode_in_chunks_of_size,
626+ valid_multiple,
627+ supports_fast_decode_and_encode,
628+ & mut decoded_buffer,
629+ output,
630+ ) ?;
631+ } else if buffer. len ( ) == decode_in_chunks_of_size {
632+ decode_in_chunks_to_buffer (
633+ supports_fast_decode_and_encode,
634+ & buffer,
635+ & mut decoded_buffer,
636+ ) ?;
637+ write_to_output ( & mut decoded_buffer, output) ?;
638+ buffer. clear ( ) ;
639+ }
640+ }
641+
642+ if supports_partial_decode {
643+ flush_ready_chunks (
644+ & mut buffer,
645+ decode_in_chunks_of_size,
646+ valid_multiple,
647+ supports_fast_decode_and_encode,
648+ & mut decoded_buffer,
649+ output,
650+ ) ?;
651+ }
652+
653+ if !buffer. is_empty ( ) {
654+ let mut owned_chunk: Option < Vec < u8 > > = None ;
655+ let mut had_invalid_tail = false ;
656+
657+ if let Some ( pad_result) = supports_fast_decode_and_encode. pad_remainder ( & buffer) {
658+ had_invalid_tail = pad_result. had_invalid_tail ;
659+ owned_chunk = Some ( pad_result. chunk ) ;
660+ }
661+
662+ let final_chunk = owned_chunk. as_deref ( ) . unwrap_or ( & buffer) ;
663+
664+ supports_fast_decode_and_encode. decode_into_vec ( final_chunk, & mut decoded_buffer) ?;
626665 write_to_output ( & mut decoded_buffer, output) ?;
666+
667+ if had_invalid_tail {
668+ return Err ( USimpleError :: new ( 1 , "error: invalid input" . to_owned ( ) ) ) ;
669+ }
627670 }
628671
629672 Ok ( ( ) )
0 commit comments