@@ -123,9 +123,9 @@ pub(crate) fn decode_suffix(
123123 // TODO how do we know this?
124124 debug_assert ! ( morsels_in_leftover != 1 && morsels_in_leftover != 5 ) ;
125125 let leftover_bytes_to_append = morsels_in_leftover * 6 / 8 ;
126- let leftover_bits_to_append = leftover_bytes_to_append * 8 ;
127- // A couple percent speedup from nudging these ORs to use more ILP with a two-way split
128- let leftover_bits = ( ( u64:: from ( morsels[ 0 ] ) << 58 )
126+ // Put the up to 6 complete bytes as the high bytes.
127+ // Gain a couple percent speedup from nudging these ORs to use more ILP with a two-way split.
128+ let mut leftover_num = ( ( u64:: from ( morsels[ 0 ] ) << 58 )
129129 | ( u64:: from ( morsels[ 1 ] ) << 52 )
130130 | ( u64:: from ( morsels[ 2 ] ) << 46 )
131131 | ( u64:: from ( morsels[ 3 ] ) << 40 ) )
@@ -136,24 +136,22 @@ pub(crate) fn decode_suffix(
136136
137137 // If any bits are set below the complete bytes we are about to write, the last symbol encodes
138138 // trailing bits that will not be included in the output.
139- let mask = !0 >> leftover_bits_to_append ;
140- if !decode_allow_trailing_bits && ( leftover_bits & mask) != 0 {
139+ let mask = !0 >> ( leftover_bytes_to_append * 8 ) ;
140+ if !decode_allow_trailing_bits && ( leftover_num & mask) != 0 {
141141 // last morsel is at `morsels_in_leftover` - 1
142142 return Err ( DecodeError :: InvalidLastSymbol (
143143 start_of_leftovers + morsels_in_leftover - 1 ,
144144 last_symbol,
145145 ) ) ;
146146 }
147147
148- // TODO benchmark simply converting to big endian bytes
149- let mut leftover_bits_appended_to_buf = 0 ;
150- while leftover_bits_appended_to_buf < leftover_bits_to_append {
151- // `as` simply truncates the higher bits, which is what we want here
152- let selected_bits = ( leftover_bits >> ( 56 - leftover_bits_appended_to_buf ) ) as u8 ;
153- output[ output_index] = selected_bits ;
148+ // Strangely, this approach benchmarks better than writing bytes one at a time,
149+ // or copy_from_slice into output.
150+ for _ in 0 ..leftover_bytes_to_append {
151+ let hi_byte = ( leftover_num >> 56 ) as u8 ;
152+ leftover_num <<= 8 ;
153+ output[ output_index] = hi_byte ;
154154 output_index += 1 ;
155-
156- leftover_bits_appended_to_buf += 8 ;
157155 }
158156
159157 Ok ( DecodeMetadata :: new (
0 commit comments