@@ -85,23 +85,23 @@ fn do_main() -> io::Result<()> {
8585
8686 let erased_label = if erase { ", erased" } else { "" } ;
8787 match weights {
88- Unit => process ( readers, "" , |line | ( line , 1i64 ) ) ,
88+ Unit => process ( readers, "" , |_line | ( None , 1i64 ) ) ,
8989 Integral => {
9090 let re = Regex :: new ( r"(([+-]?)\d+)(\D*)$" ) . unwrap ( ) ;
9191 process (
9292 readers,
9393 & format ! ( " (weighted integral{})" , erased_label) ,
9494 |line| {
95- if let Some ( captures) = re. captures ( & line) {
95+ if let Some ( captures) = re. captures ( line) {
9696 let weight = i64:: from_str ( & captures[ 1 ] ) . unwrap ( ) ;
97- let line = if erase {
98- re. replace ( & line, "NNN${3}" ) . to_string ( )
97+ if erase {
98+ let line = re. replace ( line, "NNN${3}" ) . to_string ( ) ;
99+ ( Some ( line) , weight)
99100 } else {
100- line
101- } ;
102- ( line, weight)
101+ ( None , weight)
102+ }
103103 } else {
104- ( line , 1i64 )
104+ ( None , 1i64 )
105105 }
106106 } ,
107107 )
@@ -112,16 +112,16 @@ fn do_main() -> io::Result<()> {
112112 readers,
113113 & format ! ( " (weighted fractional{})" , erased_label) ,
114114 |line| {
115- if let Some ( captures) = re. captures ( & line) {
115+ if let Some ( captures) = re. captures ( line) {
116116 let weight = f64:: from_str ( & captures[ 1 ] ) . unwrap ( ) ;
117- let line = if erase {
118- re. replace ( & line, "NNN${4}" ) . to_string ( )
117+ if erase {
118+ let line = re. replace ( line, "NNN${4}" ) . to_string ( ) ;
119+ ( Some ( line) , weight)
119120 } else {
120- line
121- } ;
122- ( line, weight)
121+ ( None , weight)
122+ }
123123 } else {
124- ( line , 1f64 )
124+ ( None , 1f64 )
125125 }
126126 } ,
127127 )
@@ -137,22 +137,54 @@ fn process<F, N>(
137137 get_line_and_weight : F ,
138138) -> io:: Result < ( ) >
139139where
140- F : Fn ( String ) -> ( String , N ) ,
140+ F : Fn ( & str ) -> ( Option < String > , N ) ,
141141 N : Total ,
142142{
143- let mut counts: FxHashMap < String , N > = FxHashMap :: default ( ) ;
144143 let mut total = N :: from ( 0u32 ) ;
144+ let mut counts: FxHashMap < String , N > = FxHashMap :: default ( ) ;
145145
146- for reader in readers {
147- for line in reader. lines ( ) {
148- let Ok ( line) = line else {
149- eprintln ! ( "counts: non-UTF-8 input detected, aborting" ) ;
150- std:: process:: exit ( 1 ) ;
151- } ;
152- let ( line, weight) = get_line_and_weight ( line) ;
153- let entry = counts. entry ( line) . or_insert_with ( || N :: from ( 0u32 ) ) ;
154- * entry += weight;
146+ // `reader.lines()` is the obvious way to do this loop, but that requires
147+ // allocating every line into a `String`. Instead we use
148+ // `reader.read_line()` and use a single string for every iteration. On the
149+ // first occurrence of a line we need to do a `to_string` to insert it into
150+ // the table. On subsequent occurrences we don't. Most `counts` input tends
151+ // to have significant numbers of repeated lines, so this approach reduces
152+ // allocation counts greatly.
153+ let mut line_with_nl = String :: new ( ) ;
154+ for mut reader in readers {
155+ loop {
156+ match reader. read_line ( & mut line_with_nl) {
157+ Ok ( 0 ) => break ,
158+ Ok ( _) => { }
159+ Err ( err) => {
160+ eprintln ! ( "counts: {}" , err) ;
161+ std:: process:: exit ( 1 ) ;
162+ }
163+ }
164+
165+ let line = & line_with_nl[ ..line_with_nl. len ( ) - 1 ] ;
166+ let ( modified_line, weight) = get_line_and_weight ( line) ;
167+ match modified_line {
168+ None => {
169+ // The line has not been modified. Only promote to `String`
170+ // if it hasn't been seen before.
171+ if let Some ( entry) = counts. get_mut ( line) {
172+ * entry += weight;
173+ } else {
174+ counts. insert ( line. to_string ( ) , weight) ;
175+ }
176+ }
177+ Some ( modified_line) => {
178+ // The line has been modified, which means it has already
179+ // been promoted to a `String`.
180+ let entry = counts. entry ( modified_line) . or_insert_with ( || N :: from ( 0u32 ) ) ;
181+ * entry += weight;
182+ }
183+ }
155184 total += weight;
185+
186+ // We are finished with the contents of this line.
187+ line_with_nl. clear ( ) ;
156188 }
157189 }
158190
0 commit comments