66#![ allow( rustdoc:: private_intra_doc_links) ]
77
88use std:: cmp:: Ordering ;
9- use std:: io:: { self , BufReader } ;
9+ use std:: io:: { self , BufReader , ErrorKind } ;
1010use std:: {
1111 fs:: { File , remove_file} ,
1212 io:: { BufRead , BufWriter , Write } ,
@@ -71,6 +71,35 @@ impl CsplitOptions {
7171 }
7272}
7373
/// Iterator over the lines of a [`BufRead`] source that keeps line terminators.
///
/// Unlike [`BufRead::lines`], every yielded `String` still ends with the `\n`
/// (and any `\r` preceding it) that was read from the input. A final line that
/// is not newline-terminated is yielded as-is, so callers can reproduce the
/// input byte-for-byte.
#[derive(Debug)]
pub struct LinesWithNewlines<T> {
    /// Buffered reader the lines are pulled from.
    inner: T,
}

impl<T: BufRead> LinesWithNewlines<T> {
    /// Wraps the reader `s` so it can be iterated line by line, terminators included.
    fn new(s: T) -> Self {
        Self { inner: s }
    }
}

impl<T: BufRead> Iterator for LinesWithNewlines<T> {
    type Item = io::Result<String>;

    /// Reads up to and including the next `\n`.
    ///
    /// Returns `None` once the reader reports end of input, `Some(Err(_))` when
    /// the read fails or the bytes read are not valid UTF-8 (reported as
    /// [`ErrorKind::InvalidData`]), and `Some(Ok(line))` otherwise.
    fn next(&mut self) -> Option<Self::Item> {
        let mut bytes = Vec::new();
        match self.inner.read_until(b'\n', &mut bytes) {
            // Zero bytes read means end of input.
            Ok(0) => None,
            // `read_until` keeps the delimiter in `bytes`, so the newline survives.
            Ok(_) => Some(String::from_utf8(bytes).map_err(|_| {
                io::Error::new(ErrorKind::InvalidData, "stream did not contain valid UTF-8")
            })),
            Err(e) => Some(Err(e)),
        }
    }
}
102+
74103/// Splits a file into several files according to the command line patterns.
75104///
76105/// # Errors
@@ -87,8 +116,7 @@ pub fn csplit<T>(options: &CsplitOptions, patterns: &[String], input: T) -> Resu
87116where
88117 T : BufRead ,
89118{
90- let enumerated_input_lines = input
91- . lines ( )
119+ let enumerated_input_lines = LinesWithNewlines :: new ( input)
92120 . map ( |line| line. map_err_context ( || "read error" . to_string ( ) ) )
93121 . enumerate ( ) ;
94122 let mut input_iter = InputSplitter :: new ( enumerated_input_lines) ;
@@ -243,7 +271,7 @@ impl SplitWriter<'_> {
243271 self . dev_null = true ;
244272 }
245273
246- /// Writes the line to the current split, appending a newline character .
274+ /// Writes the line to the current split.
247275 /// If [`self.dev_null`] is true, then the line is discarded.
248276 ///
249277 /// # Errors
@@ -255,8 +283,7 @@ impl SplitWriter<'_> {
255283 Some ( ref mut current_writer) => {
256284 let bytes = line. as_bytes ( ) ;
257285 current_writer. write_all ( bytes) ?;
258- current_writer. write_all ( b"\n " ) ?;
259- self . size += bytes. len ( ) + 1 ;
286+ self . size += bytes. len ( ) ;
260287 }
261288 None => panic ! ( "trying to write to a split that was not created" ) ,
262289 }
@@ -321,11 +348,11 @@ impl SplitWriter<'_> {
321348
322349 let mut ret = Err ( CsplitError :: LineOutOfRange ( pattern_as_str. to_string ( ) ) ) ;
323350 while let Some ( ( ln, line) ) = input_iter. next ( ) {
324- let l = line?;
351+ let line = line?;
325352 match n. cmp ( & ( & ln + 1 ) ) {
326353 Ordering :: Less => {
327354 assert ! (
328- input_iter. add_line_to_buffer( ln, l ) . is_none( ) ,
355+ input_iter. add_line_to_buffer( ln, line ) . is_none( ) ,
329356 "the buffer is big enough to contain 1 line"
330357 ) ;
331358 ret = Ok ( ( ) ) ;
@@ -334,15 +361,15 @@ impl SplitWriter<'_> {
334361 Ordering :: Equal => {
335362 assert ! (
336363 self . options. suppress_matched
337- || input_iter. add_line_to_buffer( ln, l ) . is_none( ) ,
364+ || input_iter. add_line_to_buffer( ln, line ) . is_none( ) ,
338365 "the buffer is big enough to contain 1 line"
339366 ) ;
340367 ret = Ok ( ( ) ) ;
341368 break ;
342369 }
343370 Ordering :: Greater => ( ) ,
344371 }
345- self . writeln ( & l ) ?;
372+ self . writeln ( & line ) ?;
346373 }
347374 self . finish_split ( ) ;
348375 ret
@@ -379,23 +406,33 @@ impl SplitWriter<'_> {
379406 input_iter. set_size_of_buffer ( 1 ) ;
380407
381408 while let Some ( ( ln, line) ) = input_iter. next ( ) {
382- let l = line?;
409+ let line = line?;
410+ let l = if let Some ( l) = line. strip_suffix ( '\n' ) {
411+ if let Some ( l) = l. strip_suffix ( '\r' ) {
412+ l
413+ } else {
414+ l
415+ }
416+ } else {
417+ & line
418+ }
419+ . to_string ( ) ;
383420 if regex. is_match ( & l) {
384421 let mut next_line_suppress_matched = false ;
385422 match ( self . options . suppress_matched , offset) {
386423 // no offset, add the line to the next split
387424 ( false , 0 ) => {
388425 assert ! (
389- input_iter. add_line_to_buffer( ln, l ) . is_none( ) ,
426+ input_iter. add_line_to_buffer( ln, line ) . is_none( ) ,
390427 "the buffer is big enough to contain 1 line"
391428 ) ;
392429 }
393430 // a positive offset, some more lines need to be added to the current split
394- ( false , _) => self . writeln ( & l ) ?,
431+ ( false , _) => self . writeln ( & line ) ?,
395432 // suppress matched option true, but there is a positive offset, so the line is printed
396433 ( true , 1 ..) => {
397434 next_line_suppress_matched = true ;
398- self . writeln ( & l ) ?;
435+ self . writeln ( & line ) ?;
399436 }
400437 _ => ( ) ,
401438 } ;
@@ -424,7 +461,7 @@ impl SplitWriter<'_> {
424461 }
425462 return Ok ( ( ) ) ;
426463 }
427- self . writeln ( & l ) ?;
464+ self . writeln ( & line ) ?;
428465 }
429466 } else {
430467 // With a negative offset we use a buffer to keep the lines within the offset.
@@ -435,7 +472,17 @@ impl SplitWriter<'_> {
435472 let offset_usize = -offset as usize ;
436473 input_iter. set_size_of_buffer ( offset_usize) ;
437474 while let Some ( ( ln, line) ) = input_iter. next ( ) {
438- let l = line?;
475+ let line = line?;
476+ let l = if let Some ( l) = line. strip_suffix ( '\n' ) {
477+ if let Some ( l) = l. strip_suffix ( '\r' ) {
478+ l
479+ } else {
480+ l
481+ }
482+ } else {
483+ & line
484+ }
485+ . to_string ( ) ;
439486 if regex. is_match ( & l) {
440487 for line in input_iter. shrink_buffer_to_size ( ) {
441488 self . writeln ( & line) ?;
@@ -444,12 +491,12 @@ impl SplitWriter<'_> {
444491 // since offset_usize is for sure greater than 0
445492 // the first element of the buffer should be removed and this
446493 // line inserted to be coherent with GNU implementation
447- input_iter. add_line_to_buffer ( ln, l ) ;
494+ input_iter. add_line_to_buffer ( ln, line ) ;
448495 } else {
449496 // add 1 to the buffer size to make place for the matched line
450497 input_iter. set_size_of_buffer ( offset_usize + 1 ) ;
451498 assert ! (
452- input_iter. add_line_to_buffer( ln, l ) . is_none( ) ,
499+ input_iter. add_line_to_buffer( ln, line ) . is_none( ) ,
453500 "should be big enough to hold every lines"
454501 ) ;
455502 }
@@ -460,7 +507,7 @@ impl SplitWriter<'_> {
460507 }
461508 return Ok ( ( ) ) ;
462509 }
463- if let Some ( line) = input_iter. add_line_to_buffer ( ln, l ) {
510+ if let Some ( line) = input_iter. add_line_to_buffer ( ln, line ) {
464511 self . writeln ( & line) ?;
465512 }
466513 }
0 commit comments