-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Expand file tree
/
Copy pathTextFieldParser.vb
More file actions
1860 lines (1525 loc) · 74.3 KB
/
TextFieldParser.vb
File metadata and controls
1860 lines (1525 loc) · 74.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
' Copyright (c) Microsoft Corporation. All rights reserved.
Option Explicit On
Option Strict On
Imports System
Imports System.ComponentModel
Imports System.Diagnostics
Imports System.Globalization
Imports System.IO
Imports System.Security.Permissions
Imports System.Text
Imports System.Text.RegularExpressions
Imports Microsoft.VisualBasic
Imports Microsoft.VisualBasic.CompilerServices
Imports Microsoft.VisualBasic.CompilerServices.ExceptionUtils
Imports Microsoft.VisualBasic.CompilerServices.Utils
Namespace Microsoft.VisualBasic.FileIO
'''*************************************************************************
''';TextFieldParser
''' <summary>
''' Enables parsing very large delimited or fixed width field files
''' </summary>
''' <remarks></remarks>
Public Class TextFieldParser
Implements IDisposable
'==PUBLIC**************************************************************
'''*********************************************************************
''';New
''' <summary>
''' Builds a parser over the file at the given path, using UTF-8 as the fallback
''' encoding and asking the reader to detect the actual encoding
''' </summary>
''' <param name="path">Location of the file to parse</param>
''' <remarks>Argument validation is performed by InitializeFromPath</remarks>
<HostProtection(Resources:=HostProtectionResource.ExternalProcessMgmt)> _
Public Sub New(ByVal path As String)
    ' No encoding information supplied: fall back to UTF-8 with detection switched on
    Me.InitializeFromPath(path, defaultEncoding:=System.Text.Encoding.UTF8, detectEncoding:=True)
End Sub
'''*********************************************************************
''';New
''' <summary>
''' Builds a parser over the file at the given path with a caller supplied fallback encoding;
''' encoding detection is switched on
''' </summary>
''' <param name="path">Location of the file to parse</param>
''' <param name="defaultEncoding">Encoding to fall back on when the encoding is not determined from the file</param>
''' <remarks>Argument validation is performed by InitializeFromPath</remarks>
<HostProtection(Resources:=HostProtectionResource.ExternalProcessMgmt)> _
Public Sub New(ByVal path As String, ByVal defaultEncoding As System.Text.Encoding)
    ' Detection is always requested by this overload
    Me.InitializeFromPath(path, defaultEncoding, detectEncoding:=True)
End Sub
'''*********************************************************************
''';New
''' <summary>
''' Builds a parser over the file at the given path with full control over encoding handling
''' </summary>
''' <param name="path">Location of the file to parse</param>
''' <param name="defaultEncoding">Encoding to fall back on when the encoding is not determined from the file</param>
''' <param name="detectEncoding">Whether to try to detect the encoding from the BOM</param>
''' <remarks>Argument validation is performed by InitializeFromPath</remarks>
<HostProtection(Resources:=HostProtectionResource.ExternalProcessMgmt)> _
Public Sub New(ByVal path As String, ByVal defaultEncoding As System.Text.Encoding, ByVal detectEncoding As Boolean)
    ' Everything is caller supplied; simply forward it
    Me.InitializeFromPath(path, defaultEncoding, detectEncoding)
End Sub
'''*********************************************************************
''';New
''' <summary>
''' Builds a parser over the given stream, using UTF-8 as the fallback encoding and
''' asking the reader to detect the actual encoding
''' </summary>
''' <param name="stream">The stream holding the text to parse</param>
''' <remarks>Argument validation is performed by InitializeFromStream</remarks>
<HostProtection(Resources:=HostProtectionResource.ExternalProcessMgmt)> _
Public Sub New(ByVal stream As Stream)
    ' No encoding information supplied: fall back to UTF-8 with detection switched on
    Me.InitializeFromStream(stream, defaultEncoding:=System.Text.Encoding.UTF8, detectEncoding:=True)
End Sub
'''*********************************************************************
''';New
''' <summary>
''' Builds a parser over the given stream with a caller supplied fallback encoding;
''' encoding detection is switched on
''' </summary>
''' <param name="stream">The stream holding the text to parse</param>
''' <param name="defaultEncoding">Encoding to fall back on when the encoding is not determined from the stream</param>
''' <remarks>Argument validation is performed by InitializeFromStream</remarks>
<HostProtection(Resources:=HostProtectionResource.ExternalProcessMgmt)> _
Public Sub New(ByVal stream As Stream, ByVal defaultEncoding As System.Text.Encoding)
    ' Detection is always requested by this overload
    Me.InitializeFromStream(stream, defaultEncoding, detectEncoding:=True)
End Sub
'''*********************************************************************
''';New
''' <summary>
''' Builds a parser over the given stream with full control over encoding handling
''' </summary>
''' <param name="stream">The stream holding the text to parse</param>
''' <param name="defaultEncoding">Encoding to fall back on when the encoding is not determined from the stream</param>
''' <param name="detectEncoding">Whether to try to detect the encoding from the BOM</param>
''' <remarks>Argument validation is performed by InitializeFromStream</remarks>
<HostProtection(Resources:=HostProtectionResource.ExternalProcessMgmt)> _
Public Sub New(ByVal stream As Stream, ByVal defaultEncoding As System.Text.Encoding, ByVal detectEncoding As Boolean)
    ' Everything is caller supplied; simply forward it
    Me.InitializeFromStream(stream, defaultEncoding, detectEncoding)
End Sub
'''*********************************************************************
''';New
''' <summary>
''' Builds a parser over the given stream with full control over encoding handling and
''' over whether the reader is closed when the parser is closed
''' </summary>
''' <param name="stream">The stream holding the text to parse</param>
''' <param name="defaultEncoding">Encoding to fall back on when the encoding is not determined from the stream</param>
''' <param name="detectEncoding">Whether to try to detect the encoding from the BOM</param>
''' <param name="leaveOpen">True to skip closing the reader when the parser is closed</param>
''' <remarks>The flag is recorded before initialization; CloseReader consults it later</remarks>
<HostProtection(Resources:=HostProtectionResource.ExternalProcessMgmt)> _
Public Sub New(ByVal stream As Stream, ByVal defaultEncoding As System.Text.Encoding, ByVal detectEncoding As Boolean, ByVal leaveOpen As Boolean)
    ' Remember the caller's ownership wish first, then set up the reader
    Me.m_LeaveOpen = leaveOpen
    Me.InitializeFromStream(stream, defaultEncoding, detectEncoding)
End Sub
'''*********************************************************************
''';New
''' <summary>
''' Builds a parser that pulls its text from an existing TextReader
''' </summary>
''' <param name="reader">The TextReader to read from. Must not be Nothing</param>
''' <remarks>The buffer is filled once from the reader before the constructor returns</remarks>
<HostProtection(Resources:=HostProtectionResource.ExternalProcessMgmt)> _
Public Sub New(ByVal reader As TextReader)
    ' A reader is mandatory
    If reader Is Nothing Then
        Throw GetArgumentNullException("reader")
    End If
    ' Adopt the reader and prime the buffer
    Me.m_Reader = reader
    Me.ReadToBuffer()
End Sub
'''**********************************************************************
''';CommentTokens
''' <summary>
''' The strings that mark a line as a comment when the line starts with one of them
''' </summary>
''' <value>The array of comment markers currently in use</value>
''' <remarks>
''' The setter runs CheckCommentTokensForWhitespace on the new value before storing it and
''' flags the parser so its properties are re-checked. The original remark states that an
''' empty array is returned when the property was never set; that depends on the backing
''' field's initial value, which is declared elsewhere.
''' </remarks>
<EditorBrowsable(EditorBrowsableState.Advanced)> _
Public Property CommentTokens() As String()
    Get
        Return Me.m_CommentTokens
    End Get
    Set(ByVal value As String())
        ' Validate first so a rejected value never replaces the current tokens
        CheckCommentTokensForWhitespace(value)
        Me.m_CommentTokens = value
        ' Settings changed: request a property re-check
        Me.m_NeedPropertyCheck = True
    End Set
End Property
'''*******************************************************************
''';EndOfData
''' <summary>
''' Indicates whether the data (non ignorable lines) in the file has been exhausted
''' </summary>
''' <value>True if there is no more data to read, otherwise False</value>
''' <remarks>Comments and blank lines do not count as data. Once True is returned the result is cached</remarks>
Public ReadOnly Property EndOfData() As Boolean
    Get
        ' The end was already detected on an earlier call
        If m_EndOfData Then
            Return True
        End If
        ' Without a reader or a buffer nothing more can be read
        If m_Reader Is Nothing OrElse m_Buffer Is Nothing Then
            m_EndOfData = True
            Return True
        End If
        ' Look ahead for a data line without consuming it
        Dim upcoming As String = PeekNextDataLine()
        If upcoming IsNot Nothing Then
            Return False
        End If
        m_EndOfData = True
        Return True
    End Get
End Property
'''*******************************************************************
''';LineNumber
''' <summary>
''' The number of the line to the right of the cursor
''' </summary>
''' <value>The current line number, or -1 once reading has finished</value>
''' <remarks>
''' LineNumber is a location in the file and has nothing to do with rows or fields.
''' Reading this property closes the reader when both the reader and the buffer are drained.
''' </remarks>
<EditorBrowsable(EditorBrowsableState.Advanced)> _
Public ReadOnly Property LineNumber() As Long
    Get
        If m_LineNumber <> -1 Then
            ' Both conditions are always evaluated, reader first, exactly as with "And"
            Dim readerDrained As Boolean = (m_Reader.Peek = -1)
            Dim bufferDrained As Boolean = (m_Position = m_CharsRead)
            If readerDrained And bufferDrained Then
                ' Nothing left anywhere; CloseReader resets m_LineNumber to -1
                CloseReader()
            End If
        End If
        Return m_LineNumber
    End Get
End Property
'''*******************************************************************
''';ErrorLine
''' <summary>
''' The most recent malformed line, if any has been recorded
''' </summary>
''' <value>The content of the backing field m_ErrorLine</value>
''' <remarks>The field is written elsewhere in the class</remarks>
Public ReadOnly Property ErrorLine() As String
    Get
        Return Me.m_ErrorLine
    End Get
End Property
'''*******************************************************************
''';ErrorLineNumber
''' <summary>
''' The line number of the most recent malformed line, if any has been recorded
''' </summary>
''' <value>The content of the backing field m_ErrorLineNumber</value>
''' <remarks>The field is written elsewhere in the class</remarks>
Public ReadOnly Property ErrorLineNumber() As Long
    Get
        Return Me.m_ErrorLineNumber
    End Get
End Property
'''*******************************************************************
''';TextFieldType
''' <summary>
''' Selects how lines are split into fields: by delimiters or by fixed widths
''' </summary>
''' <value>The kind of fields in the file</value>
''' <remarks>
''' The setter rejects values outside the FieldType range with an InvalidEnumArgumentException
''' and flags the parser so its properties are re-checked
''' </remarks>
Public Property TextFieldType() As FieldType
    Get
        Return Me.m_TextFieldType
    End Get
    Set(ByVal value As FieldType)
        ' Throws before anything is stored when the value is not a defined FieldType
        ValidateFieldTypeEnumValue(value, "value")
        Me.m_TextFieldType = value
        Me.m_NeedPropertyCheck = True
    End Set
End Property
'''******************************************************************
''';FieldWidths
''' <summary>
''' Gets or sets the widths of the fields used when reading a fixed width file
''' </summary>
''' <value>The array of widths</value>
''' <remarks>
''' The getter hands back the very array that was set. A private clone is kept alongside it
''' so later changes to the caller's array elements can be noticed.
''' </remarks>
Public Property FieldWidths() As Integer()
    Get
        Return m_FieldWidths
    End Get
    Set(ByVal value As Integer())
        If value Is Nothing Then
            ' Nothing to validate or snapshot
            m_FieldWidthsCopy = Nothing
        Else
            ValidateFieldWidthsOnInput(value)
            ' Snapshot the elements so edits made through the caller's array can be detected
            m_FieldWidthsCopy = DirectCast(value.Clone(), Integer())
        End If
        m_FieldWidths = value
        m_NeedPropertyCheck = True
    End Set
End Property
'''********************************************************************
''';Delimiters
''' <summary>
''' Gets or sets the delimiters that separate fields in a delimited file
''' </summary>
''' <value>The array of delimiters</value>
''' <remarks>
''' The getter hands back the very array that was set. A private clone is kept alongside it
''' so later changes to the caller's array elements can be noticed. Setting the property
''' also discards the cached begin-quotes regex.
''' </remarks>
Public Property Delimiters() As String()
    Get
        Return m_Delimiters
    End Get
    Set(ByVal value As String())
        If value Is Nothing Then
            ' Nothing to validate or snapshot
            m_DelimitersCopy = Nothing
        Else
            ValidateDelimiters(value)
            ' Snapshot the elements so edits made through the caller's array can be detected
            m_DelimitersCopy = DirectCast(value.Clone(), String())
        End If
        m_Delimiters = value
        m_NeedPropertyCheck = True
        ' The cached regex was built for the old delimiters; drop it so it gets rebuilt
        m_BeginQuotesRegex = Nothing
    End Set
End Property
'''*******************************************************************
''';SetDelimiters
''' <summary>
''' Convenience method that sets the Delimiters property from an argument list, so the
''' caller does not have to declare an array
''' </summary>
''' <param name="delimiters">The delimiters, passed either as separate arguments or as an array</param>
''' <remarks>Equivalent to assigning the Delimiters property, including its validation</remarks>
Public Sub SetDelimiters(ByVal ParamArray delimiters As String())
' Me. is required: the parameter has the same (case-insensitive) name as the property
Me.Delimiters = delimiters
End Sub
'''*******************************************************************
''';SetFieldWidths
''' <summary>
''' Convenience method that sets the FieldWidths property from an argument list, so the
''' caller does not have to declare an array
''' </summary>
''' <param name="fieldWidths">The field widths, passed either as separate arguments or as an array</param>
''' <remarks>Equivalent to assigning the FieldWidths property, including its validation</remarks>
Public Sub SetFieldWidths(ByVal ParamArray fieldWidths As Integer())
' Me. is required: the parameter has the same (case-insensitive) name as the property
Me.FieldWidths = fieldWidths
End Sub
'''*******************************************************************
''';TrimWhiteSpace
''' <summary>
''' Controls whether leading and trailing white space is stripped from each returned field
''' </summary>
''' <value>True if white space should be removed, otherwise False</value>
''' <remarks>Plain flag; the setting is consumed by the parsing code elsewhere in the class</remarks>
Public Property TrimWhiteSpace() As Boolean
    Get
        Return Me.m_TrimWhiteSpace
    End Get
    Set(ByVal value As Boolean)
        Me.m_TrimWhiteSpace = value
    End Set
End Property
'''********************************************************************
''';ReadLine
''' <summary>
''' Reads the next line from the file and returns it without its line terminator
''' </summary>
''' <returns>The line read, or Nothing when the end of the file has been reached</returns>
''' <remarks>
''' This method is not data aware: blank lines and comment lines are returned like any other line.
''' Reaching the end marks reading as finished.
''' </remarks>
<EditorBrowsable(EditorBrowsableState.Advanced)> _
Public Function ReadLine() As String
    ' Nothing can be read once the reader or the buffer is gone
    If m_Reader Is Nothing OrElse m_Buffer Is Nothing Then
        Return Nothing
    End If
    ' When the buffer runs out, overwrite it with the next chunk from the reader
    Dim refill As ChangeBufferFunction = AddressOf ReadToBuffer
    Dim rawLine As String = ReadNextLine(m_Position, refill)
    If rawLine IsNot Nothing Then
        m_LineNumber += 1
        ' Drop the trailing carriage return / line feed characters
        Return rawLine.TrimEnd(Chr(13), Chr(10))
    End If
    ' End of file
    FinishReading()
    Return Nothing
End Function
'''*******************************************************************
''';ReadFields
''' <summary>
''' Reads the next non ignorable line and splits it into fields according to TextFieldType
''' </summary>
''' <returns>The fields of the line, or Nothing when nothing can be read</returns>
''' <remarks>This is a data aware method. Comments and blank lines are ignored.</remarks>
Public Function ReadFields() As String()
    ' Nothing can be read once the reader or the buffer is gone
    If m_Reader Is Nothing OrElse m_Buffer Is Nothing Then
        Return Nothing
    End If
    ValidateReadyToRead()
    ' Dispatch on the configured field type
    If m_TextFieldType = FieldType.FixedWidth Then
        Return ParseFixedWidthLine()
    ElseIf m_TextFieldType = FieldType.Delimited Then
        Return ParseDelimitedLine()
    End If
    ' Any other value should have been rejected by the TextFieldType setter
    Debug.Fail("The TextFieldType is not supported")
    Return Nothing
End Function
'''********************************************************************
''';PeekChars
''' <summary>
''' Returns the leading characters of the next data line without consuming the line
''' </summary>
''' <param name="numberOfChars">The maximum number of characters (text elements) to return. Must be greater than zero</param>
''' <returns>
''' The first numberOfChars text elements of the next data line, the whole line when it is
''' shorter than that, or Nothing when there is no more data
''' </returns>
''' <remarks>
''' The line terminator is never part of the result. Characters are counted as text elements
''' (see System.Globalization.StringInfo), so a surrogate pair or a base character with
''' combining marks counts as one.
''' </remarks>
Public Function PeekChars(ByVal numberOfChars As Integer) As String
    If numberOfChars <= 0 Then
        Throw GetArgumentExceptionWithArgName("numberOfChars", ResID.MyID.TextFieldParser_NumberOfCharsMustBePositive, "numberOfChars")
    End If
    If m_Reader Is Nothing Or m_Buffer Is Nothing Then
        Return Nothing
    End If
    ' If we know there's no more data return Nothing
    If m_EndOfData Then
        Return Nothing
    End If
    ' Get the next line without reading it
    Dim Line As String = PeekNextDataLine()
    If Line Is Nothing Then
        m_EndOfData = True
        Return Nothing
    End If
    ' Strip off end of line chars
    Line = Line.TrimEnd(Chr(13), Chr(10))
    ' Fast path: a line with fewer UTF-16 chars than requested cannot have more text elements
    If Line.Length < numberOfChars Then
        Return Line
    End If
    ' The line may still have fewer text elements than UTF-16 chars (surrogate pairs, combining
    ' marks). Compare in text elements so SubstringByTextElements is never asked for more
    ' elements than exist, which would throw ArgumentOutOfRangeException.
    Dim info As New StringInfo(Line)
    If info.LengthInTextElements <= numberOfChars Then
        Return Line
    End If
    Return info.SubstringByTextElements(0, numberOfChars)
End Function
'''********************************************************************
''';ReadToEnd
''' <summary>
''' Returns everything from the current position to the end of the file
''' </summary>
''' <returns>The remaining text, or Nothing when the reader or the buffer is no longer available</returns>
''' <remarks>
''' This is not a data aware method: comments and blank lines are included.
''' Reading is marked as finished afterwards.
''' </remarks>
<EditorBrowsable(EditorBrowsableState.Advanced)> _
Public Function ReadToEnd() As String
    If m_Reader Is Nothing OrElse m_Buffer Is Nothing Then
        Return Nothing
    End If
    Dim remaining As New StringBuilder(m_Buffer.Length)
    ' First the part that is already buffered but not yet consumed ...
    remaining.Append(m_Buffer, m_Position, m_CharsRead - m_Position)
    ' ... then whatever the reader still holds
    remaining.Append(m_Reader.ReadToEnd())
    FinishReading()
    Return remaining.ToString()
End Function
'''*********************************************************************
''';HasFieldsEnclosedInQuotes
''' <summary>
''' Controls whether quotes are handled in a csv friendly way
''' </summary>
''' <value>True if quotes are escaped, otherwise False</value>
''' <remarks>Plain flag; the setting is consumed by the parsing code elsewhere in the class</remarks>
<EditorBrowsable(EditorBrowsableState.Advanced)> _
Public Property HasFieldsEnclosedInQuotes() As Boolean
    Get
        Return Me.m_HasFieldsEnclosedInQuotes
    End Get
    Set(ByVal value As Boolean)
        Me.m_HasFieldsEnclosedInQuotes = value
    End Set
End Property
'''**********************************************************************
''';Close
''' <summary>
''' Ends reading and releases the underlying reader
''' </summary>
''' <remarks>Delegates to CloseReader, which leaves the reader open when the parser was created with leaveOpen set</remarks>
Public Sub Close()
    Me.CloseReader()
End Sub
'''**********************************************************************
''';Dispose
''' <summary>
''' Releases the parser's resources; the IDisposable entry point
''' </summary>
''' <remarks>Runs Dispose(True) and then tells the GC that finalization is no longer needed</remarks>
Public Sub Dispose() Implements System.IDisposable.Dispose
    ' Explicit disposal path
    Me.Dispose(True)
    ' Cleanup is done, so the finalizer has nothing left to do
    GC.SuppressFinalize(Me)
End Sub
'==PROTECTED**************************************************************
'''***********************************************************************
''' ;Dispose
''' <summary>
''' Standard implementation of IDisposable.Dispose for non sealed classes. Classes derived from
''' TextFieldParser should override this method and, after doing their own cleanup, call
''' MyBase.Dispose(disposing)
''' </summary>
''' <param name="disposing">True when called from Dispose, False when called from the finalizer</param>
''' <remarks>Nothing is done on the finalizer path. On the Dispose path the parser is closed at most once</remarks>
Protected Overridable Sub Dispose(ByVal disposing As Boolean)
    ' Finalizer path: leave everything alone
    If Not disposing Then
        Return
    End If
    ' Close only on the first explicit disposal
    If Not Me.m_Disposed Then
        Close()
    End If
    Me.m_Disposed = True
End Sub
'''**************************************************************************
''' ;ValidateFieldTypeEnumValue
''' <summary>
''' Verifies that a value passed as a FieldType enum lies within the defined range
''' (FieldType.Delimited through FieldType.FixedWidth)
''' </summary>
''' <param name="value">The value to check</param>
''' <param name="paramName">The argument name to report in the exception</param>
''' <remarks>Throws InvalidEnumArgumentException when the value is out of range</remarks>
Private Sub ValidateFieldTypeEnumValue(ByVal value As FieldType, ByVal paramName As String)
    Dim belowRange As Boolean = value < FieldType.Delimited
    If belowRange OrElse value > FieldType.FixedWidth Then
        Throw New System.ComponentModel.InvalidEnumArgumentException(paramName, DirectCast(value, Integer), GetType(FieldType))
    End If
End Sub
'''*******************************************************************************
''';Finalize
''' <summary>
''' Finalizer half of the dispose pattern
''' </summary>
''' <remarks>Runs Dispose(False) and then the base class finalizer</remarks>
Protected Overrides Sub Finalize()
    ' Do not change this code. Put cleanup code in Dispose(ByVal disposing As Boolean) above.
    Me.Dispose(False)
    MyBase.Finalize()
End Sub
'==PRIVATE**************************************************************
'''**********************************************************************
''';CloseReader
''' <summary>
''' Marks reading as finished and lets go of the reader
''' </summary>
''' <remarks>
''' The reader itself is closed unless m_LeaveOpen is set; in both cases the parser drops
''' its reference to it. Calling this again afterwards only repeats FinishReading.
''' </remarks>
Private Sub CloseReader()
    FinishReading()
    ' Already released
    If m_Reader Is Nothing Then
        Return
    End If
    ' Honor the caller's wish to keep the reader open
    If Not m_LeaveOpen Then
        m_Reader.Close()
    End If
    m_Reader = Nothing
End Sub
'''**********************************************************************
''';FinishReading
''' <summary>
''' Drops the managed state used for reading (everything except the reader) and records
''' that reading is finished
''' </summary>
''' <remarks>After this call LineNumber reports -1 and EndOfData reports True</remarks>
Private Sub FinishReading()
    ' Status seen by callers
    Me.m_LineNumber = -1
    Me.m_EndOfData = True
    ' Working state that is no longer needed
    Me.m_Buffer = Nothing
    Me.m_DelimiterRegex = Nothing
    Me.m_BeginQuotesRegex = Nothing
End Sub
''';InitializeFromPath
''' <summary>
''' Opens the file at the passed in path, creates a StreamReader over it and fills the buffer
''' </summary>
''' <param name="path">The passed in path. Must not be Nothing or empty</param>
''' <param name="defaultEncoding">The encoding to default to if encoding can't be detected. Must not be Nothing</param>
''' <param name="detectEncoding">Indicates whether or not to detect encoding from the BOM</param>
''' <remarks>
''' We validate the arguments here for the three Public constructors that take a Path.
''' The file is opened for reading with FileShare.ReadWrite. If creating the reader or the
''' first read fails, the file is closed again before the exception propagates.
''' </remarks>
Private Sub InitializeFromPath(ByVal path As String, ByVal defaultEncoding As System.Text.Encoding, ByVal detectEncoding As Boolean)
    ' In VB, = "" is also True for Nothing
    If path = "" Then
        Throw GetArgumentNullException("path")
    End If
    If defaultEncoding Is Nothing Then
        Throw GetArgumentNullException("defaultEncoding")
    End If
    Dim fullPath As String = ValidatePath(path)
    Dim fileStreamTemp As New FileStream(fullPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)
    Try
        m_Reader = New StreamReader(fileStreamTemp, defaultEncoding, detectEncoding)
        ReadToBuffer()
    Catch
        ' The constructor is about to fail, so no caller will ever get the chance to call
        ' Close/Dispose on this parser. Release the file handle here instead of leaking it.
        m_Reader = Nothing
        fileStreamTemp.Close()
        Throw
    End Try
End Sub
'''*********************************************************************
''';InitializeFromStream
''' <summary>
''' Validates the arguments, wraps the passed in stream in a StreamReader and fills the buffer
''' </summary>
''' <param name="stream">The stream to read from. Must not be Nothing and must be readable</param>
''' <param name="defaultEncoding">The encoding to fall back on if encoding can't be detected. Must not be Nothing</param>
''' <param name="detectEncoding">Whether to detect the encoding from the BOM</param>
''' <remarks>Shared by the Public constructors that take a Stream. The checks run in the order stream, readability, encoding</remarks>
Private Sub InitializeFromStream(ByVal stream As Stream, ByVal defaultEncoding As System.Text.Encoding, ByVal detectEncoding As Boolean)
    ' 1. A stream is mandatory
    If stream Is Nothing Then
        Throw GetArgumentNullException("stream")
    End If
    ' 2. ... and it has to support reading
    If Not stream.CanRead Then
        Throw GetArgumentExceptionWithArgName("stream", ResID.MyID.TextFieldParser_StreamNotReadable, "stream")
    End If
    ' 3. A fallback encoding is mandatory as well
    If defaultEncoding Is Nothing Then
        Throw GetArgumentNullException("defaultEncoding")
    End If
    ' Arguments are fine: create the reader and prime the buffer
    Me.m_Reader = New StreamReader(stream, defaultEncoding, detectEncoding)
    Me.ReadToBuffer()
End Sub
'''**********************************************************************
''';ValidatePath
''' <summary>
''' Turns the passed in path into a full path and makes sure a file exists there
''' </summary>
''' <param name="path">The path to be validated</param>
''' <returns>The full name and path</returns>
''' <remarks>
''' Throws FileNotFoundException if the file doesn't exist. Normalization is delegated to
''' FileSystem.NormalizeFilePath, which presumably throws for a malformed path (defined elsewhere).
''' </remarks>
Private Function ValidatePath(ByVal path As String) As String
    ' Normalize first; the existence check works on the normalized form
    Dim normalized As String = FileSystem.NormalizeFilePath(path, "path")
    If File.Exists(normalized) Then
        Return normalized
    End If
    Throw New IO.FileNotFoundException(GetResourceString(ResID.MyID.IO_FileNotFound_Path, normalized))
End Function
'''***********************************************************************
''';IgnoreLine
''' <summary>
''' Decides whether a line carries no data and should therefore be skipped
''' </summary>
''' <param name="line">The line to be tested</param>
''' <returns>True for blank lines and comment lines, otherwise False</returns>
''' <remarks>
''' Nothing is never ignored because it signals the end of the file. A line is a comment
''' when it starts with a non empty comment token, either as is or after trimming white space.
''' </remarks>
Private Function IgnoreLine(ByVal line As String) As Boolean
    ' Nothing means end of file; the caller must see it
    If line Is Nothing Then
        Return False
    End If
    ' Blank or white space only
    Dim stripped As String = line.Trim()
    If stripped.Length = 0 Then
        Return True
    End If
    ' No tokens configured: every non blank line is data
    If m_CommentTokens Is Nothing Then
        Return False
    End If
    For Each marker As String In m_CommentTokens
        ' Empty (or Nothing) tokens never match anything
        If marker <> "" Then
            ' The untrimmed line is tested too, in case a token begins with white space
            If stripped.StartsWith(marker, StringComparison.Ordinal) OrElse line.StartsWith(marker, StringComparison.Ordinal) Then
                Return True
            End If
        End If
    Next
    Return False
End Function
'''***********************************************************************
''';ReadToBuffer
''' <summary>
''' Overwrites the buffer with the next characters from the reader and moves the cursor
''' back to the start of the buffer
''' </summary>
''' <returns>The number of Chars read. Zero means we're at the end of the file</returns>
''' <remarks>A buffer that was enlarged earlier is replaced by one of the default size before reading</remarks>
Private Function ReadToBuffer() As Integer
    Debug.Assert(m_Buffer IsNot Nothing, "There's no buffer")
    Debug.Assert(m_Reader IsNot Nothing, "There's no StreamReader")
    ' Cursor goes back to the start; everything previously buffered is discarded
    m_Position = 0
    Debug.Assert(m_Buffer.Length >= DEFAULT_BUFFER_LENGTH, "Buffer shrunk to below default")
    ' Undo any earlier growth by allocating a fresh default sized buffer
    If m_Buffer.Length > DEFAULT_BUFFER_LENGTH Then
        m_Buffer = New Char(DEFAULT_BUFFER_LENGTH - 1) {}
    End If
    ' Fill as much of the buffer as the reader delivers
    m_CharsRead = m_Reader.Read(m_Buffer, 0, m_Buffer.Length)
    Return m_CharsRead
End Function
'''************************************************************************
''';SlideCursorToStartOfBuffer
''' <summary>
''' Moves the cursor and all the unread data to the right of the cursor to the front of the
''' buffer. It then fills the remainder of the buffer from the file
''' </summary>
''' <returns>The number of Chars read in filling the remainder of the buffer</returns>
''' <remarks>
''' This should be called when we want to make maximum use of the space in the buffer. Characters
''' to the left of the cursor have already been read and can be discarded.
''' Only the characters between the cursor and m_CharsRead are valid. The buffer is not
''' necessarily full (a reader may deliver fewer characters than requested), so new characters
''' are appended directly after the valid ones rather than at a position derived from the
''' buffer length.
'''</remarks>
Private Function SlideCursorToStartOfBuffer() As Integer
    Debug.Assert(m_Buffer IsNot Nothing, "There's no buffer")
    Debug.Assert(m_Reader IsNot Nothing, "There's no StreamReader")
    Debug.Assert(m_Position >= 0 And m_Position <= m_Buffer.Length, "The cursor is out of range")
    Debug.Assert(m_Position <= m_CharsRead, "The cursor is past the valid data")
    ' No need to slide if we're already at the beginning
    If m_Position > 0 Then
        Dim BufferLength As Integer = m_Buffer.Length
        Dim TempArray(BufferLength - 1) As Char
        ' Number of valid chars that have not been consumed yet
        Dim UnreadCount As Integer = m_CharsRead - m_Position
        Array.Copy(m_Buffer, m_Position, TempArray, 0, UnreadCount)
        ' Fill the rest of the buffer, starting right behind the unread chars so that the
        ' valid data stays contiguous even when the buffer was only partially filled
        Dim CharsRead As Integer = m_Reader.Read(TempArray, UnreadCount, BufferLength - UnreadCount)
        m_CharsRead = UnreadCount + CharsRead
        m_Position = 0
        m_Buffer = TempArray
        Return CharsRead
    End If
    Return 0
End Function
'''*********************************************************************
''';IncreaseBufferSize
''' <summary>
''' Increases the size of the buffer by DEFAULT_BUFFER_LENGTH and reads more data into it.
''' Used when we are at the end of the buffer, we need to read more data from the file,
''' and we can't discard what we've already read.
''' </summary>
''' <returns>The number of characters read into the new buffer</returns>
''' <remarks>
''' This is needed for PeekChars and EndOfData. Throws an InvalidOperationException when the
''' new size would exceed m_MaxBufferSize. The peek cursor is moved to the first newly read
''' character. New characters are appended directly after the valid ones (m_CharsRead), which
''' is not necessarily the end of the old buffer, because a reader may deliver fewer
''' characters than requested.
''' </remarks>
Private Function IncreaseBufferSize() As Integer
    Debug.Assert(m_Buffer IsNot Nothing, "There's no buffer")
    Debug.Assert(m_Reader IsNot Nothing, "There's no StreamReader")
    ' Set cursor to where the new data is going to start
    m_PeekPosition = m_CharsRead
    ' Create a larger buffer and copy our data into it
    Dim BufferSize As Integer = m_Buffer.Length + DEFAULT_BUFFER_LENGTH
    ' Make sure the buffer hasn't grown too large
    If BufferSize > m_MaxBufferSize Then
        Throw GetInvalidOperationException(ResID.MyID.TextFieldParser_BufferExceededMaxSize)
    End If
    Dim TempArray(BufferSize - 1) As Char
    ' Only the first m_CharsRead chars are valid data
    Array.Copy(m_Buffer, TempArray, m_CharsRead)
    ' Append behind the valid data so it stays contiguous; there is always room for
    ' DEFAULT_BUFFER_LENGTH chars because m_CharsRead never exceeds the old buffer length
    Dim CharsRead As Integer = m_Reader.Read(TempArray, m_CharsRead, DEFAULT_BUFFER_LENGTH)
    m_Buffer = TempArray
    m_CharsRead += CharsRead
    Debug.Assert(m_CharsRead <= BufferSize, "We've read more chars than we have space for")
    Return CharsRead
End Function
'''**********************************************************************
''';ReadNextDataLine
''' <summary>
''' Consumes lines until one that carries data is found and returns that line
''' </summary>
''' <returns>The next line of data, or Nothing if there's no more data to be read</returns>
''' <remarks>
''' Moves the cursor past the line read. The line number is advanced for every line consumed,
''' ignored ones included. The reader is closed when the end of the file is reached.
''' </remarks>
Private Function ReadNextDataLine() As String
    ' When the buffer runs out, overwrite it with the next chunk from the reader
    Dim refill As ChangeBufferFunction = AddressOf ReadToBuffer
    Dim candidate As String
    Do
        candidate = ReadNextLine(m_Position, refill)
        m_LineNumber += 1
    Loop Until Not IgnoreLine(candidate)
    ' Nothing is never ignored, so it ends the loop and signals end of file
    If candidate Is Nothing Then
        CloseReader()
    End If
    Return candidate
End Function
'''***********************************************************************
''';PeekNextDataLine
''' <summary>
''' Looks ahead to the next line that carries data without moving the read cursor
''' </summary>
''' <returns>The next data line, or Nothing if there's no more data</returns>
''' <remarks>
''' Scanning uses the separate peek cursor. Because nothing scanned may be discarded, the
''' buffer is grown rather than overwritten when its end is reached.
''' </remarks>
Private Function PeekNextDataLine() As String
    ' When the buffer runs out, enlarge it so the lines peeked so far stay available
    Dim growBuffer As ChangeBufferFunction = AddressOf IncreaseBufferSize
    ' Slide the unread data to the front so that we make maximum use of the buffer
    SlideCursorToStartOfBuffer()
    m_PeekPosition = 0
    Dim candidate As String = ReadNextLine(m_PeekPosition, growBuffer)
    While IgnoreLine(candidate)
        candidate = ReadNextLine(m_PeekPosition, growBuffer)
    End While
    Return candidate
End Function
'''*********************************************************************
''';ChangeBufferFunction
''' <summary>
''' Function to call when we're at the end of the buffer. We either refill the buffer
''' (ReadToBuffer) or increase the size of the buffer (IncreaseBufferSize)
''' </summary>
''' <returns>The number of characters read; ReadNextLine treats 0 as the end of the file</returns>
''' <remarks>Both implementations in this class also reposition the cursor field they belong to</remarks>
Private Delegate Function ChangeBufferFunction() As Integer
'''**********************************************************************
''';ReadNextLine
''' <summary>
''' Gets the next line from the file and moves the passed in cursor past the line
''' </summary>
''' <param name="Cursor">Indicates the current position in the buffer. Passed ByRef and updated to point past the line</param>
''' <param name="ChangeBuffer">Function to call when we've reached the end of the buffer</param>
''' <returns>The next line in the file, including its end of line character(s) if it has any</returns>
''' <remarks>
''' Returns Nothing if we are at the end of the file.
''' The callers in this class pass the m_Position or m_PeekPosition field as Cursor together with
''' a ChangeBuffer function that assigns that same field (ReadToBuffer resets m_Position to 0,
''' IncreaseBufferSize sets m_PeekPosition to the old m_CharsRead). The code below relies on
''' Cursor reflecting that assignment after each ChangeBuffer() call, so the order of the
''' statements matters.
''' </remarks>
Private Function ReadNextLine(ByRef Cursor As Integer, ByVal ChangeBuffer As ChangeBufferFunction) As String
Debug.Assert(m_Buffer IsNot Nothing, "There's no buffer")
Debug.Assert(Cursor >= 0 And Cursor <= m_CharsRead, "The cursor is out of range")
' Check to see if the cursor is at the end of the chars in the buffer. If it is, re fill the buffer
If Cursor = m_CharsRead Then
If ChangeBuffer() = 0 Then
' We're at the end of the file
Return Nothing
End If
End If
' Created lazily; collects the pieces of a line that spans more than one buffer load
Dim Builder As StringBuilder = Nothing
Do
' Walk through buffer looking for the end of a line. End of line can be vbLf (\n), vbCr (\r) or vbCrLf (\r\n)
For i As Integer = Cursor To m_CharsRead - 1
Dim Character As Char = m_Buffer(i)
If Character = vbCr Or Character = vbLf Then
' We've found the end of a line so add everything we've read so far to the
' builder. We include the end of line char because we need to know what it is
' in case it's embedded in a field.
If Builder IsNot Nothing Then
Builder.Append(m_Buffer, Cursor, i - Cursor + 1)
Else
' The argument is only the initial capacity of the builder
Builder = New StringBuilder(i + 1)
Builder.Append(m_Buffer, Cursor, i - Cursor + 1)
End If
' Move the cursor past the end of line char just consumed
Cursor = i + 1
' See if vbLf should be added as well
If Character = vbCr Then
If Cursor < m_CharsRead Then
' The char following the vbCr is already in the buffer
If m_Buffer(Cursor) = vbLf Then
Cursor += 1
Builder.Append(vbLf)
End If
ElseIf ChangeBuffer() > 0 Then
' The vbCr was the last char in the buffer, so more data had to be fetched to look
' at the following char. Cursor is expected to point at the first new char here
' (see remarks).
If m_Buffer(Cursor) = vbLf Then
Cursor += 1
Builder.Append(vbLf)
End If
End If
End If
Return Builder.ToString()
End If
Next i
' We've searched the whole buffer and haven't found an end of line. Save what we have, and read more to the buffer.
Dim Size As Integer = m_CharsRead - Cursor
If Builder Is Nothing Then
Builder = New StringBuilder(Size + DEFAULT_BUILDER_INCREASE)
End If
Builder.Append(m_Buffer, Cursor, Size)
' Keep scanning as long as more data arrives; the next pass starts at the repositioned Cursor
Loop While ChangeBuffer() > 0
' End of file reached without a terminator: return the last, unterminated line
Return Builder.ToString()
End Function
'''***************************************************************
''';ParseDelimitedLine
''' <summary>
''' Gets the next data line and parses it with the delimiters
''' </summary>
''' <returns>An array of the fields in the line</returns>
''' <remarks></remarks>
Private Function ParseDelimitedLine() As String()
Dim Line As String = ReadNextDataLine()
If Line Is Nothing Then
Return Nothing
End If
' The line number is that of the line just read
Dim CurrentLineNumber As Long = m_LineNumber - 1
Dim Index As Integer = 0
Dim Fields As New System.Collections.Generic.List(Of String)
Dim Field As String
Dim LineEndIndex As Integer = GetEndOfLineIndex(Line)
While Index <= LineEndIndex
' Is the field delimited in quotes? We only care about this if
' EscapedQuotes is True
Dim MatchResult As Match = Nothing
Dim QuoteDelimited As Boolean = False