-
Notifications
You must be signed in to change notification settings - Fork 29
Expand file tree
/
Copy pathneedletail.pyi
More file actions
248 lines (210 loc) · 6.91 KB
/
needletail.pyi
File metadata and controls
248 lines (210 loc) · 6.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
from pathlib import Path
from typing import Iterator, Optional, Union
class NeedletailError(Exception):
    """
    Error signalling that FASTA/FASTQ (fastx) data could not be parsed.

    Raised by `parse_fastx_file` and `parse_fastx_string` when they are given
    invalid fastx data.
    """
class FastxReader(Iterator[Record]):
    """
    Iterator that produces sequence records one at a time.

    Yields
    ------
    Record
        A `Record` object for each sequence record.

    See also
    --------
    parse_fastx_file:
        Parse sequence records from a FASTA/FASTQ file.
    parse_fastx_string:
        Parse sequence records from a FASTA/FASTQ string.
    Record:
        The class representing a single FASTA/FASTQ sequence record.
    """
class Record:
    """
    A single biological sequence record (FASTA or FASTQ).

    Parameters
    ----------
    id : str
        Identifier of the sequence record.
    seq : str
        The sequence, as a string.

    Attributes
    ----------
    id : str
        Identifier of the sequence record: every character (whitespace
        included) that follows the leading '>' of a FASTA header or the
        leading '@' of a FASTQ header.
    seq : str
        The sequence, as a string.
    qual : str, optional
        The quality scores of the sequence, as a string. `None` when the
        object represents a FASTA record.
    name : str
        The part of `id` that precedes its first whitespace character.
    description : str, optional
        The part of `id` that follows its first whitespace character, or
        `None` when `id` contains no whitespace.

    Methods
    -------
    is_fasta
        Tell whether the object represents a FASTA record.
    is_fastq
        Tell whether the object represents a FASTQ record.
    normalize(iupac)
        Normalize the sequence held in the `seq` attribute.
    """

    id: str
    seq: str
    name: str
    description: Optional[str]
    qual: Optional[str]

    def is_fasta(self) -> bool:
        """
        Tell whether the object represents a FASTA record.

        Returns
        -------
        bool
            `True` when the record carries no quality information, `False`
            otherwise.
        """
        ...

    def is_fastq(self) -> bool:
        """
        Tell whether the object represents a FASTQ record.

        Returns
        -------
        bool
            `True` when the record carries quality information, `False`
            otherwise.
        """
        ...

    def normalize(self, iupac: bool) -> None:
        """
        Normalize the sequence held in the `seq` attribute of the object.

        Parameters
        ----------
        iupac : bool
            Presumably has the same meaning as the `iupac` argument of
            `normalize_seq` -- confirm against the implementation.

        See also
        --------
        normalize_seq: A function to normalize nucleotide sequence strings.

        Notes
        -----
        This method is meant for nucleotide sequences only. Applied to a
        protein sequence, it will wrongly treat amino acid characters as
        nucleotides.
        """
        ...
def parse_fastx_file(path: Union[str, Path]) -> FastxReader:
    """
    Parse a FASTA/FASTQ file, returning an iterator over its sequence records.

    Parameters
    ----------
    path : str or pathlib.Path
        Location of the FASTA/FASTQ file.

    Returns
    -------
    FastxReader
        A `FastxReader` iterator yielding one `Record` object per sequence
        in the input file.

    Raises
    ------
    NeedletailError
        When reading or parsing the input file fails.

    See also
    --------
    parse_fastx_string:
        Parse sequence records from a FASTA/FASTQ string.
    FastxReader:
        The iterator class whose instances yield `Record` objects.
    """
    ...
def parse_fastx_string(fastx_string: str) -> FastxReader:
    """
    Returns an iterator that parses a FASTA/FASTQ string and yields sequence
    records.

    Parameters
    ----------
    fastx_string : str
        A string containing FASTA/FASTQ-formatted sequence records.

    Returns
    -------
    FastxReader
        A `FastxReader` iterator that yields `Record` objects representing
        sequences from the input string.

    Raises
    ------
    NeedletailError
        If an error occurs while parsing the input string.

    See also
    --------
    parse_fastx_file:
        A function to parse sequence records from a FASTA/FASTQ file.
    FastxReader:
        A class with instances that are iterators that yield `Record` objects.
    """
    ...
def normalize_seq(seq: str, iupac: bool) -> str:
    """
    Normalize a nucleotide sequence string.

    The normalization consists of:

    - Upper-casing lowercase characters.
    - Dropping whitespace and newline characters.
    - Turning 'U' into 'T'.
    - Turning '.' and '~' into '-'.
    - Turning every character outside 'ACGTN-' into 'N', except that when
      `iupac` is `True` the characters denoting nucleotide ambiguity are
      kept.

    Parameters
    ----------
    seq : str
        The nucleotide sequence, as a string.
    iupac : bool
        When `True`, the nucleotide ambiguity characters ('B', 'D', 'H', 'V',
        'R', 'Y', 'S', 'W', 'K', and 'M', plus their lowercase forms) are not
        turned into 'N'. Lowercase characters are still upper-cased.
        NOTE(review): this stub declares no default for `iupac`; whether the
        implementation allows it to be omitted (as `False`) needs confirming.

    Returns
    -------
    str
        The normalized sequence string.

    Notes
    -----
    This function is meant for nucleotide sequences only. Applied to a
    protein sequence, it will wrongly treat amino acid characters as
    nucleotides.
    """
    ...
def reverse_complement(seq: str) -> str:
    """
    Return the reverse complement of a nucleotide sequence.

    Parameters
    ----------
    seq : str
        The nucleotide sequence, as a string.

    Returns
    -------
    str
        The reverse complement of `seq`.

    Notes
    -----
    This function is meant for nucleotide sequences only. Applied to a
    protein sequence, it will wrongly treat amino acid characters as
    nucleotides.
    """
    ...
def decode_phred(qual: str, base_64: bool) -> tuple[int, ...]:
    """
    Decode Phred quality strings to quality scores.

    Parameters
    ----------
    qual : str
        A string representing Phred-encoded quality strings.
    base_64 : bool
        If `True`, the Phred+64 encoding is used, otherwise the Phred+33
        encoding is used.
        NOTE(review): this stub declares no default for `base_64`; whether the
        implementation allows it to be omitted (as `False`) needs confirming.

    Returns
    -------
    tuple of int
        A tuple of integers representing quality scores derived from the
        probability of a base-calling error using a logarithmic
        transformation. Its length is not fixed, hence the variable-length
        `tuple[int, ...]` annotation rather than the 1-tuple `tuple[int]`.
    """
    ...