Skip to content

Commit 15d268c

Browse files
committed
WIP: Add tests
1 parent bbdeb79 commit 15d268c

File tree

2 files changed

+117
-2
lines changed

2 files changed

+117
-2
lines changed

src/longsequences/operators.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -388,11 +388,10 @@ function _findlast_nonzero(f::Function, seq::SeqOrView{<:KNOWN_ALPHABETS})
388388
# This part is slightly different, because the coding bits are shifted to the right,
389389
# but we need to count the leading bits.
390390
# So, we set all the unused bits to zero, then count leading zeros, and then
391-
# ignore the unused zero bits.
391+
# subtract the number of unused bits from the leading zero count.
392392
mask = (UInt(1) << (tail_bits & 63)) - 1
393393
lz = leading_zeros(f(tail) & mask)
394394
if lz < 64
395-
# Compensate for@inbounds(data[body_i]) noncoding zero bits
396395
zero_symbols = div((lz - (64 - tail_bits)) % UInt, bps) % Int
397396
return i - zero_symbols
398397
end

test/longsequences/find.jl

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,122 @@
9797
end
9898
end
9999

100+
@testset "SIMD find" begin
101+
# We exploit that all the following methods make use of the same
102+
# functions.
103+
# So, we first thoroughly check one of the methods for correctness,
104+
# and then we only need to check that the bitwise kernels of the rest are correct.
105+
106+
# The thorough one - test that the right indices are found, given that the kernel
107+
# is correct
108+
@testset "isambiguous 4bit" begin
109+
seq = randdnaseq(400)
110+
indices = [1, 2, 5, 11, 21, 50, 211, 380, 391, 399, 400]
111+
@test isnothing(findfirst(isambiguous, seq))
112+
for i in indices
113+
seq[i] = DNA_W
114+
end
115+
for i in indices
116+
@test findfirst(isambiguous, seq) == i
117+
seq[i] = DNA_A
118+
end
119+
for i in indices
120+
seq[i] = DNA_K
121+
end
122+
for i in reverse(indices)
123+
@test findlast(isambiguous, seq) == i
124+
seq[i] = DNA_T
125+
end
126+
@test isnothing(findfirst(isambiguous, seq))
127+
128+
# Some hand-picked spot checks of findnext/findprev on short literal sequences
129+
@test findnext(isambiguous, dna"ATGCTGTA", 2) === nothing
130+
131+
seq = dna"ATSCTGCA"
132+
@test findnext(isambiguous, seq, 2) === 3
133+
@test findnext(isambiguous, seq, 3) === 3
134+
@test findnext(isambiguous, seq, 4) === nothing
135+
@test findprev(isambiguous, seq, 8) === 3
136+
@test findprev(isambiguous, seq, 3) === 3
137+
@test findprev(isambiguous, seq, 2) === nothing
138+
@test_throws BoundsError findnext(isambiguous, seq, 0)
139+
@test_throws BoundsError findprev(isambiguous, seq, 10)
140+
141+
seq = dna"ATSCTGMA"
142+
@test findnext(isambiguous, seq, 3) === 3
143+
@test findnext(isambiguous, seq, 4) === 7
144+
@test findnext(isambiguous, seq, 8) === nothing
145+
@test findprev(isambiguous, seq, 8) === 7
146+
@test findprev(isambiguous, seq, 7) === 7
147+
@test findprev(isambiguous, seq, 6) === 3
148+
@test findprev(isambiguous, seq, 2) === nothing
149+
end
150+
151+
@testset "Various kernels" begin
152+
for (f, A) in Any[
153+
(isambiguous, DNAAlphabet{4}()),
154+
(isambiguous, AminoAcidAlphabet()),
155+
(!isambiguous, DNAAlphabet{4}()),
156+
(!isambiguous, AminoAcidAlphabet()),
157+
(iscertain, DNAAlphabet{4}()),
158+
(!iscertain, AminoAcidAlphabet()),
159+
(isgap, DNAAlphabet{4}()),
160+
(!isgap, AminoAcidAlphabet()),
161+
]
162+
yes, no = Any[], Any[]
163+
for i in A
164+
push!(f(i) ? yes : no, i)
165+
end
166+
seq = LongSequence{typeof(A)}(undef, 3)
167+
for i in eachindex(seq)
168+
seq[i] = first(no)
169+
end
170+
171+
@test isnothing(findfirst(f, seq))
172+
@test isnothing(findlast(f, seq))
173+
174+
# Every symbol for which f is true ('yes') must be found at position 2
175+
for i in yes
176+
seq[2] = i
177+
@test findfirst(f, seq) === 2
178+
@test findlast(f, seq) === 2
179+
end
180+
181+
# No symbol for which f is false ('no') may match: flanking positions 1 and 3 are skipped
182+
for i in no
183+
seq[1] = i
184+
seq[3] = i
185+
@test findfirst(f, seq) === 2
186+
@test findlast(f, seq) === 2
187+
end
188+
end
189+
end
190+
191+
@testset "Noop find methods" begin
192+
for s in Any[randseq(DNAAlphabet{2}(), 256), randseq(RNAAlphabet{2}(), 256), SimpleSeq(randdnaseq(256))]
193+
@test isnothing(findfirst(isgap, s))
194+
@test isnothing(findlast(isgap, s))
195+
196+
@test isnothing(findfirst(isambiguous, s))
197+
@test isnothing(findlast(isambiguous, s))
198+
199+
@test isnothing(findfirst(!iscertain, s))
200+
@test isnothing(findlast(!iscertain, s))
201+
202+
for f in [iscertain, !isambiguous, !isgap]
203+
for i in [10, 41, 256, 1]
204+
@test findnext(f, s, i) === i
205+
@test findprev(f, s, i) === i
206+
@test_throws BoundsError findnext(f, s, 0)
207+
@test_throws BoundsError findprev(f, s, lastindex(s) + 1)
208+
@test isnothing(findnext(f, s, lastindex(s) + 1))
209+
@test isnothing(findprev(f, s, 0))
210+
end
211+
end
212+
end
213+
end
214+
end
215+
100216
@testset "Search" begin
101217
# 0000000001111
102218
# 1234567890123

0 commit comments

Comments
 (0)