11# Zeros out all nibbles except the ones with 4bit A,C,G or T
2- function iscertain_kernel (:: NucleicAcidAlphabet{4} , x:: UInt64 )
2+ @inline function iscertain_kernel (:: NucleicAcidAlphabet{4} , x:: UInt64 )
33 x = enumerate_nibbles (x)
44 y = (x & 0x4444444444444444 ) >> 2
55 y |= (x & 0x2222222222222222 ) >> 1
66 return x & ~ y & 0x1111111111111111
77end
88
99# Zeros out all which is not a certain (normal or AA_Term)
10- function iscertain_kernel (:: AminoAcidAlphabet , x:: UInt64 )
10+ @inline function iscertain_kernel (:: AminoAcidAlphabet , x:: UInt64 )
1111 # 1. Set normal to FF, others to 00
1212 y = simd_lt_byte (x, 0x16 )
1313
@@ -18,6 +18,16 @@ function iscertain_kernel(::AminoAcidAlphabet, x::UInt64)
1818 return y | z
1919end
2020
21+ function _findnext (:: typeof (iscertain), seq:: SeqOrView{<:Union{<:NucleicAcidAlphabet{4}, AminoAcidAlphabet}} , i:: Int )
22+ f = @inline u -> iscertain_kernel (Alphabet (seq), u)
23+ return _findnext_nonzero (f, seq, i)
24+ end
25+
26+ function _findprev (:: typeof (iscertain), seq:: SeqOrView{<:Union{<:NucleicAcidAlphabet{4}, AminoAcidAlphabet}} , i:: Int )
27+ f = @inline u -> iscertain_kernel (Alphabet (seq), u)
28+ return _findprev_nonzero (f, seq, i)
29+ end
30+
2131@inline function simd_lt_byte (x:: UInt64 , byte:: UInt8 )
2232 T = NTuple{8 , VecElement{UInt8}}
2333 x = reinterpret (T, x)
2838 ret <8 x i8> %resb
2939 """
3040 z = Core. Intrinsics. llvmcall (s, NTuple{8 , VecElement{UInt8}}, Tuple{NTuple{8 , VecElement{UInt8}}, NTuple{8 , VecElement{UInt8}}}, x, y)
31- reinterpret (UInt64, z)
41+ return reinterpret (UInt64, z)
3242end
3343
3444@inline function sub_byte (x:: UInt64 , byte:: UInt8 )
4050 ret <8 x i8> %res
4151 """
4252 z = Core. Intrinsics. llvmcall (s, NTuple{8 , VecElement{UInt8}}, Tuple{NTuple{8 , VecElement{UInt8}}, NTuple{8 , VecElement{UInt8}}}, x, y)
43- reinterpret (UInt64, z)
53+ return reinterpret (UInt64, z)
4454end
4555
4656# Zeros out all nibbles encoding 4bit A,C,G or T
47- uncertain_kernel (:: NucleicAcidAlphabet{4} , x:: UInt64 ) = enumerate_nibbles (x) ⊻ 0x1111111111111111
57+ @inline function uncertain_kernel (:: NucleicAcidAlphabet{4} , x:: UInt64 )
58+ return enumerate_nibbles (x) ⊻ 0x1111111111111111
59+ end
4860
4961# Zero out normal AAs and AA_Term
50- function uncertain_kernel (:: AminoAcidAlphabet , x:: UInt64 )
62+ @inline function uncertain_kernel (:: AminoAcidAlphabet , x:: UInt64 )
5163 # Zero out normal AA, set rest to 0xFF
5264 y = ~ simd_lt_byte (x, 0x16 )
5365
@@ -57,8 +69,26 @@ function uncertain_kernel(::AminoAcidAlphabet, x::UInt64)
5769 return y & z
5870end
5971
72+ function _findnext (
73+ :: ComposedFunction{typeof(!), typeof(iscertain)} ,
74+ seq:: SeqOrView{<:Union{<:NucleicAcidAlphabet{4}, AminoAcidAlphabet}} ,
75+ i:: Int ,
76+ )
77+ f = @inline u -> uncertain_kernel (Alphabet (seq), u)
78+ return _findnext_nonzero (f, seq, i)
79+ end
80+
81+ function _findprev (
82+ :: ComposedFunction{typeof(!), typeof(iscertain)} ,
83+ seq:: SeqOrView{<:Union{<:NucleicAcidAlphabet{4}, AminoAcidAlphabet}} ,
84+ i:: Int ,
85+ )
86+ f = @inline u -> uncertain_kernel (Alphabet (seq), u)
87+ return _findprev_nonzero (f, seq, i)
88+ end
89+
6090# Zeros out A, C, G, T or Gap
61- function ambiguous_kernel (A:: NucleicAcidAlphabet{4} , x:: UInt64 )
91+ @inline function ambiguous_kernel (A:: NucleicAcidAlphabet{4} , x:: UInt64 )
6292 # The y part makes every nibble 0xF, unless it's 0 to begin with
6393 y = x | (x >>> 2 )
6494 y |= y >>> 1
@@ -68,23 +98,50 @@ function ambiguous_kernel(A::NucleicAcidAlphabet{4}, x::UInt64)
6898end
6999
70100# Zero out all except ambiguous symbols (AA_B, AA_J, AA_Z, AA_X), 0x16:0x19
71- function ambiguous_kernel (:: AminoAcidAlphabet , x:: UInt64 )
101+ @inline function ambiguous_kernel (:: AminoAcidAlphabet , x:: UInt64 )
72102 return simd_lt_byte (sub_byte (x, 0x16 ), 0x04 )
73103end
74104
105+ function _findnext (:: typeof (isambiguous), seq:: SeqOrView{<:Union{<:NucleicAcidAlphabet{4}, AminoAcidAlphabet}} , i:: Int )
106+ f = @inline u -> ambiguous_kernel (Alphabet (seq), u)
107+ return _findnext_nonzero (f, seq, i)
108+ end
109+
110+ function _findprev (:: typeof (isambiguous), seq:: SeqOrView{<:Union{<:NucleicAcidAlphabet{4}, AminoAcidAlphabet}} , i:: Int )
111+ f = @inline u -> ambiguous_kernel (Alphabet (seq), u)
112+ return _findprev_nonzero (f, seq, i)
113+ end
114+
75115# Zeros out all except A, C, G, T and Gap
76- function unambiguous_kernel (:: NucleicAcidAlphabet{4} , x:: UInt64 )
116+ @inline function unambiguous_kernel (:: NucleicAcidAlphabet{4} , x:: UInt64 )
77117 y = enumerate_nibbles (x)
78118 y = (y >>> 1 ) | (y >>> 2 )
79119 y &= 0x1111111111111111
80120 return y ⊻= 0x1111111111111111
81121end
82122
83123# Zero out the four ambiguous amino acids B, J, Z, X
84- function unambiguous_kernel (:: AminoAcidAlphabet , x:: UInt64 )
124+ @inline function unambiguous_kernel (:: AminoAcidAlphabet , x:: UInt64 )
85125 return ~ simd_lt_byte (sub_byte (x, 0x16 ), 0x04 )
86126end
87127
128+ function _findnext (
129+ :: ComposedFunction{typeof(!), typeof(isambiguous)} ,
130+ seq:: SeqOrView{<:Union{<:NucleicAcidAlphabet{4}, AminoAcidAlphabet}} ,
131+ i:: Int ,
132+ )
133+ f = @inline u -> unambiguous_kernel (Alphabet (seq), u)
134+ return _findnext_nonzero (f, seq, i)
135+ end
136+
137+ function _findprev (
138+ :: ComposedFunction{typeof(!), typeof(isambiguous)} ,
139+ seq:: SeqOrView{<:Union{<:NucleicAcidAlphabet{4}, AminoAcidAlphabet}} ,
140+ i:: Int ,
141+ )
142+ f = @inline u -> unambiguous_kernel (Alphabet (seq), u)
143+ return _findprev_nonzero (f, seq, i)
144+ end
88145
89146# For debug
90147#=
0 commit comments