# * iscertain + !iscertain
#   Four bit: Certain bitcount OR enumerate_nibbles & 0x1111111 + tz
#   AA: 0x00-0x15 or 0x1a
#
# * isambiguous + !isambiguous
#   Four bit: Ambiguous bitcount OR enumerate_nibbles & 0xEEE.. + tz
#   AA: 0x16-0x19
9-
10- # Make 0 for any other bitpattern than 0x1, 0x2, 0x4, 0x8.
11-
# Zeros out all nibbles except the ones with 4bit A,C,G or T.
# In the result, every nibble that passes the test holds 0x1 and every other
# nibble holds 0x0.
# NOTE(review): this relies on `enumerate_nibbles` (defined elsewhere) returning
# the number of set bits of each nibble in that nibble -- confirm.
function iscertain_kernel(::NucleicAcidAlphabet{4}, x::UInt64)
    x = enumerate_nibbles(x)
    # Move bit 2 and bit 1 of every nibble down to bit 0, so that bit 0 of `y`
    # is set whenever the nibble value has its 2-bit or 4-bit set.
    y = (x & 0x4444444444444444) >> 2
    y |= (x & 0x2222222222222222) >> 1
    # Keep bit 0 only where neither of the higher bits was set, i.e. where the
    # nibble value is exactly 1.
    return x & ~y & 0x1111111111111111
end
198
# Zeros out all which is not a certain (normal or AA_Term).
# Works on the eight bytes of `x` in parallel: a byte becomes 0xFF when it is
# below 0x16 or equal to 0x1a, and 0x00 otherwise.
function iscertain_kernel(::AminoAcidAlphabet, x::UInt64)
    # 1. Set normal to FF, others to 00
    y = simd_lt_byte(x, 0x16)

    # 2. Set Term to FF, others to 00.
    # XOR with 0x1a turns exactly the 0x1a bytes into zero bytes.
    # NOTE(review): presumably `set_zero_encoding` (defined elsewhere) leaves
    # 0x01 in every zero byte and 0x00 elsewhere, so that the multiplication
    # widens each flag to 0xFF -- confirm.
    z = set_zero_encoding(BitsPerSymbol{8}(), x ⊻ 0x1a1a1a1a1a1a1a1a) * 0xFF

    # 3: OR them
    return y | z
end
2720
# Byte-wise unsigned "less than" over the eight bytes packed in `x`.
# Every byte of the result is 0xFF where the corresponding byte of `x` is
# strictly smaller than `byte`, and 0x00 otherwise.
@inline function simd_lt_byte(x::UInt64, byte::UInt8)
    T = NTuple{8,VecElement{UInt8}}
    lanes = reinterpret(T, x)
    # Broadcast the comparison value to all eight lanes.
    rhs = ntuple(_ -> VecElement(byte), Val(8))
    # `icmp ult` yields one i1 per lane; `sext` widens it to 0x00 / 0xFF.
    mask = Base.llvmcall(
        """
        %res = icmp ult <8 x i8> %0, %1
        %resb = sext <8 x i1> %res to <8 x i8>
        ret <8 x i8> %resb
        """,
        NTuple{8,VecElement{UInt8}},
        Tuple{NTuple{8,VecElement{UInt8}},NTuple{8,VecElement{UInt8}}},
        lanes, rhs)
    return reinterpret(UInt64, mask)
end
33+
# Byte-wise subtraction over the eight bytes packed in `x`.
# Subtracts `byte` from every byte of `x` independently; each lane wraps
# around modulo 256 and never borrows from its neighbour.
@inline function sub_byte(x::UInt64, byte::UInt8)
    T = NTuple{8,VecElement{UInt8}}
    lanes = reinterpret(T, x)
    # Broadcast the subtrahend to all eight lanes.
    rhs = ntuple(_ -> VecElement(byte), Val(8))
    diff = Base.llvmcall(
        """
        %res = sub <8 x i8> %0, %1
        ret <8 x i8> %res
        """,
        NTuple{8,VecElement{UInt8}},
        Tuple{NTuple{8,VecElement{UInt8}},NTuple{8,VecElement{UInt8}}},
        lanes, rhs)
    return reinterpret(UInt64, diff)
end
2845
# Zeros out all nibbles encoding 4bit A,C,G or T.
# XOR with 0x1 per nibble clears exactly those nibbles for which
# `enumerate_nibbles` produced the value 1; all other nibbles stay non-zero.
# NOTE(review): assumes `enumerate_nibbles` (defined elsewhere) yields the
# per-nibble count of set bits -- confirm.
function uncertain_kernel(::NucleicAcidAlphabet{4}, x::UInt64)
    return enumerate_nibbles(x) ⊻ 0x1111111111111111
end
48+
# Zero out normal AAs and AA_Term.
# A byte of the result is zero when the corresponding byte of `x` is below
# 0x16 or equal to 0x1a; every other byte is left with some non-zero pattern.
function uncertain_kernel(::AminoAcidAlphabet, x::UInt64)
    # Zero out normal AA, set rest to 0xFF
    y = ~simd_lt_byte(x, 0x16)

    # Zero out 0x1a, set rest to other bitpatterns
    z = x ⊻ 0x1a1a1a1a1a1a1a1a

    # A byte survives only if it is non-zero in both masks.
    return y & z
end
3159
# Zeros out A, C, G, T or Gap.
# Combines the mask from `uncertain_kernel` with a mask that is zero for every
# all-zero nibble of `x`.
function ambiguous_kernel(A::NucleicAcidAlphabet{4}, x::UInt64)
    # The y part makes every nibble 0xF, unless it's 0 to begin with:
    # fold all four bits of each nibble into its lowest bit ...
    y = x | (x >>> 2)
    y |= y >>> 1
    y &= 0x1111111111111111
    # ... then widen that single bit to the whole nibble (0x1 * 0xF == 0xF,
    # so no carry crosses a nibble boundary).
    y *= 0x0F
    return uncertain_kernel(A, x) & y
end
69+
# Zero out all except ambiguous symbols (AA_B, AA_J, AA_Z, AA_X), 0x16:0x19.
# Subtracting 0x16 from each byte maps the range 0x16:0x19 onto 0x00:0x03;
# bytes below 0x16 wrap around to large values, so one unsigned "< 0x04"
# comparison selects exactly that range. Selected bytes become 0xFF, all
# others 0x00.
function ambiguous_kernel(::AminoAcidAlphabet, x::UInt64)
    return simd_lt_byte(sub_byte(x, 0x16), 0x04)
end
4174
# Zeros out all except A, C, G, T and Gap.
# In the result, a nibble holds 0x1 when the value produced by
# `enumerate_nibbles` for it is 0 or 1, and 0x0 otherwise.
# NOTE(review): assumes `enumerate_nibbles` (defined elsewhere) yields the
# per-nibble count of set bits -- confirm.
function unambiguous_kernel(::NucleicAcidAlphabet{4}, x::UInt64)
    y = enumerate_nibbles(x)
    # Bit 0 becomes set when the nibble value has its 2-bit or 4-bit set,
    # i.e. when the value is 2 or more.
    y = (y >>> 1) | (y >>> 2)
    y &= 0x1111111111111111
    # Invert the flag so that it marks the nibbles with value 0 or 1.
    return y ⊻ 0x1111111111111111
end
82+
# Zero out the four ambiguous amino acids B, J, Z, X.
# Bitwise complement of the range test for 0x16:0x19: bytes inside that range
# become 0x00, every other byte becomes 0xFF.
function unambiguous_kernel(::AminoAcidAlphabet, x::UInt64)
    return ~simd_lt_byte(sub_byte(x, 0x16), 0x04)
end
4987
88+
5089# For debug
5190#=
5291function make_all_nibbles()
@@ -56,4 +95,4 @@ function make_all_nibbles()
5695 end
5796 x
5897end
59- =#
98+ =#
0 commit comments