diff --git a/docs/src/sequence_search.md b/docs/src/sequence_search.md index ca49b70e..ac99ea99 100644 --- a/docs/src/sequence_search.md +++ b/docs/src/sequence_search.md @@ -250,6 +250,11 @@ julia> qa = PWMSearchQuery(motifs, 1.0); julia> findfirst(qa, subject) 3 +julia> findall(qa, subject) +3-element Vector{Int64}: + 3 + 5 + 9 ``` [Wasserman2004]: https://doi.org/10.1038/nrg1315 diff --git a/src/BioSequences.jl b/src/BioSequences.jl index aef223a9..4676840e 100644 --- a/src/BioSequences.jl +++ b/src/BioSequences.jl @@ -252,6 +252,50 @@ Base.eltype(::Type{<:Search{Q}}) where {Q<:HasRangeEltype} = UnitRange{Int} Base.eltype(::Type{<:Search}) = Int Base.IteratorSize(::Type{<:Search}) = Base.SizeUnknown() +""" + findall(pattern, sequence::BioSequence[, rng::UnitRange{Int}]; overlap::Bool=true)::Vector + +Find all occurrences of `pattern` in `sequence`. + +The return value is a vector of the locations where `pattern` was found: index ranges for queries that match a subsequence (e.g. `ExactSearchQuery`), or single indices otherwise (e.g. a `BioSymbol` or a `PWMSearchQuery`). +If there are no matching sequences, the return value is an empty vector. + +The search is restricted to the specified range when `rng` is set. + +With the keyword argument `overlap` set to `true`, the start index for the next search gets set to the start of the current match plus one; if set to `false`, the start index for the next search gets set to the end of the current match plus one. +The default value for the keyword argument `overlap` is `true`. + +The `pattern` can be a `BioSymbol` or a search query. + +See also [`ExactSearchQuery`](@ref), [`ApproximateSearchQuery`](@ref), [`PWMSearchQuery`](@ref). 
+ +# Examples +```jldoctest +julia> seq = dna"ACACACAC"; + +julia> findall(DNA_A, seq) +4-element Vector{Int64}: + 1 + 3 + 5 + 7 + +julia> findall(ExactSearchQuery(dna"ACAC"), seq) +3-element Vector{UnitRange{Int64}}: + 1:4 + 3:6 + 5:8 + +julia> findall(ExactSearchQuery(dna"ACAC"), seq; overlap=false) +2-element Vector{UnitRange{Int64}}: + 1:4 + 5:8 + +julia> findall(ExactSearchQuery(dna"ACAC"), seq, 2:7; overlap=false) +1-element Vector{UnitRange{Int64}}: + 3:6 +``` +""" function Base.findall(pat, seq::BioSequence; overlap::Bool = DEFAULT_OVERLAP) return collect(search(pat, seq; overlap)) end diff --git a/src/search/ExactSearchQuery.jl b/src/search/ExactSearchQuery.jl index 948960fd..fbf08ad7 100644 --- a/src/search/ExactSearchQuery.jl +++ b/src/search/ExactSearchQuery.jl @@ -28,6 +28,11 @@ julia> findnext(query, seq, 6) julia> findprev(query, seq, 7) 3:5 +julia> findall(query, seq) +2-element Vector{UnitRange{Int64}}: + 3:5 + 8:10 + julia> occursin(query, seq) true