-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBLASTscript.py
More file actions
94 lines (76 loc) · 4.53 KB
/
BLASTscript.py
File metadata and controls
94 lines (76 loc) · 4.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#########################################################################################
## Extract sequence from random two samples created by https://web.expasy.org/randseq/ ##
#########################################################################################
seq1 = "SEQUENCE 200 AA; 22287 MW; CA76AFAD7E9B646F CRC64; RNYAILMLVP FSFGVKDLRA ILGELLRRNI YQKSTEFFHN PKVRMHLRDK PLGMGSHLQD PYGRAGSKDI QIAVISELEV RATICVETFF WIVENAGVLQ DYIKSDAENP DRSEHSVSWK LEATSANGKV AFYRANLPKP YVGTIIGLGQ DGELTSKRAE CNDLQDTASQ KGPEPGLVEA VCFDLIKYSA MVTTTQMLTH"
seq2 = "SEQUENCE 100 AA; 11150 MW; 86E46D87BAFAF057 CRC64; RKYLKLHLET ILSTDRPPFY LNKRGLEPKV SDRIQMEEGV DIAASQSLNQ FASQMNLAYK TVCDLRSVRV FATKIEVILV WGEPRPLDGG AFGITGAHES DYIKSDAENP DRSEHSVSWK LEATSANGKV AFYRANLPKP PLGMGSHLQD PYGRAGSKDI QIAVISELEV RATICVETFF WIVENAGVLQ MVTTTQMLTH"
posSeq = 7 # Position in list where the acids start
# Split string into parts (list) to extract amino acids starting at "posSeq"
seq1_acids = seq1.split()[posSeq:]
seq2_acids = seq2.split()[posSeq:]
# Merge list elements back to single string
seq1_acids_merged = ''.join(seq1_acids[:])
seq2_acids_merged = ''.join(seq2_acids[:])
################################################
## Input range to compare and comparison mode ##
################################################
n_acids = len(seq1_acids_merged) # Amount of amino acids
print("\nLength of sequence: ", n_acids)
print("Input starting position for comparison:")
comp_start = int(input())
print("Input ending position for comparison:")
comp_end = int(input())
print("Start set to:", comp_start, "\n" "End set to:", comp_end)
n_acids_interval = range(comp_start - 1, comp_end - 1) # Interval for search
print("\nAvailable comparison modes:\n",
"(1) Search for matching nucleotides / amino acids\n",
"(2) Search for mutated nucleosides / amino acids\n",
"(3) Search for specific nucleoside / amino acid sequence\n",
"Input 1,2 or 3:")
comp_mode = int(input()) # Input comparison mode 1, 2 or 3
##########################################
## Compare sequences and output results ##
##########################################
def find_sim_seq(s1, s2, interval):
ind_matches = []
all_matches = []
for i in interval:
if s1[i] == s2[i]: # Compare for match
ind_matches.append(i) # Append position to list with all matches
match = [s1[i], s2[i]]
position = [match[0][0], str(i + 1), match[1][0]] # Combine code with position
match = ''.join(position[:]) # Get rid of whitespace for correct code displaying
all_matches.append(match) # Append to list with matching code + position
amount_matches = len(ind_matches) # Total amount of matches
return amount_matches, all_matches
def find_mut_seq(s1, s2, interval):
ind_mutations = []
all_mutations = []
for i in interval:
if s1[i] != s2[i]: # Compare for mutations
ind_mutations.append(i) # Append position to list with all mutations
match = [s1[i], s2[i]]
position = [match[0][0], str(i + 1), match[1][0]] # Combine code with position
match = ''.join(position[:]) # Get rid of whitespace for correct code displaying
all_mutations.append(match) # Append to list with mutated code + position
amount_mutations = len(ind_mutations) # Total amount of mutations
return amount_mutations, all_mutations
if comp_mode == 1: # Find sequence matches
print("Search for matching sequences initiated.\nGenerating results...")
X = find_sim_seq(seq1_acids_merged, seq2_acids_merged, n_acids_interval)
print("\nMatches found in the sequence interval:\n", X[1])
print("\nTotal amount of matches:", X[0])
elif comp_mode == 2: # Find mutations
print("Search for mutated sequences initiated.\nGenerating results...")
X = find_mut_seq(seq1_acids_merged, seq2_acids_merged, n_acids_interval)
print("\nMutations found in the sequence interval:\n", X[1])
print("\nTotal amount of mutations:", X[0])
elif comp_mode == 3: # Find defined sequence
print("Input defined sequence to search for in 'seq1'.\nE.g.: 'LN' or 'ATC'")
seq_input = input() # Input sequence string
print("Search for defined sequence initiated.\nGenerating results...")
if seq1.find(seq_input, comp_start, comp_end) != -1: # Search for defined sequence
print("\nPositive,", "'", seq_input, "'", "found in sequence!")
else:
print("\nNegative,", "'", seq_input, "'", "NOT found in sequence!")
else:
print("Wrong input. Restart script and type: '1', '2' or '3'")