For an assignment, I had to loop through a text file and report every position at which a user-entered sequence was found. Is there any way I can make this code more efficient? It seems like I'm writing a lot of nested loops, which I've learned can be dangerous and complicated. Any and all help is appreciated!
import os #OperatingSystem
import sys #System
import re #Regex (Regular Expressions)
def checkSequenceCharacters(seq):
    """Exit the program unless every character of ``seq`` is A, G, T or C.

    Prints an error message and terminates through ``sys.exit`` at the first
    character outside that alphabet; returns ``None`` when all characters are
    valid. An empty sequence has no invalid characters, so it passes here.

    Raises:
        SystemExit: with status 1 when an invalid character is found.
    """
    valid_bases = ('A', 'G', 'T', 'C')
    for c in seq:
        if c not in valid_bases:
            print("[!] ERROR: Not a valid sequence (A/G/T/C ONLY)")
            # Non-zero status so the shell / calling scripts see the failure;
            # exiting with 0 would report success.
            sys.exit(1)
def checkSequenceLength(seq, length=5):
    """Exit the program unless ``seq`` is exactly ``length`` items long.

    Args:
        seq: The sequence entered by the user.
        length: Required number of characters; defaults to 5, the value the
            script prompts for.

    Raises:
        SystemExit: with status 1 when ``len(seq)`` differs from ``length``.
    """
    if len(seq) != length:
        print("[!] ERROR: Has to be exactally %d letters long!" % length)
        # Non-zero status so the shell / calling scripts see the failure;
        # exiting with 0 would report success.
        sys.exit(1)
def _report_matches(seq, input_path, output_path):
    """Copy every line of ``input_path`` containing ``seq`` to ``output_path``.

    Each matching line is announced on stdout and written to the output file
    prefixed with its 1-based line number. Returns the number of matching
    lines. The output file is created (or truncated) even when nothing
    matches.
    """
    matches = 0
    # `with` closes each file that was actually opened. A try/finally that
    # closes both names unconditionally raises NameError when an open() fails,
    # hiding the real I/O error.
    with open(input_path) as input_file:
        with open(output_path, 'w') as output_file:
            for line_number, line in enumerate(input_file, 1):
                # The sequence is a literal A/G/T/C string, so a substring
                # test finds the same lines a regex search would.
                if seq not in line:
                    continue
                matches += 1
                print("[+] Sequence found at line " + str(line_number))
                # `line` normally still ends with its own newline, so the
                # extra "\n" leaves a blank line after each entry.
                output_file.write("Line " + str(line_number) + ": " + line + "\n")
    return matches


seq = raw_input("Enter a 5 basepair sequence using only A,G,T,C: \n")
checkSequenceCharacters(seq)
checkSequenceLength(seq)

# expanduser("~") still resolves a home directory when HOME is unset, where
# os.getenv("HOME") returns None and the string concatenation would fail.
sequencer_dir = os.path.join(os.path.expanduser("~"), "Desktop", "sequencer")
input_file_path = os.path.join(sequencer_dir, "inputfile.txt")
output_file_path = os.path.join(sequencer_dir, "output", seq + ".txt")

count = _report_matches(seq, input_file_path, output_file_path)
if count != 0:
    print("[*] Matches: " + str(count))
    print("[*] FILE CREATED AT: " + output_file_path)
else:
    print("[!] ERROR: No matches found!")