Skip to main content
edited tags
Link
200_success
  • 145.7k
  • 22
  • 191
  • 481
Source Link
Ben A
  • 10.8k
  • 5
  • 40
  • 103

Looping through file finding sequences

For an assignment, I had to loop through a text file and report every position at which a user-entered sequence was found. Is there any way I can make this code more efficient? It seems like I'm writing a lot of nested loops, which I've learned can be dangerous and complicated. Any and all help is appreciated!

import os #OperatingSystem
import sys #System
import re #Regex (Regular Expressions)

def checkSequenceCharacters(seq):
    """Terminate the program unless ``seq`` contains only A, G, T and C.

    Args:
        seq: The sequence to validate, iterated character by character.

    Returns:
        None when every character is one of 'A', 'G', 'T' or 'C'
        (an empty sequence passes, since there is nothing to reject).

    Raises:
        SystemExit: With exit status 1 as soon as an invalid character is
            found, after printing an error message.
    """
    valid_bases = ('A', 'G', 'T', 'C')
    if any(c not in valid_bases for c in seq):
        print("[!] ERROR: Not a valid sequence (A/G/T/C ONLY)")
        # Non-zero status so that callers/shells can detect the failure;
        # exiting with 0 would report success for invalid input.
        sys.exit(1)

def checkSequenceLength(seq):
    """Terminate the program unless ``seq`` is exactly five items long.

    Args:
        seq: The sequence whose length is checked with ``len``.

    Returns:
        None when ``len(seq) == 5``.

    Raises:
        SystemExit: With exit status 1 when the length is anything other
            than 5, after printing an error message.
    """
    if len(seq) != 5:
        print("[!] ERROR: Has to be exactly 5 letters long!")
        # Non-zero status so that callers/shells can detect the failure;
        # exiting with 0 would report success for invalid input.
        sys.exit(1)

# Ask for the sequence to search for. The two validators below terminate the
# program unless it is exactly five characters drawn from A/G/T/C.
seq = raw_input("Enter a 5 basepair sequence using only A,G,T,C: \n")

checkSequenceCharacters(seq)
checkSequenceLength(seq)

# expanduser("~") resolves the home directory without concatenating onto a
# possibly-None os.getenv("HOME"), which would raise TypeError.
sequencer_dir = os.path.join(os.path.expanduser("~"), "Desktop", "sequencer")
input_file_path = os.path.join(sequencer_dir, "inputfile.txt")
# One output file per searched sequence, named after the sequence itself.
output_file_path = os.path.join(sequencer_dir, "output", seq + ".txt")

# Number of input lines that contain the sequence at least once.
count = 0

# The with-statement closes whichever files were actually opened, even if the
# second open() fails or an error occurs mid-loop. Input is opened first, so
# a missing input file does not leave an empty output file behind.
with open(input_file_path) as input_file, \
        open(output_file_path, 'w') as output_file:
    # Line numbers are reported 1-based.
    for line_location, line in enumerate(input_file, 1):
        # seq has been validated to contain only A/G/T/C, so a plain
        # substring test is equivalent to a regex search for it.
        if seq not in line:
            continue
        count += 1
        print("[+] Sequence found at line " + str(line_location))
        # `line` keeps its own trailing newline, so the extra "\n" leaves a
        # blank line between entries in the output file.
        output_file.write("Line " + str(line_location) + ": " + line + "\n")

if count != 0:
    print("[*] Matches: " + str(count))
    print("[*] FILE CREATED AT: " + output_file_path)
else:
    print("[!] ERROR: No matches found!")