I am aware that libraries exist for parsing Python code; however, in order to learn how they detect errors, I'm writing a script that checks a file for just six PEP 8 violations, for reference.
This is what my six current PEP 8 check functions look like (each one appends a message to the issues list when it finds a violation):
"""
[S001] Line is longer than 79 characters
[S002] Indentation is not a multiple of four
[S003] Unnecessary semicolon after a statement (note, semicolons are admissible in comments)
[S004] At least two spaces before inline comments required
[S005] TODO found (only in comments; the case does not matter)
[S006] More than two blank lines used before this line (must be output for the first non-empty line)
"""
def S001(self, ln_num: int, line: str, max_length: int = 79):
    """Record an S001 issue when *line* is longer than *max_length*.

    Args:
        ln_num: Line number to show in the reported message.
        line: Text of the line being checked.
        max_length: Largest permitted line length; defaults to 79.

    A trailing line terminator is not counted, so the check gives the same
    answer whether or not the caller stripped the newline beforehand.
    """
    visible = line.rstrip("\r\n")
    if len(visible) > max_length:
        self.issues.append(f"Line {ln_num}: S001 Too Long")
def S002(self, ln_num: int, line: str):
    """Record an S002 issue when the indentation is not a multiple of four.

    Args:
        ln_num: Line number to show in the reported message.
        line: Text of the line being checked.

    Indentation is measured as the number of leading whitespace characters.
    Blank and whitespace-only lines are skipped: they contain no statement,
    so there is no indentation to judge.
    """
    content = line.lstrip()
    if not content:
        return
    indentation_length = len(line) - len(content)
    if indentation_length % 4 != 0:
        self.issues.append(f"Line {ln_num}: S002 Indentation is not a multiple of four")
def S003(self, ln_num: int, line: str):
    """Record an S003 issue when a statement ends with a semicolon.

    Args:
        ln_num: Line number to show in the reported message.
        line: Text of the line being checked.

    The line is scanned character by character so that ``;`` and ``#``
    inside string literals are ignored and only the text before the real
    comment marker is treated as code. Semicolons inside the comment itself
    are permitted. The scan looks at one line only; a string literal that
    is still open at the end of the line is left unreported because the
    semicolon may belong to that string.
    """
    quote = None  # delimiter of the string literal currently open, if any
    comment_at = len(line)
    i = 0
    while i < len(line):
        ch = line[i]
        if quote:
            if ch == "\\":
                i += 1  # skip the escaped character
            elif line.startswith(quote, i):
                i += len(quote) - 1
                quote = None
        elif ch in "'\"":
            # Recognise triple-quoted delimiters as well as single ones.
            quote = ch * 3 if line.startswith(ch * 3, i) else ch
            i += len(quote) - 1
        elif ch == "#":
            comment_at = i
            break
        i += 1

    code = line[:comment_at].rstrip()
    if quote is None and code.endswith(";"):
        self.issues.append(f"Line {ln_num}: S003 Unnecessary semicolon")
def S004(self, ln_num: int, line: str):
    """Record an S004 issue when an inline comment lacks two leading spaces.

    Args:
        ln_num: Line number to show in the reported message.
        line: Text of the line being checked.

    Only the first ``#`` that lies outside a string literal is treated as
    the comment marker, so ``#`` characters inside strings or further along
    in the comment text are not examined. A comment that has no code before
    it on the line is not an inline comment and is never reported.
    """
    quote = None  # delimiter of the string literal currently open, if any
    comment_at = -1
    i = 0
    while i < len(line):
        ch = line[i]
        if quote:
            if ch == "\\":
                i += 1  # skip the escaped character
            elif line.startswith(quote, i):
                i += len(quote) - 1
                quote = None
        elif ch in "'\"":
            # Recognise triple-quoted delimiters as well as single ones.
            quote = ch * 3 if line.startswith(ch * 3, i) else ch
            i += len(quote) - 1
        elif ch == "#":
            comment_at = i
            break
        i += 1

    if comment_at < 0:
        return
    before = line[:comment_at]
    if before.strip() and not before.endswith("  "):
        self.issues.append(f"Line {ln_num}: S004 At least two spaces before inline comments required")
def S005(self, ln_num: int, line: str):
    """Record an S005 issue when a comment contains "todo" in any letter case.

    Args:
        ln_num: Line number to show in the reported message.
        line: Text of the line being checked.

    The comment is taken to start at the first ``#`` outside a string
    literal, so a ``#`` that is part of a string does not turn the rest of
    the line into a comment. Occurrences of "todo" in code or in strings
    are ignored.
    """
    quote = None  # delimiter of the string literal currently open, if any
    comment_at = -1
    i = 0
    while i < len(line):
        ch = line[i]
        if quote:
            if ch == "\\":
                i += 1  # skip the escaped character
            elif line.startswith(quote, i):
                i += len(quote) - 1
                quote = None
        elif ch in "'\"":
            # Recognise triple-quoted delimiters as well as single ones.
            quote = ch * 3 if line.startswith(ch * 3, i) else ch
            i += len(quote) - 1
        elif ch == "#":
            comment_at = i
            break
        i += 1

    if comment_at >= 0 and "todo" in line[comment_at:].lower():
        self.issues.append(f"Line {ln_num}: S005 TODO found")
def S006(self, ln_num: int, line: str):
    """Record an S006 issue when three blank lines directly precede *line*.

    Args:
        ln_num: Line number to show in the reported message; also used to
            locate the preceding lines in ``self.code``.
        line: Text of the line being checked.

    Whitespace-only lines count as blank, both for the current line (which
    is then never reported) and for the lines before it.

    NOTE(review): the slice assumes ``self.code`` is a list of line strings
    in which ``self.code[ln_num - 1]`` is the current line -- confirm
    against the caller.
    """
    # Fewer than three lines can precede lines 1-3, and a blank line is
    # never the "first non-empty line" the rule reports on.
    if ln_num < 4 or not line.strip():
        return
    preceding = self.code[ln_num - 4:ln_num - 1]
    if len(preceding) == 3 and not any(prev.strip() for prev in preceding):
        self.issues.append(f"Line {ln_num}: S006 More than two blank lines used before this line")
Testcases:
""" Test case 1 """
print('What\'s your name?') # reading an input
name = input();
print(f'Hello, {name}'); # here is an obvious comment: this prints greeting with a name
very_big_number = 11_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000
print(very_big_number)
def some_fun():
print('NO TODO HERE;;')
pass; # Todo something
""" END """
""" Test Case 2 """
print('hello')
print('hello');
print('hello');;;
print('hello'); # hello
# hello hello hello;
greeting = 'hello;'
print('hello') # ;
""" END """
""" Test Case 3 """
print('hello')
print('hello') # TODO
print('hello') # TODO # TODO
# todo
# TODO just do it
print('todo')
print('TODO TODO')
todo()
todo = 'todo'
""" END """
""" Test Case 4 """
print("hello")
print("bye")
print("check")
""" END """
""" Test Case 5 """
print('hello!')
# just a comment
print('hello!') #
print('hello!') # hello
print('hello!') # hello
print('hello!')# hello
""" END """
Testcase 1 Expected Output:
Line 1: S004 At least two spaces before inline comment required
Line 2: S003 Unnecessary semicolon
Line 3: S001 Too long
Line 3: S003 Unnecessary semicolon
Line 6: S001 Too long
Line 11: S006 More than two blank lines used before this line
Line 13: S003 Unnecessary semicolon
Line 13: S004 At least two spaces before inline comment required
Line 13: S005 TODO found
I am aware that my code is not optimal and doesn't handle every edge case, but I would like to understand how such tools detect these errors properly. I would appreciate improvements or better ideas for detecting them, since I'm personally not happy with my solutions.