Save the script below as script.py and run it like this:
python3 script.py input.html
This script parses the HTML, looks for the table tag whose attributes include width="100%" and cellpadding="4", and prints that table (its tags and text) to standard output. The advantage of this approach is that it keeps working if the formatting of the HTML file changes, because the script parses the HTML instead of relying on exact string matching.
from html.parser import HTMLParser
import sys
def print_tag(tag, attrs, end=False):
    """Write a single HTML tag to stdout, without a trailing newline.

    Args:
        tag: Tag name, e.g. ``"td"``.
        attrs: Iterable of ``(name, value)`` pairs, in the form
            ``HTMLParser.handle_starttag`` supplies them. ``value`` is
            ``None`` for an attribute written without a value
            (``<td nowrap>``).
        end: When true, emit a closing tag (``</tag>``) instead of an
            opening one.
    """
    # Local import so the function carries its own dependency.
    from html import escape

    parts = ["</" if end else "<", tag]
    for attr, value in attrs:
        if value is None:
            # Valueless attribute: concatenating None would raise TypeError,
            # so emit the bare attribute name.
            parts.append(" " + attr)
        else:
            # The parser hands over attribute values with character
            # references already decoded; re-escape so a value containing
            # '"', '<' or '&' cannot break the emitted markup.
            parts.append(" " + attr + '="' + escape(value, quote=True) + '"')
    parts.append(">")
    print("".join(parts), end="")
# The path of the HTML file is the only (required) command-line argument.
if len(sys.argv) < 2:
    # sys.exit() with a string writes it to stderr and exits with status 1,
    # which keeps stdout reserved for the extracted HTML.
    sys.exit("ERROR: expected argument - filename")

# NOTE(review): the encoding is hard-coded to cp1252 - presumably the input
# files are Windows-1252; confirm before feeding it other documents.
with open(sys.argv[1], 'r', encoding='cp1252') as content_file:
    content = content_file.read()

# Module-level flag read and written by MyHTMLParser: True while the parser is
# inside the table that should be echoed to stdout.
do_print = False
class MyHTMLParser(HTMLParser):
    """Echo the table with width="100%" and cellpadding="4" to stdout.

    Printing is controlled by the module-level ``do_print`` flag. It is
    raised at the start tag of a ``<table>`` carrying both attributes and
    lowered at the ``</table>`` that closes that same table. While it is
    raised, every start tag, end tag and text chunk is written to stdout
    (tags through ``print_tag``).
    """

    # Elements whose text content the parser passes through without decoding
    # character references; their text must not be re-escaped on output.
    _RAW_TEXT_TAGS = ("script", "style")

    # Number of <table> elements currently open, counted from the matched
    # table (1 = inside the matched table itself, 0 = outside it).
    _table_depth = 0

    # True while the parser is inside one of _RAW_TEXT_TAGS.
    _in_raw_text = False

    def handle_starttag(self, tag, attrs):
        """Start printing at the matching table; echo tags while printing.

        Args:
            tag: Tag name as supplied by HTMLParser.
            attrs: List of ``(name, value)`` attribute pairs.
        """
        global do_print
        if tag == "table":
            if self._table_depth:
                # A table nested inside the matched one: remember it so its
                # closing tag does not end the output early.
                self._table_depth += 1
            elif ("width", "100%") in attrs and ("cellpadding", "4") in attrs:
                self._table_depth = 1
                do_print = True
        if tag in self._RAW_TEXT_TAGS:
            self._in_raw_text = True
        if do_print:
            print_tag(tag, attrs)

    def handle_endtag(self, tag):
        """Echo the closing tag while printing; stop after the matched table.

        Args:
            tag: Tag name as supplied by HTMLParser.
        """
        global do_print
        if do_print:
            print_tag(tag, attrs=(), end=True)
        if tag in self._RAW_TEXT_TAGS:
            self._in_raw_text = False
        if tag == "table":
            if self._table_depth > 1:
                # Closing a nested table: keep printing the outer one.
                self._table_depth -= 1
            else:
                self._table_depth = 0
                do_print = False

    def handle_data(self, data):
        """Echo text content while printing.

        Args:
            data: Text chunk as supplied by HTMLParser.
        """
        global do_print
        if do_print:
            if not self._in_raw_text:
                # With HTMLParser's default convert_charrefs=True the text
                # arrives with character references decoded; re-escape it so
                # "&lt;" is not written out as a literal "<".
                from html import escape
                data = escape(data, quote=False)
            print(data, end="")
# Run the parser over the whole document; the matching table is written to
# stdout as a side effect of the handler callbacks.
parser = MyHTMLParser()
parser.feed(content)
# feed() may keep incomplete trailing data buffered; close() forces it to be
# processed as if end-of-file had been reached.
parser.close()