I am new to parsing in python . I want to parse the following type of text
value one = 5
value two = 10
%some text here
value three = 15
%some text
value one = 12
value two = 13
%some text here
value three = 11 .. and this goes on I want to extract .value one. .value two. and .value three. and arrange them in a tabular format for processing. Any ideas on how to do it
I tried the following till now. It gives me error: local value value two referenced before assignment
import re
import pandas as pd
val_dict = { 'value_one':re.compile(r'value one = (?P<value_one>.*)\n'),
'value_two':re.compile(r'value two = (?P<value_two>.*)\n'),
'value_three':re.compile(r'value three = (?P<value_three>.*)\n')}
def _parse_line(line):
for key, val in val_dict.items():
match = val.search(line)
if match:
return key, match
# if there are no matches
return None, None
def parse_file(filepath):
data = []
with open(filepath, 'r') as file_object:
line = file_object.readline()
while line:
key, match = _parse_line(line)
if key == 'value_one':
value_one = match.group('value_one')
value_one = int(value_one)
if key == 'value_two':
value_two = match.group('value_two')
value_two = int(value_two)
if key == 'value_three':
value_three = match.group('value_three')
value_three = int(value_three)
row = {
'value one': value_one,
'value two': value_two,
'value three': value_three
}
# append the dictionary to the data list
data.append(row)
line = file_object.readline()
data = pd.DataFrame(data)
data.set_index(['value one', 'value two', 'value three'], inplace=True)
data = data.groupby(level=data.index.names).first()
data = data.apply(pd.to_numeric, errors='ignore')
return data
if __name__ == '__main__':
filepath = 'test3.txt'
data = parse_file(filepath)
_parse_lineand how you are managing thedictsreturned by the same