edited title

Link

edited Jul 22, 2015 at 16:26

77H3jjuu

123
6

File parser to get extract data from text file

deleted 28 characters in body; edited title

Source Link

edited Jul 22, 2015 at 16:04

Jamal

35.2k
13
134
238

File parser to get extract data from text file

I am trying to extract the data from an input file and store it for plotting. I have tested this code on a few files of the same format. I am not sure if the code works correctly with a small change in the input file (like more blank spaces in between). I might also have made some terrible mistakes which I cannot find while testing. Basically, I need this code to be bug free.

Is there any better way of doing this task, improvements and better ways of parsing the file.

Thanks in advance. Is there any better way of doing this task, improvements and better ways of parsing the file?

Code:

Post Reopened by Mast♦, Simon Forsberg

occurred Jul 22, 2015 at 15:12

deleted 186 characters in body

Source Link

edited Jul 22, 2015 at 13:57

77H3jjuu

123
6

    # Revision fragment: opens `input` for reading and `output` with `mode`,
    # then scans the input line by line and captures values from lines that
    # begin with a known prefix.  Within the lines shown, nothing is written
    # to `outfile` and neither file is closed.
    def parseFile(input, output, mode):
        infile  = open(input)
        outfile = open(output, mode)
    
        for line in infile:
            # re.match only succeeds at the start of the line, so each check
            # below acts as a "line starts with ..." test.
            if re.match("random seed", line):
                # tokens is None if the full pattern does not fit the line;
                # .group() would then raise AttributeError.
                tokens      = re.search(r'random seed = (.*)', line, re.M|re.I)
                seed        = tokens.group(1)
                #print seed
    
            if re.match("input", line):
                # Group 1 (text before "n = ") is captured but unused, which
                # is why the values start at group 2.
                tokens      = re.search(r'(.*)n = (.*), m = (.*), k = (.*) \[(.*) ms\] {peak:(.*)GiB} {curr:(.*)GiB}', line, re.M|re.L)
                n           = tokens.group(2).strip()
                m           = tokens.group(3).strip()
                k           = tokens.group(4).strip()
                t           = tokens.group(5).strip()
                inPeak      = tokens.group(6).strip()
                inCurr      = tokens.group(7).strip()
                #print n, m, k, t, peak, curr
    
            if re.match("root build", line):
                # The pattern requires exactly one space after each "label:";
                # group 1 (text between "root build " and "[zero:") is unused.
                tokens      = re.search(r'root build (.*)\[zero: (.*)ms\] \[pos: (.*)ms\] \[adj: (.*)ms\] \[adjsort: (.*)ms\] \[shade: (.*)ms\] done. \[(.*)ms\] {peak:(.*)GiB} {curr:(.*)GiB}', line, re.M|re.L)
                zero        = tokens.group(2).strip()
                pos         = tokens.group(3).strip()
                adj         = tokens.group(4).strip()
                adjSort     = tokens.group(5).strip()
                shade       = tokens.group(6).strip()
                rTotalTime  = tokens.group(7).strip()
                rPeak       = tokens.group(8).strip()
                rCurr       = tokens.group(9).strip()
                #print zero, pos, adj, adjSort, shade, rTotalTime, rPeak, rCurr
    
            if re.match("oracle:", line):
                # Group 7 is a single character; group 8 is the rest of the
                # line after " -- ".  Note that `sum` shadows the builtin.
                tokens  = re.search(r'oracle: (.*) (.*)ms \[ (.*)GiB (.*)GiB/s (.*)GHz (.*)GHz\] (.) -- (.*)', line, re.M|re.L);
                sum         = tokens.group(1).strip()
                oracleTime  = tokens.group(2).strip()
                inSize      = tokens.group(3).strip()
                transRate   = tokens.group(4).strip()
                mulRate     = tokens.group(5).strip()
                instrRate   = tokens.group(6).strip()
                yes         = tokens.group(7).strip()
                kpath       = tokens.group(8).strip()
                #print sum, time, inSize, transRate, mulRate, instrRate, yes, kpath

    # Revision fragment without its `def` line: `infile` and `outfile` are
    # used here but not assigned within the lines shown.
    # Scans `infile` line by line, captures values from lines that begin with
    # a known prefix, then writes all captured values as one row to `outfile`.
    for line in infile:
        # re.match only succeeds at the start of the line, so each check
        # below acts as a "line starts with ..." test.
        if re.match("random seed", line):
            # tokens is None if the full pattern does not fit the line;
            # .group() would then raise AttributeError.
            tokens      = re.search(r'random seed = (.*)', line, re.M|re.I)
            seed        = tokens.group(1)
            #print seed

        if re.match("input", line):
            # Group 1 (text before "n = ") is captured but unused.
            tokens      = re.search(r'(.*)n = (.*), m = (.*), k = (.*) \[(.*) ms\] {peak:(.*)GiB} {curr:(.*)GiB}', line, re.M|re.L)
            n           = tokens.group(2).strip()
            m           = tokens.group(3).strip()
            k           = tokens.group(4).strip()
            t           = tokens.group(5).strip()
            inPeak      = tokens.group(6).strip()
            inCurr      = tokens.group(7).strip()
            #print n, m, k, t, peak, curr

        if re.match("root build", line):
            # No literal space after "label:" here; any leading blanks land in
            # the captured group and are removed by .strip().
            tokens      = re.search(r'root build (.*)\[zero:(.*)ms\] \[pos:(.*)ms\] \[adj:(.*)ms\] \[adjsort:(.*)ms\] \[shade:(.*)ms\] done. \[(.*)ms\] {peak:(.*)GiB} {curr:(.*)GiB}', line, re.M|re.L)
            zero        = tokens.group(2).strip()
            pos         = tokens.group(3).strip()
            adj         = tokens.group(4).strip()
            adjSort     = tokens.group(5).strip()
            shade       = tokens.group(6).strip()
            rTotalTime  = tokens.group(7).strip()
            rPeak       = tokens.group(8).strip()
            rCurr       = tokens.group(9).strip()
            #print zero, pos, adj, adjSort, shade, rTotalTime, rPeak, rCurr

        if re.match("oracle:", line):
            # Group 7 is a single character; group 8 is the rest of the line
            # after " -- ".  Note that `sum` shadows the builtin.
            tokens  = re.search(r'oracle: (.*) (.*)ms \[ (.*)GiB (.*)GiB/s (.*)GHz (.*)GHz\] (.) -- (.*)', line, re.M|re.L);
            sum         = tokens.group(1).strip()
            oracleTime  = tokens.group(2).strip()
            inSize      = tokens.group(3).strip()
            transRate   = tokens.group(4).strip()
            mulRate     = tokens.group(5).strip()
            instrRate   = tokens.group(6).strip()
            yes         = tokens.group(7).strip()
            kpath       = tokens.group(8).strip()
            #print sum, time, inSize, transRate, mulRate, instrRate, yes, kpath

        if re.match("command done", line):
            tokens      = re.search(r'command done \[(.*)ms\]', line, re.M|re.L)
            cmdTime     = tokens.group(1).strip()
            #print cmdTime

        if re.match("grand total", line, re.M|re.L):
            # Group 3 (anything after the peak value) is captured but unused.
            tokens      = re.search(r'grand total \[(.*)ms\] {peak: (.*)GiB}(.*)', line, re.M|re.L)
            totalTime   = tokens.group(1).strip()
            gPeak       = tokens.group(2).strip()
            #print totalTime

        if re.match("host", line):
            # The prefix test accepts any line starting with "host", but the
            # search additionally needs "host: " somewhere in the line.
            tokens      = re.search(r'host: (.*)', line, re.M|re.L)
            host        = tokens.group(1).strip()
            #print host
        if re.match("build:", line):
            # Only the text between "build: " and the last " with" is kept.
            tokens      = re.search(r'build: (.*) with(.*)', line, re.M|re.L)
            build       = tokens.group(1).strip()
            #print build

    # Every name used below is assigned only inside a conditional branch of
    # the loop; if a record type never appeared, its names are unbound here.
    # int()/float() raise ValueError when a captured string is not numeric.
    outfile.write("%s %s %s "% (host, yes, kpath))
    outfile.write(" %6d %8d %2d %10d %8.2lf %8.2lf %8.2lf"%
                    (int(n), int(m), int(k), int(seed), float(t), float(inPeak),
                    float(inCurr)))
    outfile.write(" %8.2lf %8.2lf %8.2lf %8.2lf %8.2lf %8.2lf %8.2lf %8.2lf"%
                    (float(zero), float(pos), float(adj), float(adjSort),
                    float(shade), float(rTotalTime), float(rPeak), float(rCurr)))
    outfile.write(" %s %10.2f %6.3lf %7.2lf %7.2lf %7.2lf"%
                    (sum, float(oracleTime), float(inSize), float(transRate),
                    float(mulRate), float(instrRate)))
    outfile.write(" %8.2lf %8.2lf %8.2lf"%
                    (float(cmdTime), float(totalTime), float(gPeak)))
    outfile.write(" %s\n"% (build))

    #close files
    # Not reached if anything above raises, since there is no try/finally or
    # `with` around the file handles.
    outfile.close()
    infile.close()
#end of parseFile

    # Revision fragment with inconsistent indentation (see the NOTE(review)
    # comments below).  Opens `input` and `output`, captures values from
    # lines that begin with a known prefix, and writes them as one row.
    def parseFile(input, output, mode):
        infile  = open(input)
        outfile = open(output, mode)
    
        for line in infile:
            # re.match only succeeds at the start of the line, so each check
            # below acts as a "line starts with ..." test.
            if re.match("random seed", line):
                # tokens is None if the full pattern does not fit the line;
                # .group() would then raise AttributeError.
                tokens      = re.search(r'random seed = (.*)', line, re.M|re.I)
                seed        = tokens.group(1)
                #print seed
    
            if re.match("input", line):
                # Group 1 (text before "n = ") is captured but unused.
                tokens      = re.search(r'(.*)n = (.*), m = (.*), k = (.*) \[(.*) ms\] {peak:(.*)GiB} {curr:(.*)GiB}', line, re.M|re.L)
                n           = tokens.group(2).strip()
                m           = tokens.group(3).strip()
                k           = tokens.group(4).strip()
                t           = tokens.group(5).strip()
                inPeak      = tokens.group(6).strip()
                inCurr      = tokens.group(7).strip()
                #print n, m, k, t, peak, curr
    
            if re.match("root build", line):
                # The pattern requires exactly one space after each "label:";
                # group 1 (text between "root build " and "[zero:") is unused.
                tokens      = re.search(r'root build (.*)\[zero: (.*)ms\] \[pos: (.*)ms\] \[adj: (.*)ms\] \[adjsort: (.*)ms\] \[shade: (.*)ms\] done. \[(.*)ms\] {peak:(.*)GiB} {curr:(.*)GiB}', line, re.M|re.L)
                zero        = tokens.group(2).strip()
                pos         = tokens.group(3).strip()
                adj         = tokens.group(4).strip()
                adjSort     = tokens.group(5).strip()
                shade       = tokens.group(6).strip()
                rTotalTime  = tokens.group(7).strip()
                rPeak       = tokens.group(8).strip()
                rCurr       = tokens.group(9).strip()
                #print zero, pos, adj, adjSort, shade, rTotalTime, rPeak, rCurr
    
            if re.match("oracle:", line):
                # Group 7 is a single character; group 8 is the rest of the
                # line after " -- ".  Note that `sum` shadows the builtin.
                tokens  = re.search(r'oracle: (.*) (.*)ms \[ (.*)GiB (.*)GiB/s (.*)GHz (.*)GHz\] (.) -- (.*)', line, re.M|re.L);
                sum         = tokens.group(1).strip()
                oracleTime  = tokens.group(2).strip()
                inSize      = tokens.group(3).strip()
                transRate   = tokens.group(4).strip()
                mulRate     = tokens.group(5).strip()
                instrRate   = tokens.group(6).strip()
                yes         = tokens.group(7).strip()
                kpath       = tokens.group(8).strip()
                #print sum, time, inSize, transRate, mulRate, instrRate, yes, kpath

        # NOTE(review): from here on the checks are indented at the level of
        # the `for` statement rather than inside it, so as laid out they run
        # once after the loop, against whatever `line` was last bound to.
        if re.match("command done", line):
            tokens      = re.search(r'command done \[(.*)ms\]', line, re.M|re.L)
            cmdTime     = tokens.group(1).strip()
            #print cmdTime

        if re.match("grand total", line, re.M|re.L):
            # Group 3 (anything after the peak value) is captured but unused.
            tokens      = re.search(r'grand total \[(.*)ms\] {peak: (.*)GiB}(.*)', line, re.M|re.L)
            totalTime   = tokens.group(1).strip()
            gPeak       = tokens.group(2).strip()
            #print totalTime

        if re.match("host", line):
            # The prefix test accepts any line starting with "host", but the
            # search additionally needs "host: " somewhere in the line.
            tokens      = re.search(r'host: (.*)', line, re.M|re.L)
            host        = tokens.group(1).strip()
            #print host
        if re.match("build:", line):
            # Only the text between "build: " and the last " with" is kept.
            tokens      = re.search(r'build: (.*) with(.*)', line, re.M|re.L)
            build       = tokens.group(1).strip()
            #print build

    # NOTE(review): the statements below are indented at the level of `def`,
    # i.e. outside the function body as laid out here.
    # int()/float() raise ValueError when a captured string is not numeric.
    outfile.write("%s %s %s "% (host, yes, kpath))
    outfile.write(" %6d %8d %2d %10d %8.2lf %8.2lf %8.2lf"%
                    (int(n), int(m), int(k), int(seed), float(t), float(inPeak),
                    float(inCurr)))
    outfile.write(" %8.2lf %8.2lf %8.2lf %8.2lf %8.2lf %8.2lf %8.2lf %8.2lf"%
                    (float(zero), float(pos), float(adj), float(adjSort),
                    float(shade), float(rTotalTime), float(rPeak), float(rCurr)))
    outfile.write(" %s %10.2f %6.3lf %7.2lf %7.2lf %7.2lf"%
                    (sum, float(oracleTime), float(inSize), float(transRate),
                    float(mulRate), float(instrRate)))
    outfile.write(" %8.2lf %8.2lf %8.2lf"%
                    (float(cmdTime), float(totalTime), float(gPeak)))
    outfile.write(" %s\n"% (build))

    #close files
    outfile.close()
    infile.close()
#end of parseFile

def parseFile(input, output, mode):
    """Parse one benchmark log and write its values as a single text row.

    The log is scanned line by line.  A line is treated as a record when it
    starts (at column 0) with one of the prefixes listed in ``_PARSE_RULES``;
    its values are then captured with that rule's pattern.  If a record type
    occurs more than once, the last occurrence wins.

    The whole row is parsed and formatted before ``output`` is opened, so a
    log that cannot be parsed never creates, truncates or half-writes the
    output file.

    Args:
        input: Path of the log file to read.  The name shadows the builtin but
            is kept so that existing keyword callers continue to work.
        output: Path of the file that receives the formatted row.
        mode: Mode passed to ``open`` for ``output`` (for example ``"w"`` to
            overwrite or ``"a"`` to append one row per log).

    Raises:
        ValueError: If a recognised record does not fit its pattern, if an
            expected value never appears in the log, or if a captured value
            is not a valid number.
    """
    # `with` guarantees the handle is released even when parsing fails.
    with open(input) as infile:
        f = _parse_fields(infile)

    # Build the complete row first; int()/float() raise ValueError on
    # non-numeric captures before anything touches the output file.
    # "%8.2f" renders exactly like the former "%8.2lf" (Python ignores the
    # C length modifier).
    row = "".join((
        "%s %s %s " % (f["host"], f["yes"], f["kpath"]),
        " %6d %8d %2d %10d %8.2f %8.2f %8.2f" % (
            int(f["n"]), int(f["m"]), int(f["k"]), int(f["seed"]),
            float(f["t"]), float(f["inPeak"]), float(f["inCurr"])),
        " %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f" % (
            float(f["zero"]), float(f["pos"]), float(f["adj"]),
            float(f["adjSort"]), float(f["shade"]), float(f["rTotalTime"]),
            float(f["rPeak"]), float(f["rCurr"])),
        " %s %10.2f %6.3f %7.2f %7.2f %7.2f" % (
            f["sum"], float(f["oracleTime"]), float(f["inSize"]),
            float(f["transRate"]), float(f["mulRate"]),
            float(f["instrRate"])),
        " %8.2f %8.2f %8.2f" % (
            float(f["cmdTime"]), float(f["totalTime"]), float(f["gPeak"])),
        " %s\n" % f["build"],
    ))

    with open(output, mode) as outfile:
        outfile.write(row)


# One entry per record type: (line prefix, compiled pattern, field names).
# The field names line up one-to-one with the pattern's capture groups.
# Literal single blanks from the earlier patterns are replaced by \s* / \s+
# so extra spaces between tokens do not break parsing, and no LOCALE flag is
# used because it is rejected for str patterns on Python 3.6+.
_PARSE_RULES = (
    ("random seed",
     re.compile(r'random seed\s*=\s*(.*)'),
     ("seed",)),
    ("input",
     re.compile(r'.*n\s*=\s*(.*),\s*m\s*=\s*(.*),\s*k\s*=\s*(.*)'
                r'\[(.*)ms\]\s*\{peak:(.*)GiB\}\s*\{curr:(.*)GiB\}'),
     ("n", "m", "k", "t", "inPeak", "inCurr")),
    ("root build",
     re.compile(r'root build.*\[zero:(.*)ms\]\s*\[pos:(.*)ms\]\s*'
                r'\[adj:(.*)ms\]\s*\[adjsort:(.*)ms\]\s*\[shade:(.*)ms\]\s*'
                r'done\.\s*\[(.*)ms\]\s*\{peak:(.*)GiB\}\s*\{curr:(.*)GiB\}'),
     ("zero", "pos", "adj", "adjSort", "shade",
      "rTotalTime", "rPeak", "rCurr")),
    ("oracle:",
     re.compile(r'oracle:\s*(.*)\s(\S+)\s*ms\s*\[(.*)GiB(.*)GiB/s'
                r'(.*)GHz(.*)GHz\]\s*(\S)\s*--\s*(.*)'),
     ("sum", "oracleTime", "inSize", "transRate",
      "mulRate", "instrRate", "yes", "kpath")),
    ("command done",
     re.compile(r'command done\s*\[(.*)ms\]'),
     ("cmdTime",)),
    ("grand total",
     re.compile(r'grand total\s*\[(.*)ms\]\s*\{peak:(.*)GiB\}'),
     ("totalTime", "gPeak")),
    ("host",
     re.compile(r'host:(.*)'),
     ("host",)),
    ("build:",
     re.compile(r'build:(.*) with'),
     ("build",)),
)


def _parse_fields(lines):
    """Return a dict mapping each field name to its stripped captured text."""
    fields = {}
    for lineno, line in enumerate(lines, 1):
        # No early exit: like the original chain of independent `if`s, every
        # rule whose prefix fits the line is applied.
        for prefix, pattern, names in _PARSE_RULES:
            if not line.startswith(prefix):
                continue
            found = pattern.search(line)
            if found is None:
                # Previously this surfaced as AttributeError on None.group().
                raise ValueError("line %d: malformed %r record: %r"
                                 % (lineno, prefix, line.rstrip()))
            for name, text in zip(names, found.groups()):
                fields[name] = text.strip()

    # Previously a missing record surfaced as an unbound local at write time.
    missing = [name
               for _, _, names in _PARSE_RULES
               for name in names
               if name not in fields]
    if missing:
        raise ValueError("input is missing value(s): %s" % ", ".join(missing))
    return fields

Post Closed as "Not suitable for this site" by Simon Forsberg

occurred Jul 22, 2015 at 13:16

edited tags

Link

edited Jul 22, 2015 at 13:10

77H3jjuu

123
6

Loading

Source Link

asked Jul 22, 2015 at 11:00

77H3jjuu

123
6

Loading

Stack Exchange Network

Return to Question

File parser to get extract data from text file

File parser to get extract data from text file

file parser to get extract data from text file

File parser to get extract data from text file