Skip to main content
added 22 characters in body
Source Link
Sergiy Kolodyazhnyy
  • 16.9k
  • 12
  • 58
  • 111

A possible improvement (which also addresses one of the points Stephane mentioned) is to change the line `if item.replace(",","").isdigit()` into `if item.replace(",","").replace(".","").isdigit()`. This lets the script handle floating-point numbers (such as 3.1415) as well.

Lengthy? Yes. Explicit? Yes. Works? Well, yes.

Lengthy? Yes. Explicit? Yes. Works? Well, yes.

A possible improvement (which also addresses one of the points Stephane mentioned) is to change the line `if item.replace(",","").isdigit()` into `if item.replace(",","").replace(".","").isdigit()`. This lets the script handle floating-point numbers (such as 3.1415) as well.

Lengthy? Yes. Explicit? Yes. Works? Well, yes.

added 22 characters in body
Source Link
Sergiy Kolodyazhnyy
  • 16.9k
  • 12
  • 58
  • 111
 #!/usr/bin/env python3
import sys

# Read the file named by the first command-line argument and, line by line,
# rewrite purely numeric "(...)" groups as "[...]".  Only bracketed groups
# survive in a rewritten line; they are re-joined with ", ".
with open(sys.argv[1]) as source:
    for raw_line in source:
        pieces = []         # rebuilt bracketed groups, in order of appearance
        buffer = ""         # characters collected since the last opener
        recording = False   # True while we are between an opener and a closer
        openers = 0         # how many "(" / "[" this line contained
        for ch in raw_line:
            # An opener of either kind starts recording.  "[" marks a group
            # that is already in the target form and is re-emitted as is.
            if ch in ("(", "["):
                recording = True
                openers += 1
                continue

            if ch == ")":
                recording = False
                # Digits separated only by commas -> switch to square brackets.
                numeric = buffer.replace(",", "").isdigit()
                if numeric:
                    pieces.append("[" + buffer + "]")
                else:
                    pieces.append("(" + buffer + ")")
                buffer = ""

            if ch == "]":
                recording = False
                buffer += ch
                pieces.append("[" + buffer)
                # Start clean so the next group does not inherit this one.
                buffer = ""

            if recording:
                buffer += ch

        # A line without any opener, or one that ends while still recording
        # (no closing bracket), is printed untouched apart from stripping.
        if openers and not recording:
            print(", ".join(pieces))
        else:
            print(raw_line.strip())
                
 

                
$ # original input file
$ cat input.txt
(1), (3), (1,2,3), (1,2,3,4,5,6,7,8,9), (Fig1) (Fig1,Fig2), (Table-1, Table-2)
(table-25),[1,2,3],(figure-35)
(figure-1),(figure-2)
$ # script output 
$ ./change_brackets.py input.txt                                                          
[1], [3], [1,2,3], [1,2,3,4,5,6,7,8,9], (Fig1), (Fig1,Fig2), (Table-1, Table-2)
(table-25), [1,2,3], (figure-35)
(figure-1), (figure-2)
#!/usr/bin/env python3
import sys

def convert_line(line):
    """Return *line* with purely numeric "(...)" groups rewritten as "[...]".

    The line is scanned character by character.  Text between an opening
    "(" or "[" and the next closing ")" or "]" is collected as one group:

    * a group closed by ")" whose content is only digits and commas is
      emitted as "[content]"; any other content is emitted as "(content)";
    * a group closed by "]" is emitted as "[content]" (already converted).

    The groups are joined with ", "; text outside brackets is not kept.
    If the line contains no opening bracket at all, or ends while a group
    is still open, the stripped original line is returned instead.
    """
    words = []
    item_chars = []       # characters of the group currently being read
    inside = False        # True between an opener and its closer
    seen_opener = False

    for char in line:
        if char in "([":
            inside = True
            seen_opener = True
        elif char == ")":
            inside = False
            item = "".join(item_chars)
            if item.replace(",", "").isdigit():
                words.append("[" + item + "]")
            else:
                words.append("(" + item + ")")
            item_chars = []
        elif char == "]":
            inside = False
            words.append("[" + "".join(item_chars) + "]")
            # Reset the buffer here too; otherwise the next group starts
            # with this group's text, e.g. "(1,2,3]figure-35)".
            item_chars = []
        elif inside:
            item_chars.append(char)

    if not seen_opener or inside:
        return line.strip()
    return ", ".join(words)


def main():
    """Convert every line of the file named by the first CLI argument."""
    with open(sys.argv[1]) as fd:
        for line in fd:
            print(convert_line(line))


if __name__ == "__main__":
    main()
                
 

                
$ # original input file
$ cat input.txt
(1), (3), (1,2,3), (1,2,3,4,5,6,7,8,9), (Fig1) (Fig1,Fig2), (Table-1, Table-2)
(table-25),[1,2,3],(figure-35)
(figure-1),(figure-2)
$ # script output 
$ ./change_brackets.py input.txt                                                          
[1], [3], [1,2,3], [1,2,3,4,5,6,7,8,9], (Fig1), (Fig1,Fig2), (Table-1, Table-2)
(table-25), [1,2,3], (1,2,3]figure-35)
(figure-1), (figure-2)
 #!/usr/bin/env python3
import sys

# Convert the file given as the first command-line argument: every "(...)"
# group made up solely of digits and commas is printed as "[...]" instead.
with open(sys.argv[1]) as handle:
    for text in handle:
        groups = []       # bracketed groups rebuilt so far for this line
        current = ""      # content of the group being read
        in_group = False  # set by an opener, cleared by a closer
        opened = 0        # number of opening brackets met on this line

        for symbol in text:
            if symbol == "(" or symbol == "[":
                # "[" means the group was already converted earlier; it is
                # collected the same way and re-emitted by the "]" branch.
                in_group, opened = True, opened + 1
            elif symbol == ")":
                in_group = False
                if current.replace(",", "").isdigit():
                    groups.append("[" + current + "]")
                else:
                    groups.append("(" + current + ")")
                current = ""
            elif symbol == "]":
                in_group = False
                groups.append("[" + current + symbol)
                current = ""  # do not leak this group into the next one
            elif in_group:
                current += symbol

        # No opener seen, or the last opener was never closed: echo the
        # line unchanged (minus surrounding whitespace).  Otherwise print
        # the rebuilt groups separated by ", ".
        print(text.strip() if opened == 0 or in_group else ", ".join(groups))
                

                
$ # original input file
$ cat input.txt
(1), (3), (1,2,3), (1,2,3,4,5,6,7,8,9), (Fig1) (Fig1,Fig2), (Table-1, Table-2)
(table-25),[1,2,3],(figure-35)
(figure-1),(figure-2)
$ # script output 
$ ./change_brackets.py input.txt                                                          
[1], [3], [1,2,3], [1,2,3,4,5,6,7,8,9], (Fig1), (Fig1,Fig2), (Table-1, Table-2)
(table-25), [1,2,3], (figure-35)
(figure-1), (figure-2)
added 740 characters in body
Source Link
Sergiy Kolodyazhnyy
  • 16.9k
  • 12
  • 58
  • 111
#!/usr/bin/env python3
import sys

def convert_line(line):
    """Return *line* with purely numeric "(...)" groups rewritten as "[...]".

    Characters between an opening "(" or "[" and the next ")" or "]" form
    one group.  On ")" the group becomes "[content]" when the content is
    only digits and commas, and "(content)" otherwise.  On "]" the group
    is emitted as "[content]".  Groups are joined with ", "; anything
    outside brackets is dropped from the rebuilt line.

    Lines with no opening bracket, or with a bracket still open at the end
    of the line, are returned unchanged apart from stripping whitespace.
    """
    words = []
    item_chars = []       # characters of the group currently being read
    inside = False        # True between an opener and its closer
    seen_opener = False

    for char in line:
        if char in "([":
            inside = True
            seen_opener = True
        elif char == ")":
            inside = False
            item = "".join(item_chars)
            if item.replace(",", "").isdigit():
                words.append("[" + item + "]")
            else:
                words.append("(" + item + ")")
            item_chars = []
        elif char == "]":
            inside = False
            words.append("[" + "".join(item_chars) + "]")
            # The buffer must be cleared after "]" as well; leaving it
            # filled made the following group start with stale text.
            item_chars = []
        elif inside:
            item_chars.append(char)

    if not seen_opener or inside:
        return line.strip()
    return ", ".join(words)


def main():
    """Convert every line of the file named by the first CLI argument."""
    with open(sys.argv[1]) as fd:
        for line in fd:
            print(convert_line(line))


if __name__ == "__main__":
    main()
                


                

With 40,000 lines of text it performs fairly quickly:

$ wc -l big_input.txt                                                                     
40000 big_input.txt
$ time ./change_brackets.py big_input.txt  > /dev/null                                    
    0m01.64s real     0m01.60s user     0m00.01s system

Lengthy? Yes. Explicit? Yes. Works? Well, yes.

#!/usr/bin/env python3
import sys

def convert_line(line):
    """Return *line* with purely numeric "(...)" groups rewritten as "[...]".

    Characters between an opening "(" or "[" and the next ")" or "]" form
    one group.  On ")" the group becomes "[content]" when the content is
    only digits and commas, and "(content)" otherwise.  On "]" the group
    is emitted as "[content]".  Groups are joined with ", "; anything
    outside brackets is dropped from the rebuilt line.

    Lines that contain no opening bracket, or that end with a bracket
    still open, are returned as they are (stripped) rather than being
    replaced by an empty or partial result.
    """
    words = []
    item_chars = []       # characters of the group currently being read
    inside = False        # True between an opener and its closer
    seen_opener = False

    for char in line:
        if char in "([":
            inside = True
            seen_opener = True
        elif char == ")":
            inside = False
            item = "".join(item_chars)
            if item.replace(",", "").isdigit():
                words.append("[" + item + "]")
            else:
                words.append("(" + item + ")")
            item_chars = []
        elif char == "]":
            inside = False
            words.append("[" + "".join(item_chars) + "]")
            # Clear the buffer so the next group does not begin with the
            # text of this one.
            item_chars = []
        elif inside:
            item_chars.append(char)

    # Without this guard a line with no brackets would print as an empty
    # line, and an unterminated group would silently vanish.
    if not seen_opener or inside:
        return line.strip()
    return ", ".join(words)


def main():
    """Convert every line of the file named by the first CLI argument."""
    with open(sys.argv[1]) as fd:
        for line in fd:
            print(convert_line(line))


if __name__ == "__main__":
    main()
                
#!/usr/bin/env python3
import sys

def convert_line(line):
    """Return *line* with purely numeric "(...)" groups rewritten as "[...]".

    The scan collects the text between an opening "(" or "[" and the next
    closing ")" or "]".  A group closed by ")" is emitted as "[content]"
    when its content consists only of digits and commas, otherwise as
    "(content)".  A group closed by "]" is emitted as "[content]".  The
    emitted groups are joined with ", "; text outside brackets is dropped.

    If the line has no opening bracket, or a group is still open when the
    line ends, the stripped original line is returned.
    """
    words = []
    item_chars = []       # characters of the group currently being read
    inside = False        # True between an opener and its closer
    seen_opener = False

    for char in line:
        if char in "([":
            inside = True
            seen_opener = True
        elif char == ")":
            inside = False
            item = "".join(item_chars)
            if item.replace(",", "").isdigit():
                words.append("[" + item + "]")
            else:
                words.append("(" + item + ")")
            item_chars = []
        elif char == "]":
            inside = False
            words.append("[" + "".join(item_chars) + "]")
            # Forgetting this reset glued "1,2,3]" onto the front of the
            # next group in lines such as "(a),[1,2,3],(b)".
            item_chars = []
        elif inside:
            item_chars.append(char)

    if not seen_opener or inside:
        return line.strip()
    return ", ".join(words)


def main():
    """Convert every line of the file named by the first CLI argument."""
    with open(sys.argv[1]) as fd:
        for line in fd:
            print(convert_line(line))


if __name__ == "__main__":
    main()
                


                

With 40,000 lines of text it performs fairly quickly:

$ wc -l big_input.txt                                                                     
40000 big_input.txt
$ time ./change_brackets.py big_input.txt  > /dev/null                                    
    0m01.64s real     0m01.60s user     0m00.01s system

Lengthy? Yes. Explicit? Yes. Works? Well, yes.

Source Link
Sergiy Kolodyazhnyy
  • 16.9k
  • 12
  • 58
  • 111
Loading