Revisions to sed replace with exception list

added 194 characters in body

Source Link

edited Aug 19, 2021 at 18:20

36k
6
25
60

Using any awk in any shell on every Unix box and using literal string operations so we don't care about any regexp or backreference metachars in the input or exceptions list:

$ cat tst.awk
NR==FNR {
    mask[$0] = RS NR RS
    next
}
{
    delete changed
    for (exception in mask) {
        while ( s=index($0,exception) ) {
            $0 = substr($0,1,s-1) mask[exception] substr($0,s+length(exception))
            changed[exception]
        }
    }

    gsub(/dank/,"monk")

    for (exception in changed) {
        while ( s=index($0,mask[exception]) ) {
            $0 = substr($0,1,s-1) exception substr($0,s+length(mask[exception]))
        }
    }

    print
}

$ awk -f tst.awk exceptions file
xdankine remonkus
dankzwd
monke monkbe
testmonk

The above assumes you don't have exceptions that are substrings of other exceptions like dankfoo and dankdankfoo since you don't show cases like that in the example in your question. If you do then make sure the exceptions file is sorted such that the longer superstrings come before the shorter substrings and iterate on them in the order they were input so you don't replace xdankdankfooy with xdank<replacement>y instead of x<replacement>y when masking the exceptions in the first loop.

Using any awk in any shell on every Unix box and using literal string operations so we don't care about any regexp or backreference metachars in the input or exceptions list:

$ cat tst.awk
# First input file (NR==FNR holds only while reading it): store every line as
# an exception and map it to a placeholder built from RS, the line number and
# RS again. With the default RS (newline) that placeholder cannot occur inside
# a record.
NR==FNR {
    map[$0] = RS NR RS
    next
}
# Remaining input: hide the exceptions, do the substitution, restore them.
{
    # Start each record with an empty set of "exceptions seen in this record".
    delete changed
    # Swap every literal occurrence of each exception for its placeholder.
    # index()/substr() do plain string matching, so regexp and backreference
    # metacharacters in the exception have no special meaning.
    for (exception in map) {
        while ( s=index($0,exception) ) {
            $0 = substr($0,1,s-1) map[exception] substr($0,s+length(exception))
            # Referencing the element is enough to create it; it records that
            # this exception has to be restored afterwards.
            changed[exception]
        }
    }

    # The real replacement, applied to whatever text is not hidden.
    gsub(/dank/,"monk")

    # Turn each placeholder back into the original exception text.
    for (exception in changed) {
        while ( s=index($0,map[exception]) ) {
            $0 = substr($0,1,s-1) exception substr($0,s+length(map[exception]))
        }
    }

    print
}

$ awk -f tst.awk exceptions file
xdankine remonkus
dankzwd
monke monkbe
testmonk

The above assumes you don't have exceptions that are substrings of other exceptions like dankfoo and dankdankfoo since you don't show cases like that in the example in your question. If you do then make sure the exceptions file is sorted such that the longer superstrings come before the shorter substrings and iterate on them in the order they were input so you don't replace xdankdankfooy with xdank<replacement>y instead of x<replacement>y when masking the exceptions in the first loop.

Using any awk in any shell on every Unix box and using literal string operations so we don't care about any regexp or backreference metachars in the input or exceptions list:

$ cat tst.awk
# First input file (NR==FNR holds only while reading it): store every line as
# an exception and give it a mask built from RS, the line number and RS again.
# With the default RS (newline) that mask cannot occur inside a record.
NR==FNR {
    mask[$0] = RS NR RS
    next
}
# Remaining input: mask the exceptions, do the substitution, unmask them.
{
    # Start each record with an empty set of "exceptions seen in this record".
    delete changed
    # Swap every literal occurrence of each exception for its mask.
    # index()/substr() do plain string matching, so regexp and backreference
    # metacharacters in the exception have no special meaning.
    for (exception in mask) {
        while ( s=index($0,exception) ) {
            $0 = substr($0,1,s-1) mask[exception] substr($0,s+length(exception))
            # Referencing the element is enough to create it; it records that
            # this exception has to be unmasked afterwards.
            changed[exception]
        }
    }

    # The real replacement, applied to whatever text is not masked.
    gsub(/dank/,"monk")

    # Turn each mask back into the original exception text.
    for (exception in changed) {
        while ( s=index($0,mask[exception]) ) {
            $0 = substr($0,1,s-1) exception substr($0,s+length(mask[exception]))
        }
    }

    print
}

$ awk -f tst.awk exceptions file
xdankine remonkus
dankzwd
monke monkbe
testmonk

The above assumes you don't have exceptions that are substrings of other exceptions like dankfoo and dankdankfoo since you don't show cases like that in the example in your question. If you do then make sure the exceptions file is sorted such that the longer superstrings come before the shorter substrings and iterate on them in the order they were input so you don't replace xdankdankfooy with xdank<replacement>y instead of x<replacement>y when masking the exceptions in the first loop.

added 194 characters in body

Source Link

edited Aug 19, 2021 at 18:15

Ed Morton

36k
6
25
60

Using any awk in any shell on every Unix box and using literal string operations so we don't care about any regexp or backreference metachars in the input or exceptions list:

$ cat tst.awk
NR==FNR {
    map[$0] = RS NR RS
    next
}
{
    delete changed
    for (exception in map) {
        while ( s=index($0,exception) ) {
            $0 = substr($0,1,s-1) map[exception] substr($0,s+length(exception))
            changed[exception]
        }
    }

    gsub(/dank/,"monk")

    for (exception in changed) {
        while ( s=index($0,map[exception]) ) {
            $0 = substr($0,1,s-1) exception substr($0,s+length(map[exception]))
        }
    }

    print
}

$ awk -f tst.awk exceptions file
xdankine remonkus
dankzwd
monke monkbe
testmonk

The above assumes you don't have exceptions that are substrings of other exceptions like dankfoo and dankdankfoo since you don't show cases like that in the example in your question. If you do then make sure the exceptions file is sorted such that the longer superstrings come before the shorter substrings and iterate on them in the order they were input so you don't replace xdankdankfooy with xdank<replacement>y instead of x<replacement>y when masking the exceptions in the first loop.

Using any awk in any shell on every Unix box:

$ cat tst.awk
# First input file (NR==FNR holds only while reading it): store every line as
# an exception and map it to a placeholder built from RS, the line number and
# RS again.
NR==FNR {
    map[$0] = RS NR RS
    next
}
# Remaining input: hide the exceptions, do the substitution, restore them.
{
    # Start each record with an empty set of "exceptions seen in this record".
    delete changed
    for (exception in map) {
        # Build a regexp from the exception: every character other than ^ and
        # backslash is wrapped in a bracket expression, ^ gets a backslash in
        # front of it, and backslashes are then processed by the third call.
        # NOTE(review): these gsub() calls rewrite the loop variable itself, so
        # map[exception] and changed[exception] below appear to be indexed by
        # the rewritten text rather than the original line - confirm.
        gsub(/[^^\\]/,"[&]",exception)
         gsub(/\^/,"\\^",exception)
        gsub(/\\/,"\\\\",exception)
        # gsub() returns the number of replacements, so an exception is only
        # recorded when it actually occurred in this record.
        if ( gsub(exception,map[exception]) ) {
            changed[exception]
        }
    }

    # The real replacement, applied to whatever text is not hidden.
    gsub(/dank/,"monk")

    # Restore step: the placeholder is used as a regexp and the recorded key
    # as the replacement text.
    for (exception in changed) {
        gsub(map[exception],exception)
    }

    print
}

$ awk -f tst.awk exceptions file
xdankine remonkus
dankzwd
monke monkbe
testmonk

The above assumes you don't have exceptions that are substrings of other exceptions like dankfoo and dankdankfoo since you don't show cases like that in the example in your question. If you do then make sure the exceptions file is sorted such that the longer superstrings come before the shorter substrings and iterate on them in the order they were input so you don't replace xdankdankfooy with xdank<replacement>y instead of x<replacement>y in the first loop.

Using any awk in any shell on every Unix box and using literal string operations so we don't care about any regexp or backreference metachars in the input or exceptions list:

$ cat tst.awk
# First input file (NR==FNR holds only while reading it): store every line as
# an exception and map it to a placeholder built from RS, the line number and
# RS again. With the default RS (newline) that placeholder cannot occur inside
# a record.
NR==FNR {
    map[$0] = RS NR RS
    next
}
# Remaining input: hide the exceptions, do the substitution, restore them.
{
    # Start each record with an empty set of "exceptions seen in this record".
    delete changed
    # Swap every literal occurrence of each exception for its placeholder.
    # index()/substr() do plain string matching, so regexp and backreference
    # metacharacters in the exception have no special meaning.
    for (exception in map) {
        while ( s=index($0,exception) ) {
            $0 = substr($0,1,s-1) map[exception] substr($0,s+length(exception))
            # Referencing the element is enough to create it; it records that
            # this exception has to be restored afterwards.
            changed[exception]
        }
    }

    # The real replacement, applied to whatever text is not hidden.
    gsub(/dank/,"monk")

    # Turn each placeholder back into the original exception text.
    for (exception in changed) {
        while ( s=index($0,map[exception]) ) {
            $0 = substr($0,1,s-1) exception substr($0,s+length(map[exception]))
        }
    }

    print
}

$ awk -f tst.awk exceptions file
xdankine remonkus
dankzwd
monke monkbe
testmonk

The above assumes you don't have exceptions that are substrings of other exceptions like dankfoo and dankdankfoo since you don't show cases like that in the example in your question. If you do then make sure the exceptions file is sorted such that the longer superstrings come before the shorter substrings and iterate on them in the order they were input so you don't replace xdankdankfooy with xdank<replacement>y instead of x<replacement>y when masking the exceptions in the first loop.

added 150 characters in body

Source Link

edited Aug 19, 2021 at 17:54

Ed Morton

36k
6
25
60

Using any awk in any shell on every Unix box:

$ cat tst.awk
# First input file (NR==FNR holds only while reading it): store every line as
# an exception and map it to a placeholder built from RS, the line number and
# RS again.
NR==FNR {
    map[$0] = RS NR RS
    next
}
# Remaining input: hide the exceptions, do the substitution, restore them.
{
    # Start each record with an empty set of "exceptions seen in this record".
    delete changed
    for (exception in map) {
        # Build a regexp from the exception: every character other than ^ and
        # backslash is wrapped in a bracket expression, ^ gets a backslash in
        # front of it, and backslashes are then processed by the third call.
        # NOTE(review): these gsub() calls rewrite the loop variable itself, so
        # map[exception] and changed[exception] below appear to be indexed by
        # the rewritten text rather than the original line - confirm.
        gsub(/[^^\\]/,"[&]",exception)
        gsub(/\^/,"\\^",exception)
        gsub(/\\/,"\\\\",exception)
        # gsub() returns the number of replacements, so an exception is only
        # recorded when it actually occurred in this record.
        if ( gsub(exception,map[exception]) ) {
            changed[exception]
        }
    }

    # The real replacement, applied to whatever text is not hidden.
    gsub(/dank/,"monk")

    # Restore step: the placeholder is used as a regexp and the recorded key
    # as the replacement text.
    for (exception in changed) {
        gsub(map[exception],exception)
    }

    print
}

$ awk -f tst.awk exceptions file
xdankine remonkus
dankzwd
monke monkbe
testmonk

The above assumes you don't have exceptions that are substrings of other exceptions like dankfoo and dankdankfoo since you don't show cases like that in the example in your question. If you do then make sure the exceptions file is sorted such that the longer superstrings come before the shorter substrings and iterate on them in the order they were input so you don't replace xdankdankfooy with xdank<replacement>y instead of x<replacement>y in the first loop.

Using any awk in any shell on every Unix box:

$ cat tst.awk
# First input file (NR==FNR holds only while reading it): store every line as
# an exception and map it to a placeholder built from RS, the line number and
# RS again.
NR==FNR {
    map[$0] = RS NR RS
    next
}
# Remaining input: hide the exceptions, do the substitution, restore them.
{
    # Start each record with an empty set of "exceptions seen in this record".
    delete changed
    # Replace each exception with its placeholder. The exception text is passed
    # to gsub() as a dynamic regexp, so any regexp metacharacters it contains
    # are interpreted rather than matched literally.
    for (exception in map) {
        # gsub() returns the number of replacements, so an exception is only
        # recorded when it actually occurred in this record.
        if ( gsub(exception,map[exception]) ) {
            changed[exception]
        }
    }

    # The real replacement, applied to whatever text is not hidden.
    gsub(/dank/,"monk")

    # Restore step: the placeholder is the regexp and the exception is the
    # replacement text, where & and backslash are special to gsub().
    for (exception in changed) {
        gsub(map[exception],exception)
    }

    print
}

$ awk -f tst.awk exceptions file
xdankine remonkus
dankzwd
monke monkbe
testmonk

The above assumes you don't have overlapping exceptions like dankfoo and dankdankfoo - if you do then make sure the exceptions file is sorted such that the longer strings come first. It can be handled in the script too but I'm not going to do that since you don't have any cases like that in your sample input/output.

Using any awk in any shell on every Unix box:

$ cat tst.awk
# First input file (NR==FNR holds only while reading it): store every line as
# an exception and map it to a placeholder built from RS, the line number and
# RS again.
NR==FNR {
    map[$0] = RS NR RS
    next
}
# Remaining input: hide the exceptions, do the substitution, restore them.
{
    # Start each record with an empty set of "exceptions seen in this record".
    delete changed
    for (exception in map) {
        # Build a regexp from the exception: every character other than ^ and
        # backslash is wrapped in a bracket expression, ^ gets a backslash in
        # front of it, and backslashes are then processed by the third call.
        # NOTE(review): these gsub() calls rewrite the loop variable itself, so
        # map[exception] and changed[exception] below appear to be indexed by
        # the rewritten text rather than the original line - confirm.
        gsub(/[^^\\]/,"[&]",exception)
        gsub(/\^/,"\\^",exception)
        gsub(/\\/,"\\\\",exception)
        # gsub() returns the number of replacements, so an exception is only
        # recorded when it actually occurred in this record.
        if ( gsub(exception,map[exception]) ) {
            changed[exception]
        }
    }

    # The real replacement, applied to whatever text is not hidden.
    gsub(/dank/,"monk")

    # Restore step: the placeholder is used as a regexp and the recorded key
    # as the replacement text.
    for (exception in changed) {
        gsub(map[exception],exception)
    }

    print
}

$ awk -f tst.awk exceptions file
xdankine remonkus
dankzwd
monke monkbe
testmonk

The above assumes you don't have exceptions that are substrings of other exceptions like dankfoo and dankdankfoo since you don't show cases like that in the example in your question. If you do then make sure the exceptions file is sorted such that the longer superstrings come before the shorter substrings and iterate on them in the order they were input so you don't replace xdankdankfooy with xdank<replacement>y instead of x<replacement>y in the first loop.

added 321 characters in body

Source Link

edited Aug 19, 2021 at 17:39

Ed Morton

36k
6
25
60

Loading

added 74 characters in body

Source Link

edited Aug 19, 2021 at 17:20

Ed Morton

36k
6
25
60

Loading

Source Link

answered Aug 19, 2021 at 17:11

Ed Morton

36k
6
25
60

Loading

Stack Exchange Network

Return to Answer