Skip to main content
deleted 51 characters in body
Source Link
αғsнιη
  • 41.9k
  • 17
  • 75
  • 118

Alternative using GNU awk, for its word-boundary operators ("\<" and "\>"):

gawk 'BEGIN { FS = OFS = "\t" }

# First file (fileB): store the comma-separated list in $2 under the key in $1.
NR == FNR { mutations[$1] = $2; next }

# First line of fileA: name the two added columns, print it, and skip the
# per-row logic so the header does not get extra empty fields appended.
FNR == 1 { $4 = "missing_mutation"; $5 = "remaining_mutation"; print; next }

# Every other line of fileA: $2 selects the stored list, $3 holds the
# comma-separated mutations of this row.
{
    # Work on a copy: gsub() on mutations[$2] itself would shrink the stored
    # list and corrupt the result of later rows that use the same key.
    missing = mutations[$2]
    remaining = ""; sep = ""

    # Index loop instead of for-in, whose visiting order is unspecified.
    n = split($3, muts, ",")
    for (i = 1; i <= n; i++) {
        # Whole-word match; the comma is optional so the last list entry
        # is recognised too.
        if (gsub("\\<" muts[i] "\\>,?", "", missing) > 0) continue
        # Unmatched entry: keep only those starting with S or s. The separator
        # is set only after an entry was added, so no leading comma appears.
        if (muts[i] ~ /^[Ss]/) { remaining = remaining sep muts[i]; sep = "," }
    }
    sub(/,$/, "", missing)    # dropping the last entry leaves a dangling comma

    print $0, missing, remaining
}' fileB  fileA

Alternative using GNU awk, for its word-boundary operators ("\<" and "\>"):

gawk 'BEGIN { FS = OFS = "\t" }

# First file (fileB): store the comma-separated list in $2 under the key in $1.
NR == FNR { mutations[$1] = $2; next }

# First line of fileA: name the two added columns, print it, and skip the
# per-row logic so the header does not get extra empty fields appended.
FNR == 1 { $4 = "missing_mutation"; $5 = "remaining_mutation"; print; next }

# Every other line of fileA: $2 selects the stored list, $3 holds the
# comma-separated mutations of this row.
{
    # Work on a copy: gsub() on mutations[$2] itself would shrink the stored
    # list and corrupt the result of later rows that use the same key.
    missing = mutations[$2]
    remaining = ""; sep = ""

    # Index loop instead of for-in, whose visiting order is unspecified.
    n = split($3, muts, ",")
    for (i = 1; i <= n; i++) {
        # Whole-word match; the comma is optional so the last list entry
        # is recognised too.
        if (gsub("\\<" muts[i] "\\>,?", "", missing) > 0) continue
        # Unmatched entry: keep only those starting with S or s. The separator
        # is set only after an entry was added, so no leading comma appears.
        if (muts[i] ~ /^[Ss]/) { remaining = remaining sep muts[i]; sep = "," }
    }
    sub(/,$/, "", missing)    # dropping the last entry leaves a dangling comma

    print $0, missing, remaining
}' fileB  fileA

Alternative awk:

awk 'BEGIN { FS = OFS = "\t" }

# First file (fileB): store the comma-separated list in $2 under the key in $1.
NR == FNR { mutations[$1] = $2; next }

# First line of fileA: name the two added columns, print it, and skip the
# per-row logic so the header does not get extra empty fields appended.
FNR == 1 { $4 = "missing_mutation"; $5 = "remaining_mutation"; print; next }

# Every other line of fileA: $2 selects the stored list, $3 holds the
# comma-separated mutations of this row.
{
    # Copy of the stored list wrapped in commas, so every entry reads ",name,".
    # Working on a copy keeps mutations[$2] intact for later rows.
    missing = "," mutations[$2] ","
    remaining = ""; sep = ""

    # Index loop instead of for-in, whose visiting order is unspecified.
    n = split($3, muts, ",")
    for (i = 1; i <= n; i++) {
        token = "," muts[i] ","
        found = 0
        # Literal whole-entry search (no regex, so no substring hits): cut the
        # entry out but keep one comma, so the neighbours are not fused.
        while (muts[i] != "" && (pos = index(missing, token)) > 0) {
            missing = substr(missing, 1, pos) substr(missing, pos + length(token))
            found = 1
        }
        # Unmatched entry: keep only those starting with S or s. The separator
        # is set only after an entry was added, so no leading comma appears.
        if (!found && muts[i] ~ /^[Ss]/) { remaining = remaining sep muts[i]; sep = "," }
    }

    # Strip the two wrapping commas again.
    if (length(missing) > 2) missing = substr(missing, 2, length(missing) - 2)
    else missing = ""

    print $0, missing, remaining
}' fileB  fileA
added 27 characters in body
Source Link
αғsнιη
  • 41.9k
  • 17
  • 75
  • 118

Alternative using GNU awk, for its word-boundary operators ("\<" and "\>"):

gawk 'BEGIN { FS = OFS = "\t" }

# First file (fileB): store the comma-separated list in $2 under the key in $1.
NR == FNR { mutations[$1] = $2; next }

# First line of fileA: name the two added columns, print it, and skip the
# per-row logic so the header does not get extra empty fields appended.
FNR == 1 { $4 = "missing_mutation"; $5 = "remaining_mutation"; print; next }

# Every other line of fileA: $2 selects the stored list, $3 holds the
# comma-separated mutations of this row.
{
    # Work on a copy: gsub() on mutations[$2] itself would shrink the stored
    # list and corrupt the result of later rows that use the same key.
    missing = mutations[$2]
    remaining = ""; sep = ""

    # Index loop instead of for-in, whose visiting order is unspecified.
    n = split($3, muts, ",")
    for (i = 1; i <= n; i++) {
        # Whole-word match; the comma is optional so the last list entry
        # is recognised too.
        if (gsub("\\<" muts[i] "\\>,?", "", missing) > 0) continue
        # Unmatched entry: keep only those starting with S or s. The separator
        # is set only after an entry was added, so no leading comma appears.
        if (muts[i] ~ /^[Ss]/) { remaining = remaining sep muts[i]; sep = "," }
    }
    sub(/,$/, "", missing)    # dropping the last entry leaves a dangling comma

    print $0, missing, remaining
}' fileB  fileA

Output:

id      clade   mutation        missing_mutation        remaining_mutation
243     40A     siti,toto,mumu  xixi,saxa       siti,mumu
254
267     40B     lala,sisi,sojo  huhu    sisi

Alternative using GNU awk, for its word-boundary operators ("\<" and "\>"):

gawk 'BEGIN { FS = OFS = "\t" }

# First file (fileB): store the comma-separated list in $2 under the key in $1.
NR == FNR { mutations[$1] = $2; next }

# First line of fileA: name the two added columns, print it, and skip the
# per-row logic so the header does not get extra empty fields appended.
FNR == 1 { $4 = "missing_mutation"; $5 = "remaining_mutation"; print; next }

# Every other line of fileA: $2 selects the stored list, $3 holds the
# comma-separated mutations of this row.
{
    # Work on a copy: gsub() on mutations[$2] itself would shrink the stored
    # list and corrupt the result of later rows that use the same key.
    missing = mutations[$2]
    remaining = ""; sep = ""

    # Index loop instead of for-in, whose visiting order is unspecified.
    n = split($3, muts, ",")
    for (i = 1; i <= n; i++) {
        # Whole-word match; the comma is optional so the last list entry
        # is recognised too.
        if (gsub("\\<" muts[i] "\\>,?", "", missing) > 0) continue
        # Unmatched entry: append it to the remaining column.
        remaining = remaining sep muts[i]; sep = ","
    }
    sub(/,$/, "", missing)    # dropping the last entry leaves a dangling comma

    print $0, missing, remaining
}' fileB  fileA

Output:

id      clade   mutation        missing_mutation        remaining_mutation
243     40A     siti,toto,mumu  xixi,saxa       siti,mumu
254
267     40B     lala,sisi,sojo  huhu    sisi

Alternative using GNU awk, for its word-boundary operators ("\<" and "\>"):

gawk 'BEGIN { FS = OFS = "\t" }

# First file (fileB): store the comma-separated list in $2 under the key in $1.
NR == FNR { mutations[$1] = $2; next }

# First line of fileA: name the two added columns, print it, and skip the
# per-row logic so the header does not get extra empty fields appended.
FNR == 1 { $4 = "missing_mutation"; $5 = "remaining_mutation"; print; next }

# Every other line of fileA: $2 selects the stored list, $3 holds the
# comma-separated mutations of this row.
{
    # Work on a copy: gsub() on mutations[$2] itself would shrink the stored
    # list and corrupt the result of later rows that use the same key.
    missing = mutations[$2]
    remaining = ""; sep = ""

    # Index loop instead of for-in, whose visiting order is unspecified.
    n = split($3, muts, ",")
    for (i = 1; i <= n; i++) {
        # Whole-word match; the comma is optional so the last list entry
        # is recognised too.
        if (gsub("\\<" muts[i] "\\>,?", "", missing) > 0) continue
        # Unmatched entry: keep only those starting with S or s. The separator
        # is set only after an entry was added, so no leading comma appears.
        if (muts[i] ~ /^[Ss]/) { remaining = remaining sep muts[i]; sep = "," }
    }
    sub(/,$/, "", missing)    # dropping the last entry leaves a dangling comma

    print $0, missing, remaining
}' fileB  fileA
Source Link
αғsнιη
  • 41.9k
  • 17
  • 75
  • 118

Alternative using GNU awk, for its word-boundary operators ("\<" and "\>"):

gawk 'BEGIN { FS = OFS = "\t" }

# First file (fileB): store the comma-separated list in $2 under the key in $1.
NR == FNR { mutations[$1] = $2; next }

# First line of fileA: name the two added columns, print it, and skip the
# per-row logic so the header does not get extra empty fields appended.
FNR == 1 { $4 = "missing_mutation"; $5 = "remaining_mutation"; print; next }

# Every other line of fileA: $2 selects the stored list, $3 holds the
# comma-separated mutations of this row.
{
    # Work on a copy: gsub() on mutations[$2] itself would shrink the stored
    # list and corrupt the result of later rows that use the same key.
    missing = mutations[$2]
    remaining = ""; sep = ""

    # Index loop instead of for-in, whose visiting order is unspecified.
    n = split($3, muts, ",")
    for (i = 1; i <= n; i++) {
        # Whole-word match; the comma is optional so the last list entry
        # is recognised too.
        if (gsub("\\<" muts[i] "\\>,?", "", missing) > 0) continue
        # Unmatched entry: append it to the remaining column.
        remaining = remaining sep muts[i]; sep = ","
    }
    sub(/,$/, "", missing)    # dropping the last entry leaves a dangling comma

    print $0, missing, remaining
}' fileB  fileA

Output:

id      clade   mutation        missing_mutation        remaining_mutation
243     40A     siti,toto,mumu  xixi,saxa       siti,mumu
254
267     40B     lala,sisi,sojo  huhu    sisi