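This solution merges two or more delimited files on their first column (the ID), treating the first line of each file as its header, and fills missing or blank fields with "NA". It requires GNU awk (gawk), because it relies on arrays of arrays and on PROCINFO["sorted_in"]: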
awk '
# Merge several "#"-delimited files on their first column (the ID).
# The first line of every file is its header. The output has one header
# line followed by one line per ID, with "NA" wherever a file has no
# (or an empty) value for that ID and column. Requires GNU awk.
BEGIN {
    FS = OFS = "#"
    # Visit the IDs in ascending string order in the END block.
    PROCINFO["sorted_in"] = "@ind_str_asc"
}

# Header line of each file: remember how many columns the file has and
# append its column names to the merged header.
FNR == 1 {
    filecount++
    numfields[filecount] = NF
    # Only the very first header contributes the ID column name.
    for (i = (NR == 1 ? 1 : 2); i <= NF; i++) {
        header[++ncols] = $i
    }
}

# Data line: store every value under its ID, keyed by (file number, column).
FNR > 1 {
    for (j = 2; j <= NF; j++) {
        cell[$1][filecount, j] = $j
    }
}

END {
    # Merged header line.
    for (k = 1; k <= ncols; k++) {
        printf "%s%s", header[k], (k < ncols ? OFS : ORS)
    }
    # One output line per ID. The separator is written BEFORE each value
    # and the record terminator once after the row, so every row ends with
    # exactly one ORS even when the last file has nothing but an ID column.
    for (id in cell) {
        printf "%s", id
        for (m = 1; m <= filecount; m++) {
            for (n = 2; n <= numfields[m]; n++) {
                val = cell[id][m, n]
                printf "%s%s", OFS, (val == "" ? "NA" : val)
            }
        }
        printf "%s", ORS
    }
}' file*
1#a1#a2#a3#extra_field_1
2#b1#b2#b3#extra_field_2
3#c1#c2#c3#extra_field_3
4#d1#NA#d3#extra_field_4
5#NA#e2#e3#extra_field_5
6#f1#f2#NA#NA
A second example, using different (comma-separated) data:
head file*
==> file1 <==
ID,Value
A1,10
A2,20
A3,30
A4,40
==> file2 <==
ID,Score,Extra
A2,200,True
A1,100,False
==> file3 <==
ID,Evaluation
A1,Correct
A3,Incorrect
==> file4 <==
ID,Value1,Value2,Value3,Value4
A1,,1,1
A2,3,3,3,3
awk '
# Merge several comma-delimited files on their first column (the ID).
# The first line of every file is its header. The output has one header
# line followed by one line per ID, with "NA" wherever a file has no
# (or an empty) value for that ID and column. Requires GNU awk.
BEGIN {
    FS = OFS = ","
    # Visit the IDs in ascending string order in the END block.
    PROCINFO["sorted_in"] = "@ind_str_asc"
}

# Header line of each file: remember how many columns the file has and
# append its column names to the merged header.
FNR == 1 {
    filecount++
    numfields[filecount] = NF
    # Only the very first header contributes the ID column name.
    for (i = (NR == 1 ? 1 : 2); i <= NF; i++) {
        header[++ncols] = $i
    }
}

# Data line: store every value under its ID, keyed by (file number, column).
FNR > 1 {
    for (j = 2; j <= NF; j++) {
        cell[$1][filecount, j] = $j
    }
}

END {
    # Merged header line.
    for (k = 1; k <= ncols; k++) {
        printf "%s%s", header[k], (k < ncols ? OFS : ORS)
    }
    # One output line per ID. The separator is written BEFORE each value
    # and the record terminator once after the row, so every row ends with
    # exactly one ORS even when the last file has nothing but an ID column.
    for (id in cell) {
        printf "%s", id
        for (m = 1; m <= filecount; m++) {
            for (n = 2; n <= numfields[m]; n++) {
                val = cell[id][m, n]
                printf "%s%s", OFS, (val == "" ? "NA" : val)
            }
        }
        printf "%s", ORS
    }
}' file1 file2 file3 file4
ID,Value,Score,Extra,Evaluation,Value1,Value2,Value3,Value4
A1,10,100,False,Correct,NA,1,1,NA
A2,20,200,True,NA,3,3,3,3
A3,30,NA,NA,Incorrect,NA,NA,NA,NA
A4,40,NA,NA,NA,NA,NA,NA,NA