Given some CSV data in data.csv,
A;B;C
1;2;3
4;5;6
-1.2;3;3.3
the following awk script calculates the sum of the column whose header matches the colname variable, which is set on the command line with -v:
# Sum the values of one named column in semicolon-separated data.
#
# Usage: awk -v colname=NAME -f script.awk file...
#
# The first line of each input file is treated as a header and searched
# for a field equal to colname.  The matching field of every following
# line is added to a running total, which is printed at the end.
# Exits with status 1 and a message on standard error if colname is
# empty or does not occur in a header line.
BEGIN {
    FS = ";"
    sum = 0     # initialised once so the total carries across all input files

    if (colname == "") {
        print "Did not get column name (colname) to work with" >"/dev/stderr"
        failed = 1      # "exit" still runs END; tell it to stay quiet
        exit 1
    }
}

# Header line: find the index of the wanted column.
FNR == 1 {
    colnum = 0
    for (i = 1; i <= NF; ++i)
        if ($i == colname) {
            colnum = i
            break
        }

    if (colnum == 0) {
        printf "Did not find named column (colname = \"%s\")\n", colname >"/dev/stderr"
        failed = 1      # "exit" still runs END; tell it to stay quiet
        exit 1
    }

    next
}

# Data line: accumulate the value found in the selected column.
{
    sum += $colnum
}

# Print the total only when no error occurred.  The exit status given to
# "exit" above is kept because END does not call "exit" again.
END {
    if (!failed)
        print sum
}
Testing it:
$ awk -v colname='A' -f script.awk data.csv
3.8
$ awk -v colname='B' -f script.awk data.csv
10
$ awk -v colname='C' -f script.awk data.csv
12.3
$ awk -v colname='D' -f script.awk data.csv
Did not find named column (colname = "D")
Shorter variant of the script without so much error checking:
# Sum the column whose header field equals colname (semicolon-separated input).
# Exits with status 1, printing nothing, if the header has no such field.
BEGIN { FS = ";" }

# Header line: leave i at the index of the wanted column, or at NF + 1
# when no header field matches.
FNR == 1 {
    for (i = 1; i <= NF; ++i)
        if ($i == colname) break
    # "exit" still runs the END rule, so record the failure to keep END silent.
    if (i > NF) { missing = 1; exit 1 }
    next
}

# Data line: add the selected field to the running total.
{ sum += $i }

END { if (!missing) print sum }
or, as a "one-liner":
$ awk -v colname='A' -F ';' 'FNR == 1 { for (i = 1; i <= NF; ++i) if ($i == colname) break; if (i > NF) { missing = 1; exit 1 } next } { sum += $i } END { if (!missing) print sum }' data.csv