We can use Python, with the zip_longest function from the itertools module, to interleave the per-column lists.
python3 -c 'import sys, itertools as it
# awk-style separators: input field/record and output field/record
fs, rs = "\t", "\n"
ofs, ors = fs, rs


def collapse_runs(rows):
    """Collapse consecutive repeats in every column of a table.

    rows is an iterable of field lists; the first one holds the column
    names. Empty fields are skipped, so they do not break a run.

    Returns one list per column: the column name followed by each run,
    written as value(count) when the run is longer than one. An empty
    input yields an empty list.

    Raises IndexError when a data row has more fields than the header.
    """
    rows = iter(rows)
    header = next(rows, None)
    if header is None:  # empty input: there are no columns at all
        return []
    # Runs are kept as [value, count] pairs, apart from the header, so that
    # a value equal to its column name, or one containing parentheses, is
    # still counted correctly.
    runs = [[] for _ in header]
    for row in rows:
        for idx, el in enumerate(row):
            if el == "":
                continue
            col = runs[idx]
            if col and col[-1][0] == el:
                col[-1][1] += 1
            else:
                col.append([el, 1])
    return [
        [name] + ["%s(%d)" % (val, n) if n > 1 else val for val, n in col]
        for name, col in zip(header, runs)
    ]


def main(path):
    """Print the collapsed columns of the file at path, longest column first."""
    with open(path) as f:
        cols = collapse_runs(rec.rstrip(rs).split(fs) for rec in f)
    # output: the sort is stable, so equally long columns keep their order
    ordered = sorted(cols, key=len, reverse=True)
    for tup in it.zip_longest(*ordered, fillvalue=""):
        print(*tup, sep=ofs, end=ors)


# python3 -c runs the code as __main__, so the guard only skips on import
if __name__ == "__main__":
    main(sys.argv[1])
' file
Output:
b c a d e
11 22 11(2) 56 11(4)
44 56 12 89
56 78(2) 22(2) 91
60(3) 91
91(2) 98
95
The above assumes that, once a run of repeated values in a column has ended, the same value does not show up again further down in that column.
Should this assumption not hold, we could use the following approach, which combines the ordering of a list, the uniquifying nature of a set, and the list method count, which reports how many times a given element occurs in the input list.
python3 -c 'import sys, itertools as it
# awk-style separators: input field/record and output field
fs, rs = "\t", "\n"
ofs = fs


def tally_columns(rows):
    """Deduplicate every column of a table, annotating repeated values.

    rows is an iterable of field lists; the first one holds the column
    names. Empty fields are ignored.

    Returns one list per column: the column name followed by each distinct
    value in order of first appearance, written as value(count) when it
    occurs more than once in that column. An empty input yields an empty
    list.

    Raises IndexError when a data row has more fields than the header.
    """
    rows = iter(rows)
    header = next(rows, None)
    if header is None:  # empty input: there are no columns at all
        return []
    # Values are gathered apart from the header so that a value equal to its
    # column name is never counted together with the name.
    cols = [[] for _ in header]
    for row in rows:
        for idx, el in enumerate(row):
            if el:
                cols[idx].append(el)
    result = []
    for name, values in zip(header, cols):
        pending = set(values)  # distinct values not yet emitted
        out = [name]
        for el in values:  # the list order decides the output order
            if el in pending:
                pending.discard(el)
                k = values.count(el)
                out.append(f"{el}({k})" if k > 1 else el)
        result.append(out)
    return result


def main(path):
    """Print the tallied columns of the file at path, longest column first."""
    with open(path) as f:
        cols = tally_columns(rec.rstrip(rs).split(fs) for rec in f)
    # the sort is stable, so equally long columns keep their order
    ordered = sorted(cols, key=len, reverse=True)
    for t in it.zip_longest(*ordered, fillvalue=""):
        print(*t, sep=ofs)


# python3 -c runs the code as __main__, so the guard only skips on import
if __name__ == "__main__":
    main(sys.argv[1])
' file