I'm trying to find the three most populated city parts(BYDEL) in 1992
I have a csv file looking like this: http://data.kk.dk/dataset/9070067f-ab57-41cd-913e-bc37bfaf9acd/resource/9fbab4aa-1ee0-4d25-b2b4-b7b63537d2ec/download/befkbhalderkoencivst.csv>
The csv file can be explained as:
AAR: Which year the observation was made
BYDEL: Which part of the city, described by an integer contained in following dict; 1=Indre By, 2=Østerbro, 3=Nørrebro, 4=Vesterbro/Kgs. Enghave, 5=Valby, 6=Vanløse, 7=Brønshøj-Husum, 8=Bispebjerg, 9=Amager Øst, 10=Amager Vest, 99=Udenfor inddeling
ALDER: The age of the observed people
PERSONER: Number of observations with the given features of the row
I have a solution but it is very repetitive and i think that it could be done smarter but I don't have enough experience with python. Could anyone point me in the right direction?
My code/solution looks like this:
df = pd.read_csv('befkbh.csv',quotechar='"',skipinitialspace=True, delimiter=',', encoding='latin1').fillna(0)
data = df.as_matrix()
Q31 = collections.defaultdict(list)
Q32 = collections.defaultdict(list)
Q33 = collections.defaultdict(list)
Q34 = collections.defaultdict(list)
Q35 = collections.defaultdict(list)
Q36 = collections.defaultdict(list)
Q37 = collections.defaultdict(list)
Q38 = collections.defaultdict(list)
Q39 = collections.defaultdict(list)
Q310 = collections.defaultdict(list)
Q399 = collections.defaultdict(list)
for row in data:
key = row[0]
if key == "" or key == 0: continue
if key == 1992:
if row[2] == 1:
val = 0 if(row[5]) =="" else float(row[5])
Q31.setdefault(key,[]).append(val)
if row[2] == 2:
val = 0 if(row[5]) =="" else float(row[5])
Q32.setdefault(key,[]).append(val)
if row[2] == 3:
val = 0 if(row[5]) =="" else float(row[5])
Q33.setdefault(key,[]).append(val)
if row[2] == 4:
val = 0 if(row[5]) =="" else float(row[5])
Q34.setdefault(key,[]).append(val)
if row[2] == 5:
val = 0 if(row[5]) =="" else float(row[5])
Q35.setdefault(key,[]).append(val)
if row[2] == 6:
val = 0 if(row[5]) =="" else float(row[5])
Q36.setdefault(key,[]).append(val)
if row[2] == 7:
val = 0 if(row[5]) =="" else float(row[5])
Q37.setdefault(key,[]).append(val)
if row[2] == 8:
val = 0 if(row[5]) =="" else float(row[5])
Q38.setdefault(key,[]).append(val)
if row[2] == 9:
val = 0 if(row[5]) =="" else float(row[5])
Q39.setdefault(key,[]).append(val)
if row[2] == 10:
val = 0 if(row[5]) =="" else float(row[5])
Q310.setdefault(key,[]).append(val)
if row[2] == 99:
val = 0 if(row[5]) =="" else float(row[5])
Q399.setdefault(key,[]).append(val)
Q312 = {}
for k, v in Q31.items(): Q312[k] = sum(v)
for k, v in Q312.items(): print ("{}:{}".format(k,v))
Q322 = {}
for k, v in Q32.items(): Q322[k] = sum(v)
for k, v in Q322.items(): print ("{}:{}".format(k,v))
Q332 = {}
for k, v in Q33.items(): Q332[k] = sum(v)
for k, v in Q332.items(): print ("{}:{}".format(k,v))
Q342 = {}
for k, v in Q34.items(): Q342[k] = sum(v)
for k, v in Q342.items(): print ("{}:{}".format(k,v))
Q352 = {}
for k, v in Q35.items(): Q352[k] = sum(v)
for k, v in Q352.items(): print ("{}:{}".format(k,v))
Q362 = {}
for k, v in Q36.items(): Q362[k] = sum(v)
for k, v in Q362.items(): print ("{}:{}".format(k,v))
Q372 = {}
for k, v in Q37.items(): Q372[k] = sum(v)
for k, v in Q372.items(): print ("{}:{}".format(k,v))
Q382 = {}
for k, v in Q38.items(): Q382[k] = sum(v)
for k, v in Q382.items(): print ("{}:{}".format(k,v))
Q392 = {}
for k, v in Q39.items(): Q392[k] = sum(v)
for k, v in Q392.items(): print ("{}:{}".format(k,v))
Q3102 = {}
for k, v in Q310.items(): Q3102[k] = sum(v)
for k, v in Q3102.items(): print ("{}:{}".format(k,v))
Q3992 = {}
for k, v in Q399.items(): Q3992[k] = sum(v)
for k, v in Q3992.items(): print ("{}:{}".format(k,v))