I am trying to take a file with multiple entries and write it to a new file. The new file should contain stripped and split by comma data in separate list. Once I have done that I want to calculate the z score using row 3(observed) and row 4(expected). The z score formula I am using is Zi = (Observed i - Expected i )/sqrt(Expected i ).Then I want to add the Zscores to the list of data in the input file, which is where I am having trouble. I am using output_file = open(outpath,"w") but nothing is being written to the output file.
example input_file data: Ashe,1853282.679,1673876.66,1,2 Alleghany,1963178.059,1695301.229,0 ,1 Surry,2092564.258,1666785.835,5 ,6 Currituck,3464227.016,1699924.786,1 ,1 Northampton,3056933.525,1688585.272,9 ,3 Hertford,3180151.244,1670897.027,7 ,3 Camden,3403469.566,1694894.58,0 ,1 Gates,3264377.534,1704496.938,0 ,1 Warren,2851154.003,1672865.891,4 ,2
my code:
import os
from math import sqrt
def calculateZscore(inpath,outpath):
"Z score calc"
input_file = open(inpath,"r")
lines = input_file.readlines()
output_file = open(outpath,"w")
county = []
x_coor = []
y_coor = []
observed = []
expected = []
score = 0
result = 0
for line in lines:
row = line.split(',')
county.append(row[0].strip())
x_coor.append(row[1].strip())
y_coor.append(row[2].strip())
observed.append(int(row[3].strip()))
expected.append(int (row[4].strip()))
o = observed
e = expected
length_o = len(o)
length_e = len(e)
score = 0
for i in range(length_o):
score += (o[i] - e[i])
result += (score/(sqrt(e[i])))
def main():
"collects data for code "
workingDirec = raw_input("What is the working directory?")
original_file = raw_input("The input filename is?")
full_original = os.path.join(workingDirec,original_file)
chi_square = raw_input("The name of the chi-squared stats table file is?")
full_chi_square = os.path.join(workingDirec,chi_square)
output_file = raw_input ("What is the output filename?")
full_output = os.path.join(workingDirec,output_file)
calculateZscore(full_original,full_output)