I am looping through csv files to append to a DataFrame table but it seems that every time I loop and append, there is an index column added to the Table. Very confusing and I am very stuck, any help would be great.
My code:
import sqlite3 as sql
import pandas as pd
import hashlib
import os
import csv
from pandas import ExcelWriter
def obtain_data(filename, connect, type):
writer =
ExcelWriter('path\\new_excel_sheets\\'+filename+'.xlsx')
table =
ExcelWriter('path\\new_excel_sheets\\hash_table.xlsx')
if type == True:
print(filename)
df = pd.DataFrame.from_csv('path'+filename,
index_col=None)
else:
workbook = pd.ExcelFile('path' + filename)
df = workbook.parse('Sheet1')
df = df.rename(columns={'INDEX': 'INDX'})
df = df.rename(columns={'Index': 'INDXS'})
headers = df.dtypes.index
header_list = str(headers.tolist())
header_list = ''.join(header_list)
hash_t = str(hashlib.md5(header_list.encode('utf-8')).hexdigest())
c = connect.cursor()
print(filename)
print(hash_t)
if hash_t == 'd22db04a2f009f222da57e91acdce21b':
next_open = df['DATE'][1]
next_open_value = df['DATE'][2]
df.insert(3, next_open, next_open_value)
headers = df.dtypes.index
header_list = str(headers.tolist())
header_list = ''.join(header_list)
new_hash_t = str(hashlib.md5(header_list.encode('utf-
8')).hexdigest())
df = df.drop(df.index[1:])
hashing = {str(new_hash_t): str(filename)}
df2 = pd.DataFrame.from_dict(hashing, orient='index')
try:
df2.to_sql(name='Hash Table', con=connect, if_exists='append')
df.to_sql(name=new_hash_t, con=connect, if_exists='append')
except:
raise IndexError('Could not transform ' + str(filename) + ' into
database.')
elif hash_t == '484fbe4de83acb41480dd935d82d7fbe':
next_open = df['DATE'][1]
next_open_value = df['DATE'][2]
df.insert(3, next_open, next_open_value)
headers = df.dtypes.index
header_list = str(headers.tolist())
header_list = ''.join(header_list)
new_hash_t = str(hashlib.md5(header_list.encode('utf-
8')).hexdigest())
df = df.drop(df.index[2])
df['DATE'][1] = df['DATE'][0]
hashing = {new_hash_t: filename}
df2 = pd.DataFrame.from_dict(hashing, orient='index')
try:
df2.to_sql(name='Hash Table', con=connect, if_exists='append')
df.to_sql(name=new_hash_t, con=connect, if_exists='append')
except:
raise IndexError('Could not transform ' + str(filename) + ' into
database.')
else:
hashing = {hash_t: filename}
df2 = pd.DataFrame.from_dict(hashing, orient='index')
try:
df2.to_sql(name='Hash Table', con=connect, if_exists='append',
index=False)
df.to_sql(name=hash_t, con=connect, if_exists='append',
index=True)
except:
raise IndexError('Could not transform ' + str(filename) + '
into database.')
df.to_excel(writer)
print(filename + ' has been completed succesfully.')
final_results = {'df': df, 'hash_t': hash_t}
return final_results
csv_files = []
usable_files = []
for filename in os.listdir(filepath):
if filename.endswith(".xlsx"):
print('Found an XLSX file ' + str(filename))
usable_files.append(filename)
elif filename.endswith('.CSV'):
print('Found a CSV File ' + filename)
csv_files.append(filename)
else:
print('Found an unusable file ' + str(filename))
for file in usable_files:
connect = sql.connect(SQLite3 connection)
obtain_data(file, connect, False)
for file in csv_files:
connect = sql.connect(SQLite3 connection)
obtain_data(file, connect, True)
print('All files have been made into Tables')
The SQLite3 database does everything right, but when I append to it it adds an index column. I am not sure how to put index columns in here(feel free to teach me) so bear with me here. The table goes from looking like this
rowid, 0 , 1, 2, etc
0, value, value, value, etc
1, value, value, value, etc
but when I loop through(say 4 times), it changes to this
rowid, index, 0, 1, 2, etc
0, 0, 0, 0, 0, value
0, 0, 0, 0, 0, value
This is a very weird problem so any help would be appreciated, thanks!