
I am looping through CSV files and appending each one, as a DataFrame, to a SQLite table, but it seems that every time I loop and append, an extra index column is added to the table. Very confusing and I am stuck; any help would be great.

My code:

import sqlite3 as sql
import pandas as pd
import hashlib
import os
import csv
from pandas import ExcelWriter


def obtain_data(filename, connect, type):
    writer = ExcelWriter('path\\new_excel_sheets\\' + filename + '.xlsx')
    table = ExcelWriter('path\\new_excel_sheets\\hash_table.xlsx')
    if type == True:
        print(filename)
        df = pd.DataFrame.from_csv('path' + filename, index_col=None)
    else:
        workbook = pd.ExcelFile('path' + filename)
        df = workbook.parse('Sheet1')
    df = df.rename(columns={'INDEX': 'INDX'})
    df = df.rename(columns={'Index': 'INDXS'})
    headers = df.dtypes.index
    header_list = str(headers.tolist())
    header_list = ''.join(header_list)
    hash_t = str(hashlib.md5(header_list.encode('utf-8')).hexdigest())
    c = connect.cursor()
    print(filename)
    print(hash_t)
    if hash_t == 'd22db04a2f009f222da57e91acdce21b':
        next_open = df['DATE'][1]
        next_open_value = df['DATE'][2]
        df.insert(3, next_open, next_open_value)
        headers = df.dtypes.index
        header_list = str(headers.tolist())
        header_list = ''.join(header_list)
        new_hash_t = str(hashlib.md5(header_list.encode('utf-8')).hexdigest())
        df = df.drop(df.index[1:])
        hashing = {str(new_hash_t): str(filename)}
        df2 = pd.DataFrame.from_dict(hashing, orient='index')
        try:
            df2.to_sql(name='Hash Table', con=connect, if_exists='append')
            df.to_sql(name=new_hash_t, con=connect, if_exists='append')
        except:
            raise IndexError('Could not transform ' + str(filename) + ' into database.')
    elif hash_t == '484fbe4de83acb41480dd935d82d7fbe':
        next_open = df['DATE'][1]
        next_open_value = df['DATE'][2]
        df.insert(3, next_open, next_open_value)
        headers = df.dtypes.index
        header_list = str(headers.tolist())
        header_list = ''.join(header_list)
        new_hash_t = str(hashlib.md5(header_list.encode('utf-8')).hexdigest())
        df = df.drop(df.index[2])
        df['DATE'][1] = df['DATE'][0]
        hashing = {new_hash_t: filename}
        df2 = pd.DataFrame.from_dict(hashing, orient='index')
        try:
            df2.to_sql(name='Hash Table', con=connect, if_exists='append')
            df.to_sql(name=new_hash_t, con=connect, if_exists='append')
        except:
            raise IndexError('Could not transform ' + str(filename) + ' into database.')
    else:
        hashing = {hash_t: filename}
        df2 = pd.DataFrame.from_dict(hashing, orient='index')
        try:
            df2.to_sql(name='Hash Table', con=connect, if_exists='append', index=False)
            df.to_sql(name=hash_t, con=connect, if_exists='append', index=True)
        except:
            raise IndexError('Could not transform ' + str(filename) + ' into database.')
    df.to_excel(writer)
    print(filename + ' has been completed successfully.')
    final_results = {'df': df, 'hash_t': hash_t}
    return final_results

csv_files = []
usable_files = []
for filename in os.listdir(filepath):
    if filename.endswith(".xlsx"):
        print('Found an XLSX file ' + str(filename))
        usable_files.append(filename)
    elif filename.endswith('.CSV'):
        print('Found a CSV File ' + filename)
        csv_files.append(filename)
    else:
        print('Found an unusable file ' + str(filename))


for file in usable_files:
    connect = sql.connect(SQLite3 connection)
    obtain_data(file, connect, False)
for file in csv_files:
    connect = sql.connect(SQLite3 connection)
    obtain_data(file, connect, True)
print('All files have been made into Tables')

The SQLite3 database does everything right, but when I append to it, it adds an index column. I am not sure how to handle index columns here (feel free to teach me), so bear with me. The table goes from looking like this

rowid, 0, 1, 2, etc
0, value, value, value, etc
1, value, value, value, etc

but when I loop through (say, 4 times), it changes to this

rowid, index, 0, 1, 2, etc
0, 0, 0, 0, 0, value
0, 0, 0, 0, 0, value

This is a very weird problem so any help would be appreciated, thanks!
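To make the symptom concrete, here is a minimal, self-contained sketch (the table name and the in-memory database are made up for illustration) of the kind of append I am doing:

import sqlite3
import pandas as pd

conn = sqlite3.connect(':memory:')  # throwaway database, just for the demo
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})

# Appending the same frame twice, the way my loop does once per file.
df.to_sql(name='demo', con=conn, if_exists='append')
df.to_sql(name='demo', con=conn, if_exists='append')
print(pd.read_sql('SELECT * FROM demo', conn))
# an extra "index" column shows up alongside A and B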

  • Consider posting less code next time, isolating your problem with a verified example. -1 Commented Aug 11, 2017 at 19:28

1 Answer


Simply set the index parameter to False in all to_sql() calls (by default it is set to True):

df2.to_sql(name='Hash Table', con=connect, if_exists='append', index=False)
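As a self-contained sketch of the pattern (the helper name, table name, and in-memory connection below are placeholders, not from the question's code), the append step then writes only the real data columns:

import sqlite3
import pandas as pd

def append_frame(df, table_name, connect):
    # Append without writing the DataFrame's RangeIndex as an extra column.
    df.to_sql(name=table_name, con=connect, if_exists='append', index=False)

conn = sqlite3.connect(':memory:')  # stand-in for your database file
append_frame(pd.DataFrame({'A': [1], 'B': [2]}), 'demo', conn)
append_frame(pd.DataFrame({'A': [3], 'B': [4]}), 'demo', conn)
print(pd.read_sql('SELECT * FROM demo', conn).columns.tolist())  # ['A', 'B']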

And do the same for any flat-file outputs:

df.to_excel(writer, index=False)

df.to_csv(filename, index=False)
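If the index actually carries meaningful data and you want to keep it, a sketch of the alternative (the column and table names here are made up) is to write it out under an explicit name with index_label rather than dropping it:

import sqlite3
import pandas as pd

conn = sqlite3.connect(':memory:')
df = pd.DataFrame({'value': [10, 20]},
                  index=pd.Index(['2017-08-10', '2017-08-11'], name='DATE'))

# Keep the index, but store it as a named column instead of a generic "index".
df.to_sql(name='prices', con=conn, if_exists='append', index=True, index_label='DATE')
print(pd.read_sql('SELECT * FROM prices', conn).columns.tolist())  # ['DATE', 'value']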

1 Comment

I confirm that this solution has worked in my case
