Python Pandas DataFrame adds index column when appending

Question

I am looping through csv files to append to a DataFrame table but it seems that every time I loop and append, there is an index column added to the Table. Very confusing and I am very stuck, any help would be great.

My code:

import sqlite3 as sql
import pandas as pd
import hashlib
import os
import csv
from pandas import ExcelWriter


def obtain_data(filename, connect, type):
    writer = 

    ExcelWriter('path\\new_excel_sheets\\'+filename+'.xlsx')
    table = 
     ExcelWriter('path\\new_excel_sheets\\hash_table.xlsx')
    if type == True:
        print(filename)
        df = pd.DataFrame.from_csv('path'+filename, 
index_col=None)
    else:
        workbook = pd.ExcelFile('path' + filename)
        df = workbook.parse('Sheet1')
    df = df.rename(columns={'INDEX': 'INDX'})
    df = df.rename(columns={'Index': 'INDXS'})
     headers = df.dtypes.index
    header_list = str(headers.tolist())
    header_list = ''.join(header_list)
    hash_t = str(hashlib.md5(header_list.encode('utf-8')).hexdigest())
    c = connect.cursor()
    print(filename)
    print(hash_t)
    if hash_t == 'd22db04a2f009f222da57e91acdce21b':
        next_open = df['DATE'][1]
        next_open_value = df['DATE'][2]
        df.insert(3, next_open, next_open_value)
        headers = df.dtypes.index
        header_list = str(headers.tolist())
        header_list = ''.join(header_list)
        new_hash_t = str(hashlib.md5(header_list.encode('utf-
        8')).hexdigest())
        df = df.drop(df.index[1:])
        hashing = {str(new_hash_t): str(filename)}
        df2 = pd.DataFrame.from_dict(hashing, orient='index')
        try:
            df2.to_sql(name='Hash Table', con=connect, if_exists='append')
            df.to_sql(name=new_hash_t, con=connect, if_exists='append')
        except:
            raise IndexError('Could not transform ' + str(filename) + ' into 
            database.')
    elif hash_t == '484fbe4de83acb41480dd935d82d7fbe':
        next_open = df['DATE'][1]
        next_open_value = df['DATE'][2]
        df.insert(3, next_open, next_open_value)
        headers = df.dtypes.index
        header_list = str(headers.tolist())
        header_list = ''.join(header_list)
        new_hash_t = str(hashlib.md5(header_list.encode('utf-
        8')).hexdigest())
        df = df.drop(df.index[2])
        df['DATE'][1] = df['DATE'][0]
        hashing = {new_hash_t: filename}
        df2 = pd.DataFrame.from_dict(hashing, orient='index')
        try:
            df2.to_sql(name='Hash Table', con=connect, if_exists='append')
            df.to_sql(name=new_hash_t, con=connect, if_exists='append')
        except:
            raise IndexError('Could not transform ' + str(filename) + ' into 
             database.')
    else:
        hashing = {hash_t: filename}
        df2 = pd.DataFrame.from_dict(hashing, orient='index')
        try:
            df2.to_sql(name='Hash Table', con=connect, if_exists='append', 
            index=False)
            df.to_sql(name=hash_t, con=connect, if_exists='append', 
              index=True)
        except:
             raise IndexError('Could not transform ' + str(filename) + ' 
                 into database.')
    df.to_excel(writer)
    print(filename + ' has been completed succesfully.')
    final_results = {'df': df, 'hash_t': hash_t}
    return final_results

csv_files = []
usable_files = []
for filename in os.listdir(filepath):
    if filename.endswith(".xlsx"):
        print('Found an XLSX file ' + str(filename))
        usable_files.append(filename)
    elif filename.endswith('.CSV'):
        print('Found a CSV File ' + filename)
        csv_files.append(filename)
    else:
        print('Found an unusable file ' + str(filename))


for file in usable_files:
    connect = sql.connect(SQLite3 connection)
    obtain_data(file, connect, False)
for file in csv_files:
    connect = sql.connect(SQLite3 connection)
    obtain_data(file, connect, True)
print('All files have been made into Tables')

The SQLite3 database does everything right, but when I append to it it adds an index column. I am not sure how to put index columns in here(feel free to teach me) so bear with me here. The table goes from looking like this

rowid, 0 , 1, 2, etc
0, value, value, value, etc
1, value, value, value, etc

but when I loop through(say 4 times), it changes to this

rowid, index, 0, 1, 2, etc
0, 0, 0, 0, 0, value
0, 0, 0, 0, 0, value

This is a very weird problem so any help would be appreciated, thanks!

Consider posting less code next time isolating your problem with a verified example. -1 — Anton vBR
– Anton vBR, Commented Aug 11, 2017 at 19:28

Parfait · Accepted Answer · 2017-08-11 19:01:28Z

1

Simply set index parameter to False in all to_sql() calls (by default parameter is set to True):

df2.to_sql(name='Hash Table', con=connect, if_exists='append', index=False)

And any flat file outputs:

df.to_excel(writer, index=False)

df.to_csv(filename, index=False)

answered Aug 11, 2017 at 19:01

Parfait

108k19 gold badges103 silver badges138 bronze badges

Sign up to request clarification or add additional context in comments.

1 Comment

Bilal Over a year ago

I confirm that this solution has worked in my case

Collectives™ on Stack Overflow

Python Pandas DataFrame adds index column when appending

1 Answer 1

1 Comment

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

1 Comment

Your Answer

Sign up or log in

Post as a guest

Related