set date column to index date.time pandas python

Question

Problem is resolved.

I'm making a data frame from several pipe-separated (|) files. I read in my data, format my date column, and then set the date column as a datetime index. My desired output is a dataframe with a timestamped index so that I can group by TimeGrouper. When I run the code to timestamp the index I get an error, which is included below along with my code and the output I get without implementing the timestamp:

import numpy as np
import pandas as pd
import glob


# Read every file matched by the glob pattern as a pipe-delimited table with
# no header row, label the 22 columns via `names` (reading 'date' as str),
# and stack the per-file frames into a single DataFrame.
df = pd.concat((pd.read_csv(f, sep='|', header=None, low_memory=False, names=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', \
                                                            '12', '13', 'date', '15', '16', '17', '18', '19', '20', \
                                                            '21', '22'], index_col=None, dtype={'date':str}) for f in \
                glob.glob('/home/jayaramdas/anaconda3/Thesis/FEC_data/itpas2_data/itpas2**.txt')))


# NOTE: the result of dropna() is not assigned to anything, so `df` is left
# unchanged by this line.
df['date'].dropna()

# Parse the date strings (month, day, four-digit year with no separators,
# e.g. 08152007) into datetime values.
df['date'] = pd.to_datetime(df['date'], format='%m%d%Y')

# Use 'date' as the index of a new frame. The column is moved into the index,
# so df1 no longer has a 'date' column (df1['date'] raises KeyError).
df1 = df.set_index('date')



print (df1)
              cmte_id trans_typ entity_typ state  amount     fec_id    cand_id
date                                                                          
2007-08-15  C00112250       24K        ORG    DC    2000  C00431569  P00003392
2007-09-26  C00119040       24K        CCM    FL    1000  C00367680  H2FL05127
2007-09-26  C00119040       24K        CCM    MD    1000  C00140715  H2MD05155

My error:

KeyError: 'date'
18 df2 = df1.set_index(pd.to_datetime(df1['date']), inplace=True)

My raw data:

C00112250|N|Q3|G|27931381854|24K|ORG|HILLARY CLINTON FOR PRESIDENT EXP. COMM.|WASHINGTON|DC|20013|||08152007|2000|C00431569|P00003392|71006.E7975|307490|||4101720071081637544
C00119040|N|Q3|G|27990795873|24K|CCM|FRIENDS OF GINNY BROWN-WAITE|BROOKSVILLE|FL|34605|||09262007|1000|C00367680|H2FL05127|SB21.4307|307491|||4101720071081637552
C00119040|N|Q3|G|27990795873|24K|CCM|HOYER FOR CONGRESS|CLINTON|MD|20735|||09262007|1000|C00140715|H2MD05155|SB21.4303|307491|||4101720071081637553

Why don't you just do df['date'] = pd.to_datetime(df['date'], format='%m%d%Y') and then df1 = df.set_index('date')? and skip all lines after this? — EdChum
– EdChum, Commented Mar 10, 2016 at 14:17
I just tried that and it works, but I also need the index to be timestamped, and when I run the timestamp code shown above I get KeyError: 'date' from the set_index / to_datetime line — Collective Action
– Collective Action, Commented Mar 10, 2016 at 14:27
I don't understand what is the desired output, can you edit your question — EdChum
– EdChum, Commented Mar 10, 2016 at 14:31
What code generates your error? If it's df2 = df1.set_index(pd.to_datetime(df1['date']), inplace=True) then yeah, you've set 'date' as index so it's no longer a column. — IanS
– IanS, Commented Mar 10, 2016 at 15:35
And I don't see the point of this, since df1 already is what you seem to be looking for in df2. — IanS
– IanS, Commented Mar 10, 2016 at 15:36

jezrael · Accepted Answer · 2016-03-12 09:07:15Z

I think you can use read_csv with the usecols parameter to filter the columns and the date_parser parameter to parse the date column as datetimes:

import pandas as pd
import glob


# Parser applied to the date column: the raw values are strings in
# month-day-year order with no separators (e.g. 08152007).
dateparse = lambda x: pd.to_datetime(x, format='%m%d%Y')

# Replace the glob pattern below with the path to your own files.
# Each matching file is read as a pipe-delimited table with no header row;
# `names` labels all 22 columns and `usecols` keeps only the ten named ones.
# NOTE(review): parse_dates=[6] is positional; presumably it counts only the
# columns kept by usecols, in file order ('date' is the 7th of those) --
# verify against your pandas version.
# ignore_index=True gives the concatenated frame a fresh 0..n-1 index.
df = pd.concat((pd.read_csv(f, 
                            sep='|', 
                            header=None, 
                            names=['cmte_id', '2', '3', '4', '5', 'trans_typ', 'entity_typ', '8', '9', 'state', '11', 'employer', 'occupation', 'date', 'amount', 'fec_id', 'cand_id', '18', '19', '20', '21', '22'], 
                            usecols= ['date', 'cmte_id', 'trans_typ', 'entity_typ', 'state', 'employer', 'occupation', 'amount', 'fec_id', 'cand_id'],
                            parse_dates=[6],
                            date_parser=dateparse) for f in glob.glob('test/itpas2_data/itpas2**.txt')), ignore_index=True)

# Reorder the columns so that 'date' comes first.
df = df[['date', 'cmte_id', 'trans_typ', 'entity_typ', 'state', 'employer', 'occupation', 'amount', 'fec_id', 'cand_id']]

print df
        date    cmte_id trans_typ entity_typ state  employer  occupation  \
0 2007-08-15  C00112250       24K        ORG    DC       NaN         NaN   
1 2007-09-26  C00119040       24K        CCM    FL       NaN         NaN   
2 2007-09-26  C00119040       24K        CCM    MD       NaN         NaN   
3 2011-02-25  C00478404       24K        COM    MN       NaN         NaN   
4 2011-02-01  C00140855       24K        CCM    DC       NaN         NaN   
5 2011-02-01  C00140855       24K        CCM    DC       NaN         NaN   
6 2011-02-22  C00140855       24K        CCM    MD       NaN         NaN   
7 2011-02-28  C00093963       24K        CCM    ND       NaN         NaN   

   amount     fec_id    cand_id  
0    2000  C00431569  P00003392  
1    1000  C00367680  H2FL05127  
2    1000  C00140715  H2MD05155  
3    2400  C00326629  H8MN06047  
4    1000  C00373464  H2OH17109  
5    1000  C00289983  H4KY01040  
6    2500  C00140715  H2MD05155  
7    1000  C00474619  H0ND00135  

print df.dtypes
date          datetime64[ns]
cmte_id               object
trans_typ             object
entity_typ            object
state                 object
employer             float64
occupation           float64
amount                 int64
fec_id                object
cand_id               object
dtype: object

# If you want 'date' as the index while also keeping it as a regular column,
# pass the column itself (a Series) instead of its name.
df.set_index(df['date'], inplace=True) 
print df
                 date    cmte_id trans_typ entity_typ state  employer  \
date                                                                    
2007-08-15 2007-08-15  C00112250       24K        ORG    DC       NaN   
2007-09-26 2007-09-26  C00119040       24K        CCM    FL       NaN   
2007-09-26 2007-09-26  C00119040       24K        CCM    MD       NaN   
2011-02-25 2011-02-25  C00478404       24K        COM    MN       NaN   
2011-02-01 2011-02-01  C00140855       24K        CCM    DC       NaN   
2011-02-01 2011-02-01  C00140855       24K        CCM    DC       NaN   
2011-02-22 2011-02-22  C00140855       24K        CCM    MD       NaN   
2011-02-28 2011-02-28  C00093963       24K        CCM    ND       NaN   

            occupation  amount     fec_id    cand_id  
date                                                  
2007-08-15         NaN    2000  C00431569  P00003392  
2007-09-26         NaN    1000  C00367680  H2FL05127  
2007-09-26         NaN    1000  C00140715  H2MD05155  
2011-02-25         NaN    2400  C00326629  H8MN06047  
2011-02-01         NaN    1000  C00373464  H2OH17109  
2011-02-01         NaN    1000  C00289983  H4KY01040  
2011-02-22         NaN    2500  C00140715  H2MD05155  
2011-02-28         NaN    1000  C00474619  H0ND00135

# If you do NOT need to keep 'date' as a regular column, pass its name:
# the column is moved into the index.
df.set_index('date', inplace=True) 
print df
              cmte_id trans_typ entity_typ state  employer  occupation  \
date                                                                     
2007-08-15  C00112250       24K        ORG    DC       NaN         NaN   
2007-09-26  C00119040       24K        CCM    FL       NaN         NaN   
2007-09-26  C00119040       24K        CCM    MD       NaN         NaN   
2011-02-25  C00478404       24K        COM    MN       NaN         NaN   
2011-02-01  C00140855       24K        CCM    DC       NaN         NaN   
2011-02-01  C00140855       24K        CCM    DC       NaN         NaN   
2011-02-22  C00140855       24K        CCM    MD       NaN         NaN   
2011-02-28  C00093963       24K        CCM    ND       NaN         NaN   

            amount     fec_id    cand_id  
date                                      
2007-08-15    2000  C00431569  P00003392  
2007-09-26    1000  C00367680  H2FL05127  
2007-09-26    1000  C00140715  H2MD05155  
2011-02-25    2400  C00326629  H8MN06047  
2011-02-01    1000  C00373464  H2OH17109  
2011-02-01    1000  C00289983  H4KY01040  
2011-02-22    2500  C00140715  H2MD05155  
2011-02-28    1000  C00474619  H0ND00135

Collectives™ on Stack Overflow

set date column to index date.time pandas python

1 Answer 1

Comments

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

Comments

Your Answer

Sign up or log in

Post as a guest

Related