import pandas
# `pandas.np` was only ever a re-export of numpy; it was deprecated in
# pandas 1.0 and later removed, so import numpy directly. The module-level
# alias `np` is kept so existing references continue to work.
import numpy as np
class DataFrame2(pandas.DataFrame):
    """pandas.DataFrame with lightweight helpers that render it as text lines.

    All ``get_lines_*`` methods return a list of strings whose first entry
    is the header line, followed by one entry per row. The index is never
    included in the output.
    """

    def __init__(self, *args, **kwargs):
        """Forward every argument unchanged to ``pandas.DataFrame``."""
        super(DataFrame2, self).__init__(*args, **kwargs)

    @property
    def _constructor(self):
        """Make pandas operations that derive a new frame return DataFrame2."""
        return DataFrame2

    def get_lines_standard(self):
        """Standard way to convert a pandas dataframe to lines with
        formatted column spacing (delegates to ``to_string``)."""
        return self.to_string(index=False).split('\n')

    def get_lines_fast_unstruct(self):
        """Lighter version of pandas.DataFrame.to_string()
        with no special spacing format.

        Values on a line are separated by a single space, so columns are
        not aligned.
        """
        # Column labels are not guaranteed to be strings; stringify them
        # before joining.
        header = ' '.join(str(label) for label in self.columns)
        records = self.to_records(index=False).tolist()
        # Build real lists: on Python 3 `map` is lazy and cannot be
        # concatenated with a list.
        body = [' '.join(str(value) for value in rec) for rec in records]
        return [header] + body

    def get_lines_fast_struct(self, col_space=1):
        """Lighter version of pandas.DataFrame.to_string()
        with special spacing format.

        Every column is right-aligned in a field that is ``col_space``
        characters wider than its longest entry. The header is counted as
        an entry, so long column names stay aligned and an empty frame
        still yields a header line.

        col_space: number of extra padding characters per column.
        """
        titles = [str(label) for label in self.columns]
        # Materialise the stringified rows once; they are traversed twice
        # (width computation, then formatting).
        str_rows = [[str(value) for value in rec]
                    for rec in self.to_records(index=False)]
        # zip(titles, *rows) transposes to one tuple per column whose
        # first element is that column's title.
        widths = [max(len(cell) for cell in column) + col_space
                  for column in zip(titles, *str_rows)]
        # The widths are handed to the helper explicitly rather than stored
        # as `self.space`: attribute assignment on a DataFrame would
        # overwrite a column that happens to be called "space".
        return [self._format_line(row, widths) for row in [titles] + str_rows]

    def _format_line(self, row_vals, space=None):
        """Right-align each entry of a row and return a single string.

        row_vals: list of strings, one per column.
        space: list of field widths, one per entry of ``row_vals``. When
            omitted, falls back to ``self.space`` for callers that still
            set that attribute themselves.
        """
        if space is None:
            space = self.space
        # '{0: >{1}}' pads entry 0 on the left with blanks up to width 1.
        return ''.join('{0: >{1}}'.format(entry, space[i])
                       for i, entry in enumerate(row_vals))
import random
import numpy
# Some test data: 1000 rows with an integer column (A), a float column (B)
# and a column of short strings (C) picked at random.
df = DataFrame2({'A': numpy.random.randint(0, 1000, 1000),
                 'B': numpy.random.random(1000),
                 'C': [random.choice(['EYE', '<3', 'PANDAS', '0.16'])
                       for _ in range(1000)]})