Skip to main content
deleted 11 characters in body
Source Link
dermen
  • 353
  • 2
  • 3
  • 9
# Example output of the three line-rendering methods on the same frame.
# Each call returns a list of strings; [:5] keeps the header line plus the
# first four data rows. The commented lists show the values echoed by a
# Python 2 session.
df.get_lines_standard()[:5] # header line plus the first four rows
#[u'   A         B       C',
# u' 504  0.924385      <3',
# u' 388  0.285854    0.16',
# u' 984  0.254156    0.16',
# u' 446  0.472621  PANDAS']
# ...

# Aligned columns, values converted with str().
df.get_lines_fast_struct()[:5] 
#['   A                 B      C',
# ' 504      0.9243853594     <3',
# ' 388    0.285854082778   0.16',
# ' 984    0.254155910401   0.16',
# ' 446    0.472621088021 PANDAS']
# ...
 
# No alignment: values are joined with single spaces.
df.get_lines_fast_unstruct()[:5]
#['A B C',
# '504 0.9243853594 <3',
# '388 0.285854082778 0.16',
# '984 0.254155910401 0.16',
# '446 0.472621088021 PANDAS']
# ...
# Example output of the three line-rendering methods on the same frame.
# Each call returns a list of strings; [:5] keeps the header line plus the
# first four data rows. The commented lists show the values echoed by a
# Python 2 session.
df.get_lines_standard()[:5] # header line plus the first four rows
#[u'   A         B       C',
# u' 504  0.924385      <3',
# u' 388  0.285854    0.16',
# u' 984  0.254156    0.16',
# u' 446  0.472621  PANDAS']

# Aligned columns, values converted with str().
df.get_lines_fast_struct()[:5] 
#['   A                 B      C',
# ' 504      0.9243853594     <3',
# ' 388    0.285854082778   0.16',
# ' 984    0.254155910401   0.16',
# ' 446    0.472621088021 PANDAS']

# No alignment: values are joined with single spaces.
df.get_lines_fast_unstruct()[:5]
#['A B C',
# '504 0.9243853594 <3',
# '388 0.285854082778 0.16',
# '984 0.254155910401 0.16',
# '446 0.472621088021 PANDAS']
# Example output of the three line-rendering methods on the same frame.
# Each call returns the full list of lines (header first); only the first
# five entries are reproduced below and "# ..." marks the truncation.
df.get_lines_standard()
#[u'   A         B       C',
# u' 504  0.924385      <3',
# u' 388  0.285854    0.16',
# u' 984  0.254156    0.16',
# u' 446  0.472621  PANDAS']
# ...

# Aligned columns, values converted with str().
df.get_lines_fast_struct()
#['   A                 B      C',
# ' 504      0.9243853594     <3',
# ' 388    0.285854082778   0.16',
# ' 984    0.254155910401   0.16',
# ' 446    0.472621088021 PANDAS']
# ...
 
# No alignment: values are joined with single spaces.
df.get_lines_fast_unstruct()
#['A B C',
# '504 0.9243853594 <3',
# '388 0.285854082778 0.16',
# '984 0.254155910401 0.16',
# '446 0.472621088021 PANDAS']
# ...
deleted 13 characters in body
Source Link
dermen
  • 353
  • 2
  • 3
  • 9
import pandas


class DataFrame2(pandas.DataFrame):
    """pandas.DataFrame with helpers that render the frame as text lines.

    Every ``get_lines_*`` method returns a list of strings: the column
    header line followed by one line per row. The index is never included.
    """

    def __init__(self, *args, **kwargs):
        pandas.DataFrame.__init__(self, *args, **kwargs)

    def get_lines_standard(self):
        """Return the lines of ``DataFrame.to_string(index=False)``.

        This is the reference implementation, using pandas' own formatted
        column spacing.
        """
        return self.to_string(index=False).split('\n')

    def get_lines_fast_unstruct(self):
        """Return header and rows as single-space separated strings.

        Lighter alternative to ``to_string()``: every value is converted
        with ``str`` and no column alignment is applied.
        """
        # str() the labels so non-string column names do not break the join.
        header = ' '.join(str(label) for label in self.columns)
        rows = self.to_records(index=False).tolist()
        # Build a real list: on Python 3 ``map`` is lazy and cannot be
        # concatenated to a list.
        return [header] + [' '.join(map(str, row)) for row in rows]

    def get_lines_fast_struct(self, col_space=1):
        """Return header and rows as right-aligned, fixed-width lines.

        Lighter alternative to ``to_string()``. Each column is as wide as
        its longest entry (header included) plus ``col_space`` characters
        of padding.

        col_space: number of extra padding characters per column.
        """
        header = [str(label) for label in self.columns]
        records = self.to_records(index=False)
        # Materialise the stringified rows; they are traversed twice
        # (once for the widths, once for the formatting).
        str_rows = [[str(value) for value in rec] for rec in records]
        table = [header] + str_rows

        # Transpose so each tuple holds one column. The header is part of
        # the table, so long titles stay aligned and an empty frame still
        # yields one width per column.
        widths = [max(len(cell) for cell in column) + col_space
                  for column in zip(*table)]

        # Keep the widths reachable as ``self.space`` like before, but
        # bypass DataFrame.__setattr__ so a column named "space" is never
        # overwritten and pandas does not warn about attribute assignment.
        object.__setattr__(self, 'space', widths)

        return [self._format_line(row, widths) for row in table]

    def _format_line(self, row_vals, widths=None):
        """Right-align each entry to its column width and join them.

        row_vals: list of strings, one per column.
        widths: column widths to use; defaults to ``self.space`` as set by
            the last ``get_lines_fast_struct`` call.
        """
        if widths is None:
            widths = self.space
        return ''.join(str(entry).rjust(width)
                       for entry, width in zip(row_vals, widths))
import pandas


class DataFrame2(pandas.DataFrame):
    """pandas.DataFrame with helpers that render the frame as text lines.

    Every ``get_lines_*`` method returns a list of strings: the column
    header line followed by one line per row. The index is never included.
    """

    def __init__(self, *args, **kwargs):
        pandas.DataFrame.__init__(self, *args, **kwargs)

    def get_lines_standard(self):
        """Return the lines of ``DataFrame.to_string(index=False)``.

        This is the reference implementation, using pandas' own formatted
        column spacing.
        """
        return self.to_string(index=False).split('\n')

    def get_lines_fast_unstruct(self):
        """Return header and rows as single-space separated strings.

        Lighter alternative to ``to_string()``: every value is converted
        with ``str`` and no column alignment is applied.
        """
        # str() the labels so non-string column names do not break the join.
        header = ' '.join(str(label) for label in self.columns)
        rows = self.to_records(index=False).tolist()
        # Build a real list: on Python 3 ``map`` is lazy and cannot be
        # concatenated to a list.
        return [header] + [' '.join(map(str, row)) for row in rows]

    def get_lines_fast_struct(self, col_space=1):
        """Return header and rows as right-aligned, fixed-width lines.

        Lighter alternative to ``to_string()``. Each column is as wide as
        its longest entry (header included) plus ``col_space`` characters
        of padding.

        col_space: number of extra padding characters per column.
        """
        header = [str(label) for label in self.columns]
        records = self.to_records(index=False)
        # Materialise the stringified rows; they are traversed twice
        # (once for the widths, once for the formatting).
        str_rows = [[str(value) for value in rec] for rec in records]
        table = [header] + str_rows

        # Transpose so each tuple holds one column. The header is part of
        # the table, so long titles stay aligned and an empty frame still
        # yields one width per column.
        widths = [max(len(cell) for cell in column) + col_space
                  for column in zip(*table)]

        # Keep the widths reachable as ``self.space`` like before, but
        # bypass DataFrame.__setattr__ so a column named "space" is never
        # overwritten and pandas does not warn about attribute assignment.
        object.__setattr__(self, 'space', widths)

        return [self._format_line(row, widths) for row in table]

    def _format_line(self, row_vals, widths=None):
        """Right-align each entry to its column width and join them.

        row_vals: list of strings, one per column.
        widths: column widths to use; defaults to ``self.space`` as set by
            the last ``get_lines_fast_struct`` call.
        """
        if widths is None:
            widths = self.space
        return ''.join(str(entry).rjust(width)
                       for entry, width in zip(row_vals, widths))
import pandas


class DataFrame2(pandas.DataFrame):
    """pandas.DataFrame with helpers that render the frame as text lines.

    Every ``get_lines_*`` method returns a list of strings: the column
    header line followed by one line per row. The index is never included.
    """

    def __init__(self, *args, **kwargs):
        pandas.DataFrame.__init__(self, *args, **kwargs)

    def get_lines_standard(self):
        """Return the lines of ``DataFrame.to_string(index=False)``.

        This is the reference implementation, using pandas' own formatted
        column spacing.
        """
        return self.to_string(index=False).split('\n')

    def get_lines_fast_unstruct(self):
        """Return header and rows as single-space separated strings.

        Lighter alternative to ``to_string()``: every value is converted
        with ``str`` and no column alignment is applied.
        """
        # str() the labels so non-string column names do not break the join.
        header = ' '.join(str(label) for label in self.columns)
        rows = self.to_records(index=False).tolist()
        # Build a real list: on Python 3 ``map`` is lazy and cannot be
        # concatenated to a list.
        return [header] + [' '.join(map(str, row)) for row in rows]

    def get_lines_fast_struct(self, col_space=1):
        """Return header and rows as right-aligned, fixed-width lines.

        Lighter alternative to ``to_string()``. Each column is as wide as
        its longest entry (header included) plus ``col_space`` characters
        of padding.

        col_space: number of extra padding characters per column.
        """
        header = [str(label) for label in self.columns]
        records = self.to_records(index=False)
        # Materialise the stringified rows; they are traversed twice
        # (once for the widths, once for the formatting).
        str_rows = [[str(value) for value in rec] for rec in records]
        table = [header] + str_rows

        # Transpose so each tuple holds one column. The header is part of
        # the table, so long titles stay aligned and an empty frame still
        # yields one width per column.
        widths = [max(len(cell) for cell in column) + col_space
                  for column in zip(*table)]

        # Keep the widths reachable as ``self.space`` like before, but
        # bypass DataFrame.__setattr__ so a column named "space" is never
        # overwritten and pandas does not warn about attribute assignment.
        object.__setattr__(self, 'space', widths)

        return [self._format_line(row, widths) for row in table]

    def _format_line(self, row_vals, widths=None):
        """Right-align each entry to its column width and join them.

        row_vals: list of strings, one per column.
        widths: column widths to use; defaults to ``self.space`` as set by
            the last ``get_lines_fast_struct`` call.
        """
        if widths is None:
            widths = self.space
        return ''.join(str(entry).rjust(width)
                       for entry, width in zip(row_vals, widths))
added 4 characters in body
Source Link
dermen
  • 353
  • 2
  • 3
  • 9
import pandas
np = pandas.np


class DataFrame2(pandas.DataFrame):
    """pandas.DataFrame with helpers that render the frame as text lines.

    Every ``get_lines_*`` method returns a list of strings: the column
    header line followed by one line per row. The index is never included.
    """

    def __init__(self, *args, **kwargs):
        pandas.DataFrame.__init__(self, *args, **kwargs)

    def get_lines_standard(self):
        """Return the lines of ``DataFrame.to_string(index=False)``.

        This is the reference implementation, using pandas' own formatted
        column spacing.
        """
        return self.to_string(index=False).split('\n')

    def get_lines_fast_unstruct(self):
        """Return header and rows as single-space separated strings.

        Lighter alternative to ``to_string()``: every value is converted
        with ``str`` and no column alignment is applied.
        """
        # str() the labels so non-string column names do not break the join.
        header = ' '.join(str(label) for label in self.columns)
        rows = self.to_records(index=False).tolist()
        # Build a real list: on Python 3 ``map`` is lazy and cannot be
        # concatenated to a list.
        return [header] + [' '.join(map(str, row)) for row in rows]

    def get_lines_fast_struct(self, col_space=1):
        """Return header and rows as right-aligned, fixed-width lines.

        Lighter alternative to ``to_string()``. Each column is as wide as
        its longest entry (header included) plus ``col_space`` characters
        of padding.

        col_space: number of extra padding characters per column.
        """
        header = [str(label) for label in self.columns]
        records = self.to_records(index=False)
        # Materialise the stringified rows; they are traversed twice
        # (once for the widths, once for the formatting).
        str_rows = [[str(value) for value in rec] for rec in records]
        table = [header] + str_rows

        # Transpose so each tuple holds one column. The header is part of
        # the table, so long titles stay aligned and an empty frame still
        # yields one width per column.
        widths = [max(len(cell) for cell in column) + col_space
                  for column in zip(*table)]

        # Keep the widths reachable as ``self.space`` like before, but
        # bypass DataFrame.__setattr__ so a column named "space" is never
        # overwritten and pandas does not warn about attribute assignment.
        object.__setattr__(self, 'space', widths)

        return [self._format_line(row, widths) for row in table]

    def _format_line(self, row_vals, widths=None):
        """Right-align each entry to its column width and join them.

        row_vals: list of strings, one per column.
        widths: column widths to use; defaults to ``self.space`` as set by
            the last ``get_lines_fast_struct`` call.
        """
        if widths is None:
            widths = self.space
        return ''.join(str(entry).rjust(width)
                       for entry, width in zip(row_vals, widths))
import random
import numpy

# Sample data: 1000 rows with a random integer column, a random float
# column and a column of randomly chosen strings.
# The array calls go through the ``numpy`` module imported above; the
# previous ``npnumpy`` prefix was an undefined name.
df = DataFrame2({'A': numpy.random.randint(0, 1000, 1000),
                 'B': numpy.random.random(1000),
                 'C': [random.choice(['EYE', '<3', 'PANDAS', '0.16'])
                       for _ in range(1000)]})
import numpy as np
import pandas


class DataFrame2(pandas.DataFrame):
    """pandas.DataFrame with helpers that render the frame as text lines.

    Every ``get_lines_*`` method returns a list of strings: the column
    header line followed by one line per row. The index is never included.
    """

    def __init__(self, *args, **kwargs):
        pandas.DataFrame.__init__(self, *args, **kwargs)

    def get_lines_standard(self):
        """Return the lines of ``DataFrame.to_string(index=False)``.

        This is the reference implementation, using pandas' own formatted
        column spacing.
        """
        return self.to_string(index=False).split('\n')

    def get_lines_fast_unstruct(self):
        """Return header and rows as single-space separated strings.

        Lighter alternative to ``to_string()``: every value is converted
        with ``str`` and no column alignment is applied.
        """
        # str() the labels so non-string column names do not break the join.
        header = ' '.join(str(label) for label in self.columns)
        rows = self.to_records(index=False).tolist()
        # Build a real list: on Python 3 ``map`` is lazy and cannot be
        # concatenated to a list.
        return [header] + [' '.join(map(str, row)) for row in rows]

    def get_lines_fast_struct(self, col_space=1):
        """Return header and rows as right-aligned, fixed-width lines.

        Lighter alternative to ``to_string()``. Each column is as wide as
        its longest entry (header included) plus ``col_space`` characters
        of padding.

        col_space: number of extra padding characters per column.
        """
        header = [str(label) for label in self.columns]
        records = self.to_records(index=False)
        # Materialise the stringified rows; they are traversed twice
        # (once for the widths, once for the formatting).
        str_rows = [[str(value) for value in rec] for rec in records]
        table = [header] + str_rows

        # Transpose so each tuple holds one column. The header is part of
        # the table, so long titles stay aligned and an empty frame still
        # yields one width per column.
        widths = [max(len(cell) for cell in column) + col_space
                  for column in zip(*table)]

        # Keep the widths reachable as ``self.space`` like before, but
        # bypass DataFrame.__setattr__ so a column named "space" is never
        # overwritten and pandas does not warn about attribute assignment.
        object.__setattr__(self, 'space', widths)

        return [self._format_line(row, widths) for row in table]

    def _format_line(self, row_vals, widths=None):
        """Right-align each entry to its column width and join them.

        row_vals: list of strings, one per column.
        widths: column widths to use; defaults to ``self.space`` as set by
            the last ``get_lines_fast_struct`` call.
        """
        if widths is None:
            widths = self.space
        return ''.join(str(entry).rjust(width)
                       for entry, width in zip(row_vals, widths))
import random

# Sample data: 1000 rows with a random integer column, a random float
# column and a column of randomly chosen strings.
_N_ROWS = 1000
_LABELS = ['EYE', '<3', 'PANDAS', '0.16']
df = DataFrame2({
    'A': np.random.randint(0, 1000, _N_ROWS),
    'B': np.random.random(_N_ROWS),
    'C': [random.choice(_LABELS) for _ in range(_N_ROWS)],
})
import pandas


class DataFrame2(pandas.DataFrame):
    """pandas.DataFrame with helpers that render the frame as text lines.

    Every ``get_lines_*`` method returns a list of strings: the column
    header line followed by one line per row. The index is never included.
    """

    def __init__(self, *args, **kwargs):
        pandas.DataFrame.__init__(self, *args, **kwargs)

    def get_lines_standard(self):
        """Return the lines of ``DataFrame.to_string(index=False)``.

        This is the reference implementation, using pandas' own formatted
        column spacing.
        """
        return self.to_string(index=False).split('\n')

    def get_lines_fast_unstruct(self):
        """Return header and rows as single-space separated strings.

        Lighter alternative to ``to_string()``: every value is converted
        with ``str`` and no column alignment is applied.
        """
        # str() the labels so non-string column names do not break the join.
        header = ' '.join(str(label) for label in self.columns)
        rows = self.to_records(index=False).tolist()
        # Build a real list: on Python 3 ``map`` is lazy and cannot be
        # concatenated to a list.
        return [header] + [' '.join(map(str, row)) for row in rows]

    def get_lines_fast_struct(self, col_space=1):
        """Return header and rows as right-aligned, fixed-width lines.

        Lighter alternative to ``to_string()``. Each column is as wide as
        its longest entry (header included) plus ``col_space`` characters
        of padding.

        col_space: number of extra padding characters per column.
        """
        header = [str(label) for label in self.columns]
        records = self.to_records(index=False)
        # Materialise the stringified rows; they are traversed twice
        # (once for the widths, once for the formatting).
        str_rows = [[str(value) for value in rec] for rec in records]
        table = [header] + str_rows

        # Transpose so each tuple holds one column. The header is part of
        # the table, so long titles stay aligned and an empty frame still
        # yields one width per column.
        widths = [max(len(cell) for cell in column) + col_space
                  for column in zip(*table)]

        # Keep the widths reachable as ``self.space`` like before, but
        # bypass DataFrame.__setattr__ so a column named "space" is never
        # overwritten and pandas does not warn about attribute assignment.
        object.__setattr__(self, 'space', widths)

        return [self._format_line(row, widths) for row in table]

    def _format_line(self, row_vals, widths=None):
        """Right-align each entry to its column width and join them.

        row_vals: list of strings, one per column.
        widths: column widths to use; defaults to ``self.space`` as set by
            the last ``get_lines_fast_struct`` call.
        """
        if widths is None:
            widths = self.space
        return ''.join(str(entry).rjust(width)
                       for entry, width in zip(row_vals, widths))
import random
import numpy

# Sample data: 1000 rows with a random integer column, a random float
# column and a column of randomly chosen strings.
_N_ROWS = 1000
_LABELS = ['EYE', '<3', 'PANDAS', '0.16']
df = DataFrame2({
    'A': numpy.random.randint(0, 1000, _N_ROWS),
    'B': numpy.random.random(_N_ROWS),
    'C': [random.choice(_LABELS) for _ in range(_N_ROWS)],
})
added 51 characters in body
Source Link
dermen
  • 353
  • 2
  • 3
  • 9
Loading
Tweeted twitter.com/#!/StackCodeReview/status/629489535961972736
edited title
Link
dermen
  • 353
  • 2
  • 3
  • 9
Loading
added 116 characters in body
Source Link
dermen
  • 353
  • 2
  • 3
  • 9
Loading
edited tags
Link
200_success
  • 145.7k
  • 22
  • 191
  • 481
Loading
added 4 characters in body
Source Link
dermen
  • 353
  • 2
  • 3
  • 9
Loading
deleted 42 characters in body; edited title
Source Link
Jamal
  • 35.2k
  • 13
  • 134
  • 238
Loading
Source Link
dermen
  • 353
  • 2
  • 3
  • 9
Loading