Skip to main content
added `df_name` argument
Source Link
def has_columns(df, columns):
    """
    Check whether all `columns` are present in `df`.

    `df` is any object whose iteration yields its column labels (iterating
    a pandas DataFrame does this).  `columns` is either a single column
    name given as a string, or an iterable of column names.

    Returns a tuple ``(all_present, missing)``: a boolean result and the
    set of requested columns that are not in `df`.
    """
    if isinstance(columns, str):
        # Wrap a lone name so `set` does not split it into characters.
        required = {columns}
    else:
        # Materialize exactly once: a one-shot iterable (e.g. a generator)
        # would otherwise be exhausted before the missing set is computed.
        required = set(columns)
    missing = required - set(df)
    return not missing, missing
from functools import wraps
def has_columns_decorator(columns, df_name="df"):
    """
    Build a decorator that checks for `columns` in a DataFrame argument.

    The decorated function must receive the DataFrame either as the keyword
    argument named by `df_name` or as its first positional argument.
    Before the function runs, `has_columns` checks that every entry of
    `columns` is a column of that DataFrame.

    Raises a ValueError if the check fails, or if no DataFrame argument
    was passed at all.
    """
    def decorate(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if df_name in kwargs:
                df = kwargs[df_name]
            elif args:
                df = args[0]
            else:
                raise ValueError(
                    f"No DataFrame passed: expected keyword argument "
                    f"{df_name!r} or a first positional argument"
                )
            check_result, missing_columns = has_columns(df, columns)
            if not check_result:
                raise ValueError(
                    f"Not all columns are present: {missing_columns}"
                )
            # Forward the call exactly as it was received.  Re-passing the
            # DataFrame under a fixed keyword would break functions whose
            # parameter has another name and would collide with remaining
            # positional arguments.
            return func(*args, **kwargs)
        return wrapper
    return decorate
def has_columns(df, columns):
    """
    Check whether all `columns` are present in `df`.

    `df` is any object whose iteration yields its column labels (iterating
    a pandas DataFrame does this).  `columns` is either a single column
    name given as a string, or an iterable of column names.

    Returns a tuple ``(all_present, missing)``: a boolean result and the
    set of requested columns that are not in `df`.
    """
    if isinstance(columns, str):
        # Wrap a lone name so `set` does not split it into characters.
        required = {columns}
    else:
        # Materialize exactly once: a one-shot iterable (e.g. a generator)
        # would otherwise be exhausted before the missing set is computed.
        required = set(columns)
    missing = required - set(df)
    return not missing, missing
from functools import wraps
def has_columns_decorator(columns):
    """
    Build a decorator that checks for `columns` in a DataFrame argument.

    The decorated function must receive the DataFrame either as keyword
    argument `df` or as its first positional argument.  Before the
    function runs, `has_columns` checks that every entry of `columns` is a
    column of that DataFrame.

    Raises a ValueError if the check fails, or if no DataFrame argument
    was passed at all.
    """
    def decorate(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if "df" in kwargs:
                df = kwargs["df"]
            elif args:
                df = args[0]
            else:
                raise ValueError(
                    "No DataFrame passed: expected keyword argument 'df' "
                    "or a first positional argument"
                )
            check_result, missing_columns = has_columns(df, columns)
            if not check_result:
                raise ValueError(
                    f"Not all columns are present: {missing_columns}"
                )
            # Forward the call exactly as it was received.  Stripping the
            # DataFrame from `args` and re-passing it as `df=` made
            # `f(df, x)` fail with "multiple values for argument 'df'".
            return func(*args, **kwargs)
        return wrapper
    return decorate
def has_columns(df, columns):
    """
    Check whether all `columns` are present in `df`.

    `df` is any object whose iteration yields its column labels (iterating
    a pandas DataFrame does this).  `columns` is either a single column
    name given as a string, or an iterable of column names.

    Returns a tuple ``(all_present, missing)``: a boolean result and the
    set of requested columns that are not in `df`.
    """
    if isinstance(columns, str):
        # Wrap a lone name so `set` does not split it into characters.
        required = {columns}
    else:
        # Materialize exactly once: a one-shot iterable (e.g. a generator)
        # would otherwise be exhausted before the missing set is computed.
        required = set(columns)
    missing = required - set(df)
    return not missing, missing
from functools import wraps
def has_columns_decorator(columns, df_name="df"):
    """
    Build a decorator that checks for `columns` in a DataFrame argument.

    The decorated function must receive the DataFrame either as the keyword
    argument named by `df_name` or as its first positional argument.
    Before the function runs, `has_columns` checks that every entry of
    `columns` is a column of that DataFrame.

    Raises a ValueError if the check fails, or if no DataFrame argument
    was passed at all.
    """
    def decorate(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if df_name in kwargs:
                df = kwargs[df_name]
            elif args:
                df = args[0]
            else:
                raise ValueError(
                    f"No DataFrame passed: expected keyword argument "
                    f"{df_name!r} or a first positional argument"
                )
            check_result, missing_columns = has_columns(df, columns)
            if not check_result:
                raise ValueError(
                    f"Not all columns are present: {missing_columns}"
                )
            # Forward the call exactly as it was received.  The previous
            # `func(*args, df=df, **kwargs)` ignored `df_name`, so any
            # function whose DataFrame parameter was not literally `df`
            # failed, and extra positional arguments collided with `df=`.
            return func(*args, **kwargs)
        return wrapper
    return decorate
Source Link

You can write a function that validates the data, and decorate the functions you want.

If you can express your test as a Python function:

def has_columns(df, columns):
    """
    Check whether all `columns` are present in `df`.

    `df` is any object whose iteration yields its column labels (iterating
    a pandas DataFrame does this).  `columns` is either a single column
    name given as a string, or an iterable of column names.

    Returns a tuple ``(all_present, missing)``: a boolean result and the
    set of requested columns that are not in `df`.
    """
    if isinstance(columns, str):
        # Wrap a lone name so `set` does not split it into characters.
        required = {columns}
    else:
        # Materialize exactly once: a one-shot iterable (e.g. a generator)
        # would otherwise be exhausted before the missing set is computed.
        required = set(columns)
    missing = required - set(df)
    return not missing, missing

Then you can make a decorator:

from functools import wraps
def has_columns_decorator(columns):
    """
    Build a decorator that checks for `columns` in a DataFrame argument.

    The decorated function must receive the DataFrame either as keyword
    argument `df` or as its first positional argument.  Before the
    function runs, `has_columns` checks that every entry of `columns` is a
    column of that DataFrame.

    Raises a ValueError if the check fails, or if no DataFrame argument
    was passed at all.
    """
    def decorate(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if "df" in kwargs:
                df = kwargs["df"]
            elif args:
                df = args[0]
            else:
                raise ValueError(
                    "No DataFrame passed: expected keyword argument 'df' "
                    "or a first positional argument"
                )
            check_result, missing_columns = has_columns(df, columns)
            if not check_result:
                raise ValueError(
                    f"Not all columns are present: {missing_columns}"
                )
            # Forward the call exactly as it was received.  Stripping the
            # DataFrame from `args` and re-passing it as `df=` made
            # `f(df, x)` fail with "multiple values for argument 'df'".
            return func(*args, **kwargs)
        return wrapper
    return decorate

You can make this validation as complex as you like: checking dtypes, checking whether the DataFrame has more than 0 rows, etc.

You can use it like this:

@has_columns_decorator("a")
def my_func(df):
    """Example: return `df` unchanged; the decorator first checks for column "a"."""
    return df

my_func(df)
      a   b
0     0   a
1     1   b
2     2   c
​
@has_columns_decorator(["a", "c"])
def my_func2(df):
    """Example: return `df` unchanged; the decorator first checks for columns "a" and "c"."""
    return df

my_func2(df)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-53-997c72a8b535> in <module>
----> 1 my_func2(df)

<ipython-input-50-36b8ff709aa9> in wrapper(*args, **kwargs)
     28             if not check_result:
     29                 raise ValueError(
---> 30                     f"Not all columns are present: {missing_columns}"
     31                 )
     32 

ValueError: Not all columns are present: {'c'}

You can make your checks as elaborate as you want. `engarde` is a package that already provides some checks for you.