You can write a function that validates the data, and then apply it as a decorator to the functions that need the check.
If you can express your test as a Python function:
def has_columns(df, columns):
    """Check whether all `columns` are present in `df`.

    Args:
        df: any iterable whose iteration yields the available column names
            (iterating a pandas DataFrame yields its column labels).
        columns: a single column name given as a string, or an iterable of
            column names.

    Returns:
        A tuple `(all_present, missing)`: `all_present` is True when every
        requested column is found in `df`, and `missing` is the set of
        requested columns that were not found (empty on success).
    """
    if isinstance(columns, str):
        # Wrap a lone string so `set` does not split it into characters.
        columns = {columns}
    # Build the set once; an empty difference means every column is present.
    missing = set(columns) - set(df)
    return not missing, missing
Then you can make a decorator:
from functools import wraps
def has_columns_decorator(columns):
    """Build a decorator that validates the columns of a `df` argument.

    The decorated function must accept a parameter named `df`. Callers may
    pass the frame either as the first positional argument or as the `df`
    keyword; in both cases it is forwarded to the function as `df=...`.

    Args:
        columns: a column name, or an iterable of column names, handed to
            `has_columns` together with the `df` argument on every call.

    Returns:
        A decorator. The wrapper it produces runs the column check before
        calling the decorated function and returns that function's result.

    Raises:
        ValueError: raised by the wrapper when no `df` argument was supplied
            or when `has_columns` reports missing columns.
    """

    def decorate(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if "df" in kwargs:
                df = kwargs.pop("df")
            elif args:
                # Treat the first positional argument as the frame.
                df, args = args[0], args[1:]
            else:
                # Fail with an explicit message instead of an opaque
                # "not enough values to unpack" error.
                raise ValueError(
                    f"{func.__name__}() requires a `df` argument "
                    "(first positional argument or `df=` keyword)"
                )
            check_result, missing_columns = has_columns(df, columns)
            if not check_result:
                raise ValueError(
                    f"Not all columns are present: {missing_columns}"
                )
            # Always forward the frame by keyword, whichever way it arrived.
            return func(*args, df=df, **kwargs)

        return wrapper

    return decorate
You can make this validation as complex as you like, for example checking dtypes, whether the values are larger than 0, etc.
You can use it like this:
@has_columns_decorator("a")
def my_func(df):
    """Return `df` unchanged; the decorator requires column "a" first."""
    return df
my_func(df)
a b
0 0 a
1 1 b
2 2 c
@has_columns_decorator(["a", "c"])
def my_func2(df):
    """Return `df` unchanged; the decorator requires columns "a" and "c"."""
    return df
my_func2(df)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-53-997c72a8b535> in <module>
----> 1 my_func2(df)
<ipython-input-50-36b8ff709aa9> in wrapper(*args, **kwargs)
28 if not check_result:
29 raise ValueError(
---> 30 f"Not all columns are present: {missing_columns}"
31 )
32
ValueError: Not all columns are present: {'c'}
You can make your checks as elaborate as you want. `engarde` is a package that already provides some of these checks for you.