You can flatten the (possibly nested) schema of the DataFrame and then check whether every leaf field is null:
from pyspark.sql import functions as F
from pyspark.sql import types as T
from functools import reduce
# create some test data with nested structures
data = ['{"active":null,"id":null,"uuid":null}',
        '{"active":1,"id":2,"uuid":3}',
        '{"active":4,"id":5,"uuid":6, "x": {"y":7, "z":8, "a":{"b":9,"c":10}}}',
        '{"active":4,"id":5,"uuid":6, "x": {"y":7, "z":8, "a":{"b":9,"c":null}}}']
df = spark.read.json(spark.sparkContext.parallelize(data))
# recursively yield the dotted path of every leaf field in the schema
def flatten(field, prefix=""):
    if isinstance(field, T.StructType):
        # a struct type: descend into each of its fields
        for f in field:
            yield from flatten(f, prefix)
    elif isinstance(field, T.StructField) and isinstance(field.dataType, T.StructType):
        # a field that is itself a struct: recurse, extending the prefix
        for f in field.dataType:
            yield from flatten(f, prefix + field.name + ".")
    else:
        # a leaf field: yield its fully qualified name
        yield prefix + field.name
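# sanity check (optional): for the test data above the generator should yield
# the dotted leaf paths ['active', 'id', 'uuid', 'x.a.b', 'x.a.c', 'x.y', 'x.z'],
# since spark.read.json orders inferred struct fields alphabetically
print(list(flatten(df.schema)))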
# one isNull check per leaf column, combined with logical AND
cols_are_null = [F.col(c).isNull() for c in flatten(df.schema)]
all_cols_are_null = reduce(lambda l, r: l & r, cols_are_null)
df.withColumn("all_cols_are_null", all_cols_are_null).show()
#+------+----+----+-----------------+-----------------+
#|active| id|uuid| x|all_cols_are_null|
#+------+----+----+-----------------+-----------------+
#| null|null|null| null| true|
#| 1| 2| 3| null| false|
#| 4| 5| 6| {{9, 10}, 7, 8}| false|
#| 4| 5| 6|{{9, null}, 7, 8}| false|
#+------+----+----+-----------------+-----------------+
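If you want to drop such rows instead of just flagging them, the same condition can be negated in a filter. A minimal sketch reusing all_cols_are_null from above (it keeps the last three rows of the test data, where at least one leaf field is non-null):
# keep only rows where at least one leaf field is non-null
df.filter(~all_cols_are_null).show()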