I have several columns to be extracted from json string. However one field has nested values. Not sure how to deal with that?
Need to explode into multiple rows to get values of field name, Value1, Value2.
import spark.implicits._
val df = Seq(
("1", """{"k": "foo", "v": 1.0}""", "some_other_field_1"),
("2", """{"p": "bar", "q": 3.0}""", "some_other_field_2"),
("3",
"""{"nestedKey":[ {"field name":"name1","Value1":false,"Value2":true},
| {"field name":"name2","Value1":"100","Value2":"200"}
|]}""".stripMargin, "some_other_field_3")
).toDF("id","json","other")
df.show(truncate = false)
val df1= df.withColumn("id1",col("id"))
.withColumn("other1",col("other"))
.withColumn("k",get_json_object(col("json"),"$.k"))
.withColumn("v",get_json_object(col("json"),"$.v"))
.withColumn("p",get_json_object(col("json"),"$.p"))
.withColumn("q",get_json_object(col("json"),"$.q"))
.withColumn("nestedKey",get_json_object(col("json"),"$.nestedKey"))
.select("id1","other1","k","v","p","q","nestedKey")
df1.show(truncate = false)