Sample JSON:
"alternateId": [
{
"type": "POPID",
"value": "1-7842-0759-001"
},
{
"type": "CAMID",
"value": "CAMID 0000-0002-7EC1-02FF-O-0000-0000-2"
},
{
"type": "ProgrammeUuid",
"value": "1ddb01e2-6146-4e10-bba9-dde40d0ad886"
}
]
I want to update an existing DataFrame with two columns, POPID and CAMID. These two values need to be parsed from the JSON structure above, but I don't know how to parse this structure. Can you help me with what I need to change in the fetchField method? In the JSON above, POPID is placed first and CAMID second, but in real JSON documents each of them can appear at any of the three positions inside alternateId.
// Wrap the JSON helper methods as UDFs so they can be applied to DataFrame columns.
val fetchCAMID_udf = udf(fetchCAMID _)
val fetchPOPID_udf = udf(fetchPOPID _)
// Placeholder: the source DataFrame is assigned here (elided in this snippet).
var updatedDf = //Data frame initialize
// Add the "CAMID" and "POPID" columns, each computed from the "alternate_id" column.
updatedDf = updatedDf.withColumn("CAMID", fetchCAMID_udf(col("alternate_id")))
updatedDf = updatedDf.withColumn("POPID", fetchPOPID_udf(col("alternate_id")))
// Print the first 10 rows with column truncation disabled.
updatedDf .show(10,false)
/**
 * Looks up the "CAMID" entry in the given JSON string.
 *
 * Thin wrapper over `fetchField`, kept as a named method so it can be lifted
 * into a UDF with `udf(fetchCAMID _)`.
 *
 * @param jsonStr JSON text to search
 * @return whatever `fetchField(jsonStr, "CAMID")` yields
 */
def fetchCAMID(jsonStr: String): String =
  fetchField(jsonStr, "CAMID")
/**
 * Looks up the "POPID" entry in the given JSON string.
 *
 * Thin wrapper over `fetchField`, kept as a named method so it can be lifted
 * into a UDF with `udf(fetchPOPID _)`.
 *
 * @param jsonStr JSON text to search
 * @return whatever `fetchField(jsonStr, "POPID")` yields
 */
def fetchPOPID(jsonStr: String): String =
  fetchField(jsonStr, "POPID")
/**
 * Extracts the value identified by `fieldName` from a JSON string and renders it as text.
 *
 * Two layouts are recognised, tried in this order:
 *  1. a field literally named `fieldName`, e.g. `{"CAMID": "x"}`;
 *  2. a typed entry, e.g. `{"type": "CAMID", "value": "x"}`, at any depth and at any
 *     position inside an enclosing array (such as `alternateId`).
 *
 * Only string and integer values are returned; any other value type counts as "not found".
 *
 * NOTE(review): `jsonStr` must be a complete JSON document. A bare fragment such as
 * `"alternateId": [...]` without the surrounding braces will fail to parse — confirm
 * what the `alternate_id` column actually holds.
 *
 * @param jsonStr   JSON text to search; `null` and the sentinel "(unknown)" are passed
 *                  through as "(unknown)" without attempting to parse
 * @param fieldName field name, or the `type` tag of the entry whose `value` is wanted
 * @return the matching value as a string, or "(unknown)" when nothing suitable is found
 *         or the input cannot be parsed
 */
def fetchField(jsonStr: String, fieldName: String): String = {
  import scala.util.control.NonFatal

  val unknown = "(unknown)"

  // A UDF receives null for null column values; treat it like the sentinel instead of
  // letting the parser throw.
  if (jsonStr == null || jsonStr == unknown) unknown
  else
    try {
      val json = JsonMethods.parse(jsonStr)

      // Layout 1: a key literally named `fieldName` somewhere in the document.
      val directHit = json \\ fieldName

      // Layout 2: objects tagged with "type": fieldName carry the payload under "value".
      // Searching the whole tree makes the entry's position in the array irrelevant.
      val typedHits = json.filter {
        case JObject(fields) =>
          fields.exists { case (key, value) => key == "type" && value == JString(fieldName) }
        case _ => false
      }.map(_ \ "value")

      // The direct lookup goes first so inputs that already resolved keep their result.
      (directHit :: typedHits).collectFirst {
        case JString(text) => text
        case JInt(number)  => number.toString // BigInt rendering: no Int truncation
      }.getOrElse(unknown)
    } catch {
      // NonFatal lets InterruptedException and fatal VM errors propagate.
      case NonFatal(_) =>
        log.error(s"Fetch field failed. Field name: $fieldName . Json: $jsonStr")
        unknown
    }
}