0

Sample Json

 "alternateId": [
    {
        "type": "POPID",
        "value": "1-7842-0759-001"
    },
    {
        "type": "CAMID",
        "value": "CAMID 0000-0002-7EC1-02FF-O-0000-0000-2"
    },
    {
        "type": "ProgrammeUuid",
        "value": "1ddb01e2-6146-4e10-bba9-dde40d0ad886"
    }
]

I want to update a existing dataframe with two columns, those two columns are POPID and CAMID . These two values needs to be parsed from json structure I dont know how to parse this structure , Can you help me on what do i need to change on fetchField method. As per above json POPID is placed first and CAMID is placed second, but in real jsons, it can be placed at one of those 3 places inside alternateId.

 val fetchCAMID_udf = udf(fetchCAMID _)
 val fetchPOPID_udf = udf(fetchPOPID _)

 var updatedDf = //Data frame initialize

 updatedDf = updatedDf.withColumn("CAMID", fetchCAMID_udf(col("alternate_id")))
 updatedDf = updatedDf.withColumn("POPID", fetchPOPID_udf(col("alternate_id")))
 updatedDf .show(10,false)


 def fetchCAMID(jsonStr: String): String = {
var CAMID: String = fetchField(jsonStr, "CAMID")
 CAMID
}

 def fetchPOPID(jsonStr: String): String = {
fetchField(jsonStr, "POPID")
}


 def fetchField(jsonStr: String, fieldName: String): String = {
 try {
   implicit val formats = DefaultFormats
   val extractedField = jsonStr match {
    case "(unknown)" => jsonStr
    case _ => {
      val json = JsonMethods.parse(jsonStr)
      val resultExtracted = (json \\ fieldName)
      val result = resultExtracted match {
        case _: JString => resultExtracted.extract[String]
        case _: JInt => resultExtracted.extract[Int].toString
        case _: JObject => "(unknown)"
      }
      result
    }
   }
  extractedField
 }
catch{
  case e: Exception =>{
    log.error(s"Fetch field failed. Field name: $fieldName . Json: $jsonStr")
    "(unknown)"
   }
  }
}

2 Answers 2

1

Change your fetchField function as the following

def fetchField(jsonStr: String, fieldName: String): String = {
  try {
    val typeAndValue = (JsonMethods.parse("{"+jsonStr+"}") \ "alternateId" \ "type" \\ classOf[JString]).zip(JsonMethods.parse("{"+jsonStr+"}") \ "alternateId" \ "value" \\ classOf[JString])
    typeAndValue.filter(_._1 == fieldName).map(_._2).toList(0)
  }catch{
    case e: Exception =>{
      "(unknown)"
    }
  }
}

and you get the CAMID and POPID populated

Sign up to request clarification or add additional context in comments.

Comments

0

you can read the JSON using Spark and get it using regular spark operations

val df=spark.read.option("multiLine",true).json("test.json")

 df.select($"alternateId".getItem(0).as("pop"),$"alternateId".getItem(1).as("cam")).select($"pop.value".as("POPID"),$"cam.value".as("CAMID")).show()

+---------------+--------------------+
|          POPID|               CAMID|
+---------------+--------------------+
|1-7842-0759-001|CAMID 0000-0002-7...|
+---------------+--------------------+

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.