2
 object test {

 case class Caserne(x: String, y: String, Name: String, Description: String)


def main(args: Array[String]): Unit = {

val conf = new SparkConf().setAppName("BankDataAnalysis").setMaster("local[1]")
val sc = new SparkContext(conf)
val sqlContext= new SQLContext(sc)

import sqlContext.implicits._

// load caserne data
val caserneTxt = sc.parallelize(
  IOUtils.toString(
    new URL("http://donnees.ville.montreal.qc.ca/dataset/c69e78c6-e454-4bd9-9778-e4b0eaf8105b/resource/f6542ad1-31f5-458e-b33d-1a028fab3e98/download/casernessim.csv"),
    Charset.forName("utf8")).split("\n"))

val header = caserneTxt.first()
val caserne = caserneTxt.map(s => s.split(",")).filter(s => s != header).map(
  s => Caserne(s(0),
               s(1),
               s(2).replaceAll("[^\\d]", "").trim(),
               s(3).replaceAll("""<(?!\/?a(?=>|\s.*>))\/?.*?>""", " ").trim()
               )).toDF()

 caserne.registerTempTable("caserne")
 sqlContext.sql("Select * from caserne").show()

 }
}

enter image description here

I have to remove csv file header. I used filter(s => s != header) but it did'nt work. Thank you for your help

2 Answers 2

1

Try using :-

val rows = data.filter(s=> header(s,"X") != "X") 

reference :- How do I convert csv file to rdd

I found this convenient method

val header = caserneTxt.first()
val no_header = caserneTxt.filter(_(0) != header(0))
Sign up to request clarification or add additional context in comments.

1 Comment

Thank you for help, i found the easy way from reference :)
0

one way would be using the one of the header key and filter that from dataframe something like below

dataFrame.filter(row => row.getAs[String]("description") != "description").show

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.