2
object App {
  def main(args: Array[String]) {
    val conf = new spark.SparkConf().setMaster("local[2]").setAppName("mySparkApp")
    val sc = new spark.SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    val jdbcUrl = "1.2.34.567" 
    val jdbcUser = "someUser"
    val jdbcPassword = "xxxxxxxxxxxxxxxxxxxx"
    val tableName = "myTable"
    val driver = "org.postgresql.Driver"
    Class.forName(driver)

    val df = sqlContext
            .read
            .format("jdbc")
            .option("driver", driver)
            .option("url", jdbcUrl)
            .option("userName", jdbcUser)
            .option("password", jdbcPassword)
            .option("dbtable", tableName) // NullPointerException occurs here
            .load()
  }
}

I want to connect to a Postgres database on my LAN from Spark. During runtime, the following error occurs:

Exception in thread "main" java.lang.NullPointerException
    at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:71)
    at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation$.getSchema(JDBCRelation.scala:210)
    at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:35)
    at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:318)
    at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:167)
    at <redacted>?.main(App.scala:42)
    at <redacted>.App.main(App.scala)

Is there an obvious reason why there's a nullpointer exception at the option("dbtable", tableName) line? I'm using spark-2.3.1-bin-hadoop2.7 with Scala 2.11.12. For the postgres dependency, I'm using this version:

        <dependency>
            <groupId>org.postgresql</groupId>
            <artifactId>postgresql</artifactId>
            <version>9.4-1200-jdbc41</version>
        </dependency>
1
  • Can I see your tableName? It should be all the query: .option("dbtable", "select * from databaseName.tableName") Commented Apr 10, 2019 at 16:21

1 Answer 1

4

The error message (which isn't very helpful for troubleshooting) is probably not against option dbtable, but option url.

It looks like your jdbcUrl is missing the URL protocol jdbc:postgresql:// as its prefix. Here's a link re: Spark's JDBC data sources.

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.