while performing simple join on 2 data frame, pyspark returns no output data
from pyspark.sql import *
import pyspark.sql.functions as F
from pyspark.sql.functions import col
spark = SparkSession.builder.master("local").appName("test").getOrCreate()
file_path="C:\\bigdata\\pipesep_data\\Sales_ny.csv"
df=spark.read.format("csv").option('header','True').option('inferSchema', 'True').option("delimiter", '|').load(file_path)
addData=[(1,"1523 Main St","SFO","CA"),
(2,"3453 Orange St","SFO","NY"),
(3,"34 Warner St","Jersey","NJ"),
(4,"221 Cavalier St","Newark","DE"),
(5,"789 Walnut St","Sandiago","CA")
]
addColumns = ["emp_id","addline1","city","State"]
addDF = spark.createDataFrame(addData,addColumns)
addDF.show()
df.join(addDF,df["State"] == addDF["State"]).show()
Output: No data in output, only columns are joined I also tried with left,right,fullouter etc..


