# PySpark - Create MapType Column from existing columns
# Import SparkSession and the col, lit and create_map functions
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, create_map
# Create a Spark session using the getOrCreate() function
spark_session = SparkSession.builder.getOrCreate()
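# getOrCreate() returns the active session if one already exists; an
# application name can optionally be chained in, for example (the name
# "maptype_demo" below is purely illustrative):
# spark_session = SparkSession.builder.appName("maptype_demo").getOrCreate()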
# Define the data set
emp = [(1, "Smith", -1, "2018", "10", "M", 3000),
       (2, "Rose", 1, "2010", "20", "M", 4000),
       (3, "Williams", 1, "2010", "10", "M", 1000),
       (4, "Jones", 2, "2005", "10", "F", 2000),
       (5, "Brown", 2, "2010", "40", "F", 4000),
       (6, "Brown", 2, "2010", "50", "M", 2000)]
# Define the column names of the data set
empColumns = ["emp_id", "name", "superior_emp_id",
              "year_joined", "emp_dept_id",
              "gender", "salary"]
# Create the DataFrame from the data set and the column names
empDF = spark_session.createDataFrame(data=emp, schema=empColumns)
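# Optional sanity check: since only column names were supplied, Spark infers
# the field types from the data (e.g. emp_id and salary as long, name as string)
empDF.printSchema()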
# Combine the name, superior_emp_id, year_joined, emp_dept_id, gender and
# salary columns into a single MapType column, then drop the originals
empDF = empDF.withColumn("employee_details",
                         create_map(lit("name"), col("name"),
                                    lit("superior_emp_id"), col("superior_emp_id"),
                                    lit("year_joined"), col("year_joined"),
                                    lit("emp_dept_id"), col("emp_dept_id"),
                                    lit("gender"), col("gender"),
                                    lit("salary"), col("salary")))
empDF = empDF.drop("name", "superior_emp_id", "year_joined",
                   "emp_dept_id", "gender", "salary")
# Display the data frame; truncate=False keeps the full map values visible
empDF.show(truncate=False)
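
# A couple of ways to read the map back. Values can be looked up by key with
# getItem(); a missing key yields null rather than an error
empDF.select(col("employee_details").getItem("name").alias("name"),
             col("employee_details").getItem("salary").alias("salary")).show()

# The map can also be flattened back into key/value rows with explode()
from pyspark.sql.functions import explode
empDF.select("emp_id", explode("employee_details")).show(truncate=False)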