# Python program to sort the fields of a struct column using a UDF in PySpark
# Import SparkSession, udf, and the types StructType,
# StructField, StringType, IntegerType, ArrayType
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, ArrayType
# Obtain the Spark session via the builder's getOrCreate(),
# which is the entry point used below to build the data frame
spark_session = (
    SparkSession.builder
    .getOrCreate()
)
# Sample rows, one tuple per person, laid out as
# ((first, middle, last), date of birth, gender, fees)
# NOTE(review): '2000-21-02' is not a valid YYYY-MM-DD date; it is kept
# unchanged because the value is only ever handled as a plain string
data_set = [
    (('Ishita', 'Rai', 'Pundir'), '2000-21-02', 'Male', 13000),
    (('Aia', 'Singh', 'Rajput'), '2004-01-06', 'Female', 10000),
]
# Schema of the data frame: a nested 'Full_Name' struct made of three
# nullable string fields, followed by three nullable scalar columns
schema = StructType([
    StructField('Full_Name', StructType([
        # The three name parts share the same type and nullability
        StructField(part_name, StringType(), True)
        for part_name in ('First_Name', 'Middle_Name', 'Last_Name')
    ])),
    StructField('Date_Of_Birth', StringType(), True),
    StructField('Gender', StringType(), True),
    StructField('Fees', IntegerType(), True),
])
# Build the PySpark data frame from the sample rows,
# applying the explicit schema defined above
df = spark_session.createDataFrame(data_set, schema=schema)
# Create a user defined function that takes the 'Full_Name'
# struct and returns its fields as a sorted array of strings
def _sort_name_parts(name_parts):
    """Return the name parts in ascending order, with null parts last.

    Args:
        name_parts: The struct value handed to the UDF (an iterable of
            its field values), or None when the struct itself is null.

    Returns:
        A list of the parts sorted ascending with any None parts placed
        at the end, or None when ``name_parts`` is None.
    """
    # Every field in the schema is nullable, and Spark does not skip the
    # UDF for null rows, so the null check has to live inside the function.
    if name_parts is None:
        return None
    # Python 3 cannot compare str with None; sorting on (is-null, value)
    # keeps real strings in their natural order and pushes nulls last.
    return sorted(name_parts, key=lambda part: (part is None, part or ''))


udf_sort = udf(_sort_name_parts, ArrayType(StringType()))
# Append a 'Sorted_Full_Name' column computed by the user defined
# function from 'Full_Name', then print the result without truncation
(
    df
    .withColumn('Sorted_Full_Name', udf_sort(df["Full_Name"]))
    .show(truncate=False)
)