0% found this document useful (0 votes)
5 views

Multiclass Classification

Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
5 views

Multiclass Classification

Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/1

from pyspark.ml import Pipeline
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.feature import StandardScaler, StringIndexer, VectorAssembler
from pyspark.sql import SparkSession

# Multiclass classification of the Iris dataset with a Spark ML pipeline:
# assemble the four measurements into a vector, index the species label,
# standardize the features, and fit a logistic regression model.

# Initialize SparkSession
spark = SparkSession.builder.appName("IrisClassification").getOrCreate()

try:
    # Load the Iris dataset. The header row supplies the column names and
    # inferSchema asks Spark to infer column types instead of reading
    # everything as strings.
    df = spark.read.csv("iris.csv", header=True, inferSchema=True)

    # Hold out 20% of the rows for evaluation. Scoring the model on the very
    # rows it was fitted on reports training accuracy, which overstates how
    # well the model generalizes. The fixed seed keeps the split reproducible.
    train_df, test_df = df.randomSplit([0.8, 0.2], seed=42)

    # Prepare the data: pack the measurement columns into a single vector
    # column and map the string species names to numeric label indices.
    feature_columns = [
        "sepal_length",
        "sepal_width",
        "petal_length",
        "petal_width",
    ]
    assembler = VectorAssembler(inputCols=feature_columns, outputCol="features")
    indexer = StringIndexer(inputCol="species", outputCol="label")

    # Apply scaling: center each feature and scale it to unit standard
    # deviation. Because the scaler is a pipeline stage, its statistics are
    # computed from the training split only.
    scaler = StandardScaler(
        inputCol="features",
        outputCol="scaledFeatures",
        withMean=True,
        withStd=True,
    )

    # Set up the classifier on the scaled features.
    lr = LogisticRegression(featuresCol="scaledFeatures", labelCol="label")

    # Create a pipeline so every stage is fitted and applied in order.
    pipeline = Pipeline(stages=[assembler, indexer, scaler, lr])

    # Train the model on the training split only.
    model = pipeline.fit(train_df)

    # Make predictions on the held-out rows.
    predictions = model.transform(test_df)

    # Evaluate the model: fraction of held-out rows whose predicted label
    # index matches the true label index.
    evaluator = MulticlassClassificationEvaluator(
        labelCol="label",
        predictionCol="prediction",
        metricName="accuracy",
    )
    accuracy = evaluator.evaluate(predictions)
    print(f"Accuracy: {accuracy}")
finally:
    # Release the Spark resources even if loading, fitting or evaluation fails.
    spark.stop()

You might also like