0% found this document useful (0 votes)
2 views

File 2

The document contains multiple R programming tasks, including data analysis using built-in datasets like PlantGrowth and iris, implementing various machine learning algorithms such as Linear Regression, Support Vector Machine, Decision Tree, Naive Bayes, K-Nearest Neighbour, and K-Means Clustering. Each task includes code snippets for loading datasets, processing data, visualizing results, and calculating model accuracy. Additionally, there are tasks for printing patterns and calculating factorials.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
2 views

File 2

The document contains multiple R programming tasks, including data analysis using built-in datasets like PlantGrowth and iris, implementing various machine learning algorithms such as Linear Regression, Support Vector Machine, Decision Tree, Naive Bayes, K-Nearest Neighbour, and K-Means Clustering. Each task includes code snippets for loading datasets, processing data, visualizing results, and calculating model accuracy. Additionally, there are tasks for printing patterns and calculating factorials.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 17

17) Write an R program to print the inbuilt dataset, get information about it, print its variable values, sort its variable values and analyse its data.

# Task 17: print, inspect, sort and analyse the built-in PlantGrowth dataset.
# The script uses its `weight` and `group` columns.

# Load the built-in dataset
data("PlantGrowth")

# 1. Print the dataset
print("Dataset - PlantGrowth:")
print(PlantGrowth)

# 2. Get basic information about the dataset
print("Structure of the dataset:")
str(PlantGrowth)

print("Summary of the dataset:")
# Explicit print(): a bare summary() call is only auto-printed at top level,
# so it would be skipped when this script is run through source().
print(summary(PlantGrowth))

# 3. Print variable values
print("Values of 'weight' variable:")
print(PlantGrowth$weight)

print("Values of 'group' variable:")
print(PlantGrowth$group)

# 4. Sort the 'weight' variable in ascending order
print("Sorted weights (ascending):")
print(sort(PlantGrowth$weight))

# order() returns the row permutation, so whole rows move together.
sorted_df <- PlantGrowth[order(PlantGrowth$weight), ]
print("Data frame sorted by weight:")
print(sorted_df)

# 5. Analyze the data
mean_weight <- mean(PlantGrowth$weight)
cat("Mean of weights:", mean_weight, "\n")

sd_weight <- sd(PlantGrowth$weight)
cat("Standard deviation of weights:", sd_weight, "\n")

# One box per level of `group`; the three colours are matched to the boxes
# in order.
boxplot(weight ~ group,
  data = PlantGrowth,
  main = "Weight by Treatment Group",
  xlab = "Group", ylab = "Weight",
  col = c("lightblue", "lightgreen", "lightcoral")
)

# Number of observations in each group.
group_counts <- table(PlantGrowth$group)
print("Count of samples in each group:")
print(group_counts)
21) Write an R Program to find Factorial of a number.

# Task 21: read a number from the user and print its factorial.

# Return n! for a single non-negative whole number `n`.
# prod(seq_len(n)) replaces a loop over 1:n: 1:0 expands to c(1, 0), which
# would make 0! come out as 0, whereas seq_len(0) is empty and prod() of an
# empty vector is 1.
compute_factorial <- function(n) {
  prod(seq_len(n))
}

num <- as.numeric(readline("Enter a number: "))

# Same starting value as before, so `factorial` always exists after the script.
factorial <- 1

if (!is.finite(num)) {
  # Covers NA (empty or non-numeric input), NaN and +/-Inf, all of which would
  # otherwise make the comparisons or the sequence below fail.
  cat("Please enter a valid finite number.\n")
} else if (num < 0) {
  cat("Factorial is not defined for negative numbers.\n")
} else if (num != floor(num)) {
  # A fractional input would otherwise be silently truncated by the sequence.
  cat("Factorial is only defined for whole numbers.\n")
} else {
  factorial <- compute_factorial(num)
  cat("Factorial of", num, "is:", factorial, "\n")
}

22) Write an R Program to print the following pattern


*     *
**   **
*** ***
*******

# Task 22: print two mirrored star triangles that merge into one solid
# bottom row.
#
# The target figure ends in a row of 7 stars for 4 rows, i.e. the figure is
# (2 * rows - 1) characters wide. Printing `i` stars on each side of
# 2 * (rows - i) spaces gives an 8-wide figure instead, so each row is built
# against the odd width and the last row is emitted as one solid run.
rows <- 4
width <- 2 * rows - 1

pattern_lines <- vapply(seq_len(rows), function(i) {
  if (i == rows) {
    # The two triangles share the centre column on the final row.
    return(strrep("*", width))
  }
  wing <- strrep("*", i)
  paste0(wing, strrep(" ", width - 2 * i), wing)
}, character(1))

writeLines(pattern_lines)

23) Write an R Program to print the following pattern


   *
  **
 ***
****
# Task 23: right-aligned triangle of stars.
# Row k is (rows - k) spaces followed by k stars.
rows <- 4
for (star_count in seq_len(rows)) {
  padding <- strrep(" ", rows - star_count)
  stars <- strrep("*", star_count)
  cat(padding, stars, "\n", sep = "")
}
24) Write an R Program to print the following pattern
*
**
***
****
# Task 24: pyramid of space-separated stars.
# Row k is (rows - k) spaces followed by k copies of "* ", so every row keeps
# a trailing space after its last star.
rows <- 4
row_ids <- seq_len(rows)
indent <- strrep(" ", rows - row_ids)
stars <- strrep("* ", row_ids)
writeLines(paste0(indent, stars))

25) Write an R Program to print the following pattern


  *
 ***
*****
 ***
  *
# Task 25: solid diamond of stars.
#
# The target figure has rows of 1, 3, 5, 3, 1 stars, so row k carries
# (2 * k - 1) stars behind (rows - k) spaces. Repeating "* " k times prints
# only 1, 2, 3, 2, 1 stars and does not match the figure.
rows <- 3

# Row indices for the upper half (1..rows) followed by the lower half
# (rows - 1 .. 1). rev(seq_len()) stays empty when rows is 1, whereas
# (rows - 1):1 would expand to c(0, 1) and print extra rows.
star_rows <- c(seq_len(rows), rev(seq_len(rows - 1)))

pattern_lines <- paste0(
  strrep(" ", rows - star_rows),
  strrep("*", 2 * star_rows - 1)
)

writeLines(pattern_lines)

26) Write an R Program to process the dataset by using its functions.


library(dplyr)
# Task 26: process the iris dataset with dplyr and plot two of its columns.

# Work on a copy of the built-in iris data frame.
data <- iris

# Preview the first rows, then count the missing cells in the whole frame.
head(data)
sum(is.na(data))

# Overall mean, maximum and minimum of Sepal.Length.
summary_data <- summarise(
  data,
  mean_Sepal.Length = mean(Sepal.Length, na.rm = TRUE),
  max_Sepal.Length = max(Sepal.Length, na.rm = TRUE),
  min_Sepal.Length = min(Sepal.Length, na.rm = TRUE)
)

# Mean Sepal.Length within each Species.
by_species <- group_by(data, Species)
species_summary <- summarise(
  by_species,
  mean_Sepal.Length = mean(Sepal.Length, na.rm = TRUE)
)

print(summary_data)
print(species_summary)

# Scatter plot of sepal length against sepal width; the Species factor is
# passed as the colour, so points are coloured by species.
with(
  data,
  plot(Sepal.Length, Sepal.Width,
    main = "Sepal Length vs Sepal Width",
    xlab = "Sepal Length", ylab = "Sepal Width",
    col = Species, pch = 19
  )
)
27) Write an R Program to implement Linear Regression Algorithm.

library(ggplot2)
library(readr)
library(caret)

# Task 27: simple linear regression of Glucose on BMI.

# Read the diabetes data; the Glucose and BMI columns are used below.
dataset <- read_csv("preprocessed_diabetes_dataset.csv")

# Explicit print() calls so the output also appears when run via source().
print(head(dataset))

# Fit Glucose as a linear function of BMI.
model <- lm(Glucose ~ BMI, data = dataset)

print(summary(model))

# Fitted Glucose value for every row; used to draw the regression line.
dataset$Predicted_Glucose <- predict(model, newdata = dataset)

# Each layer comment sits on its own line: a wrapped trailing comment leaves
# stray words outside the comment, which breaks the `+` chain at parse time.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# switch once the installed ggplot2 version is confirmed.
plot <- ggplot(dataset, aes(x = BMI, y = Glucose)) +
  # Scatter plot of actual values
  geom_point(color = "blue", alpha = 0.5) +
  # Regression line
  geom_line(aes(y = Predicted_Glucose), color = "red", size = 1) +
  labs(
    title = "Linear Regression: Glucose vs BMI",
    x = "BMI",
    y = "Glucose"
  ) +
  theme_minimal()

print(plot)
28) Write an R program to implement a Support Vector Machine
Algorithm.

# Task 28: linear-kernel Support Vector Machine on the iris dataset.

# Load e1071 (provides svm()), installing it first if it is not available.
if (!require(e1071)) {
  install.packages("e1071")
  library(e1071)
}
data(iris)
set.seed(123)

# Random 70% of the rows for training; the remaining rows form the test set.
n_obs <- nrow(iris)
sample_index <- sample(seq_len(n_obs), 0.7 * n_obs)

train_data <- iris[sample_index, ]
test_data <- iris[-sample_index, ]

# Predict Species from all other columns.
svm_model <- svm(Species ~ ., data = train_data, kernel = "linear")
predictions <- predict(svm_model, test_data)

# Rows are predicted classes, columns are actual classes.
conf_matrix <- table(Predicted = predictions, Actual = test_data$Species)
print(conf_matrix)

# Accuracy is the diagonal of the table divided by the table total.
n_correct <- sum(diag(conf_matrix))
accuracy <- n_correct / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
29) Write an R program to implement a Decision Tree Algorithm.

# Task 29: classification tree (rpart) predicting Outcome from all other
# columns of the diabetes data.

# Load necessary libraries, installing any that are missing
if (!require(rpart)) install.packages("rpart")
if (!require(rpart.plot)) install.packages("rpart.plot")
library(rpart)
library(rpart.plot)

# Load dataset
diabetes <- read.csv("diabetes_data.csv")

# Make the target a factor BEFORE splitting. Predictions and actual values
# then share one set of levels, so the confusion table below is always square
# and aligned, even if a class is absent from the test rows. Without this,
# diag() on a non-square table would give a silently wrong accuracy.
diabetes$Outcome <- as.factor(diabetes$Outcome)

# Set seed
set.seed(123)

# Split data into training (70%) and testing (30%)
sample_index <- sample(seq_len(nrow(diabetes)), 0.7 * nrow(diabetes))
train_data <- diabetes[sample_index, ]
test_data <- diabetes[-sample_index, ]

# Build decision tree model
tree_model <- rpart(Outcome ~ ., data = train_data, method = "class")

# Plot the tree
rpart.plot(tree_model, main = "Decision Tree for Diabetes Prediction")

# Make predictions (type = "class" returns predicted labels)
predictions <- predict(tree_model, test_data, type = "class")

# Confusion matrix: rows are predicted classes, columns are actual classes
conf_matrix <- table(Predicted = predictions, Actual = test_data$Outcome)
print(conf_matrix)

# Accuracy: diagonal of the table divided by the table total
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
30) Write an R program to implement a Naive Bayes Algorithm.
# Task 30: Naive Bayes classifier predicting `am` from the other mtcars columns.

# Load e1071 (provides naiveBayes()), installing it first if it is missing.
if (!require(e1071)) {
  install.packages("e1071")
  library(e1071)
}

data(mtcars)
# The target must be categorical for classification.
mtcars$am <- as.factor(mtcars$am)
set.seed(123)

# Random 70% of the rows for training; the remaining rows form the test set.
n_obs <- nrow(mtcars)
sample_index <- sample(seq_len(n_obs), 0.7 * n_obs)

train_data <- mtcars[sample_index, ]
test_data <- mtcars[-sample_index, ]

nb_model <- naiveBayes(am ~ ., data = train_data)
predictions <- predict(nb_model, test_data)

# Rows are predicted classes, columns are actual classes.
conf_matrix <- table(Predicted = predictions, Actual = test_data$am)
print(conf_matrix)

# Accuracy is the diagonal of the table divided by the table total.
n_correct <- sum(diag(conf_matrix))
accuracy <- n_correct / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
31) Write an R program to implement a K-Nearest Neighbour
Algorithm.

# Task 31: K-Nearest Neighbour classifier predicting wine `quality`.

# Install and load required libraries
if (!require(class)) {
  install.packages("class")
  library(class)
}

# Load the dataset
wine_data <- read.csv("redwinequality.csv")

# View the first few rows of the dataset
print(head(wine_data))

# Convert 'quality' to a factor (since it's the target variable)
wine_data$quality <- as.factor(wine_data$quality)

# Set seed for reproducibility
set.seed(123)

# Split the dataset into training (70%) and testing (30%) sets
sample_index <- sample(seq_len(nrow(wine_data)), 0.7 * nrow(wine_data))
train_data <- wine_data[sample_index, ]
test_data <- wine_data[-sample_index, ]

# Pick the feature columns by name rather than by position, so the split
# stays correct even if 'quality' is not the last column of the file.
# NOTE(review): assumes every remaining column is numeric - confirm against
# the CSV.
feature_cols <- setdiff(names(wine_data), "quality")
train_features <- train_data[, feature_cols, drop = FALSE]
test_features <- test_data[, feature_cols, drop = FALSE]

# KNN is distance based, so features on larger numeric ranges would dominate
# the neighbour search. Standardise with the TRAINING mean/sd only and reuse
# those statistics on the test set to avoid leaking test information.
feature_means <- colMeans(train_features)
feature_sds <- vapply(train_features, sd, numeric(1))
feature_sds[feature_sds == 0] <- 1 # constant column: avoid division by zero

train_x <- scale(train_features, center = feature_means, scale = feature_sds)
test_x <- scale(test_features, center = feature_means, scale = feature_sds)
train_y <- train_data$quality # Target variable (quality)
test_y <- test_data$quality # Target variable (quality)

# Train the KNN model (k = 3)
k_value <- 3
knn_predictions <- knn(train_x, test_x, train_y, k = k_value)

# Confusion matrix: rows are predicted classes, columns are actual classes
conf_matrix <- table(Predicted = knn_predictions, Actual = test_y)
print(conf_matrix)

# Calculate accuracy: diagonal of the table divided by the table total
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
32) Write an R program to implement a K-Means Clustering
Algorithm.

# Task 32: K-means clustering (3 clusters) of the USArrests dataset, shown on
# its first two principal components.

data("USArrests")
# Explicit print() so the preview also appears when run via source().
print(head(USArrests))

# Drop rows containing missing values, then centre and scale every column
# (scale() defaults).
USArrests <- na.omit(USArrests)
scaled_data <- scale(USArrests)

# Fixed seed so the random starts below are reproducible.
set.seed(123)

# nstart = 25 runs 25 random initialisations.
kmeans_result <- kmeans(scaled_data, centers = 3, nstart = 25)

print(kmeans_result)

# Attach the cluster label of each row to the local copy of the data.
USArrests$Cluster <- as.factor(kmeans_result$cluster)

if (!require(ggplot2)) {
  install.packages("ggplot2")
  library(ggplot2)
}

# PCA on the scaled data (computed before the Cluster column was added);
# only the first two components are kept for plotting.
pca_result <- prcomp(scaled_data)
pca_data <- data.frame(pca_result$x[, 1:2], Cluster = USArrests$Cluster)

# Axis labels are single-line strings: a literal wrapped across two source
# lines would embed a line break in the label.
cluster_plot <- ggplot(pca_data, aes(x = PC1, y = PC2, color = Cluster)) +
  geom_point(size = 3) +
  labs(
    title = "K-means Clustering on USArrests Dataset",
    x = "Principal Component 1",
    y = "Principal Component 2"
  ) +
  theme_minimal()

# Explicit print(): a bare ggplot object is only drawn by top-level
# auto-printing, so it would not be shown under source().
print(cluster_plot)

You might also like