0% found this document useful (0 votes)

2 views

File 2

The document contains multiple R programming tasks, including data analysis using built-in datasets like PlantGrowth and iris, implementing various machine learning algorithms such as Linear Regression, Support Vector Machine, Decision Tree, Naive Bayes, K-Nearest Neighbour, and K-Means Clustering. Each task includes code snippets for loading datasets, processing data, visualizing results, and calculating model accuracy. Additionally, there are tasks for printing patterns and calculating factorials.

Uploaded by

harshilsonibusiness

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

2 views

File 2

Uploaded by

harshilsonibusiness

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 17

17) Write an R program to print, get information, print variable values,

sort variable values and analyse data for the inbuilt dataset.

# Exploratory walk-through of the built-in PlantGrowth dataset:
# print it, describe it, list and sort its variables, then summarise it.

# Make the PlantGrowth data frame available in the workspace
data("PlantGrowth")

# 1. Dump every row of the dataset
print("Dataset - PlantGrowth:")
print(PlantGrowth)

# 2. Describe the dataset: column types first, then per-column summaries
print("Structure of the dataset:")
str(PlantGrowth)
print("Summary of the dataset:")
summary(PlantGrowth)

# 3. Show the raw values held in each column
print("Values of 'weight' variable:")
print(PlantGrowth[["weight"]])
print("Values of 'group' variable:")
print(PlantGrowth[["group"]])

# 4. Ordering: the weight vector on its own, then whole rows ranked by weight
print("Sorted weights (ascending):")
print(sort(PlantGrowth[["weight"]]))
sorted_df <- PlantGrowth[order(PlantGrowth[["weight"]]), ]
print("Data frame sorted by weight:")
print(sorted_df)

# 5. Descriptive statistics for weight
mean_weight <- mean(PlantGrowth[["weight"]])
sd_weight <- sd(PlantGrowth[["weight"]])
cat("Mean of weights:", mean_weight, "\n")
cat("Standard deviation of weights:", sd_weight, "\n")

# One box per treatment group, each with its own fill colour
boxplot(
  weight ~ group,
  data = PlantGrowth,
  main = "Weight by Treatment Group",
  xlab = "Group",
  ylab = "Weight",
  col = c("lightblue", "lightgreen", "lightcoral")
)

# Number of observations recorded for each group level
group_counts <- table(PlantGrowth[["group"]])
print("Count of samples in each group:")
print(group_counts)
21) Write an R Program to find Factorial of a number.

# Factorial of a number typed in by the user.
#
# compute_factorial(n) returns n! for a non-negative whole number n by
# multiplying 1, 2, ..., n together.
compute_factorial <- function(n) {
  result <- 1
  # seq_len(0) is empty, so the loop is skipped and 0! stays 1.
  # (1:0 would iterate over c(1, 0) and wrongly collapse the product to 0.)
  for (i in seq_len(n)) {
    result <- result * i
  }
  result
}

# Non-numeric text becomes NA; the coercion warning is silenced because the
# NA case is reported explicitly just below.
num <- suppressWarnings(as.numeric(readline("Enter a number: ")))

factorial <- 1
if (!is.finite(num) || num != floor(num)) {
  # Covers empty/non-numeric input (NA), infinities and fractional values,
  # none of which have a factorial in the sense computed here.
  cat("Please enter a whole number.\n")
} else if (num >= 0) {
  factorial <- compute_factorial(num)
  cat("Factorial of", num, "is:", factorial, "\n")
} else {
  cat("Factorial is not defined for negative numbers.\n")
}

22) Write an R Program to print the following pattern

*     *
**   **
*** ***
*******

# Prints a pattern whose two star blocks grow towards each other and merge
# on the last row. Every row is (2 * rows - 1) characters wide, so the final
# row is a solid run of stars, as shown in the task statement.
rows <- 4
width <- 2 * rows - 1

pattern_lines <- vapply(
  seq_len(rows),
  function(i) {
    # Blank columns left between the left and right star blocks
    gap <- width - 2 * i
    if (gap > 0) {
      paste0(strrep("*", i), strrep(" ", gap), strrep("*", i))
    } else {
      # The two blocks meet: print one solid row instead of overlapping them
      strrep("*", width)
    }
  },
  character(1)
)

writeLines(pattern_lines)

23) Write an R Program to print the following pattern

*
**
***
****
# Right-aligned triangle: each row is padded with leading spaces so the
# stars end in the same column.
rows <- 4
for (line_no in seq_len(rows)) {
  padding <- strrep(" ", rows - line_no)
  stars <- strrep("*", line_no)
  cat(padding, stars, "\n", sep = "")
}
24) Write an R Program to print the following pattern
*
**
***
****
# Pyramid of space-separated stars: row k has (rows - k) leading spaces
# followed by k copies of "* ".
rows <- 4
for (line_no in seq_len(rows)) {
  padding <- strrep(" ", rows - line_no)
  stars <- strrep("* ", line_no)
  cat(padding, stars, "\n", sep = "")
}

25) Write an R Program to print the following pattern

  *
 ***
*****
 ***
  *
# Diamond of stars: row sizes climb 1, 3, 5, ... up to (2 * rows - 1) stars
# and then mirror back down, each row centred with leading spaces.
rows <- 3

# Level of every printed line: 1..rows on the way down, rows-1..1 on the way
# back. seq_len(rows - 1) is empty when rows is 1, so a one-row diamond
# prints a single star instead of looping over 0:1.
levels_by_line <- c(seq_len(rows), rev(seq_len(rows - 1)))

# A line at level k has (rows - k) leading spaces and (2 * k - 1) stars
diamond_lines <- paste0(
  strrep(" ", rows - levels_by_line),
  strrep("*", 2 * levels_by_line - 1)
)

writeLines(diamond_lines)

26) Write an R Program to process the dataset by using its functions.

# Basic processing of the iris dataset with dplyr: preview, missing-value
# count, overall and per-species summaries, and a scatter plot.
library(dplyr)

data <- iris

# First rows and total number of missing cells
head(data)
sum(is.na(data))

# Overall mean / max / min of Sepal.Length in a one-row summary
summary_data <- summarise(
  data,
  mean_Sepal.Length = mean(Sepal.Length, na.rm = TRUE),
  max_Sepal.Length = max(Sepal.Length, na.rm = TRUE),
  min_Sepal.Length = min(Sepal.Length, na.rm = TRUE)
)

# Mean Sepal.Length computed separately for each species
species_summary <- summarise(
  group_by(data, Species),
  mean_Sepal.Length = mean(Sepal.Length, na.rm = TRUE)
)

print(summary_data)
print(species_summary)

# Sepal length against sepal width, points coloured by species
plot(
  x = data[["Sepal.Length"]],
  y = data[["Sepal.Width"]],
  main = "Sepal Length vs Sepal Width",
  xlab = "Sepal Length",
  ylab = "Sepal Width",
  col = data[["Species"]],
  pch = 19
)
27) Write an R Program to implement Linear Regression Algorithm.

# Simple linear regression of Glucose on BMI, with a plot of the fitted line
# over the observed points.
library(ggplot2)
library(readr)
library(caret)

# The CSV is read from the working directory; the model below uses its
# Glucose and BMI columns.
dataset <- read_csv("preprocessed_diabetes_dataset.csv")

head(dataset)

# Fit Glucose as a linear function of BMI and report the fit
model <- lm(Glucose ~ BMI, data = dataset)

summary(model)

# Fitted Glucose value for every row, stored next to the observed values
dataset$Predicted_Glucose <- predict(model, newdata = dataset)

# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept here so the script also runs on older ggplot2 versions.
plot <- ggplot(dataset, aes(x = BMI, y = Glucose)) +
  # Scatter plot of actual values
  geom_point(color = "blue", alpha = 0.5) +
  # Regression line
  geom_line(aes(y = Predicted_Glucose), color = "red", size = 1) +
  labs(
    title = "Linear Regression: Glucose vs BMI",
    x = "BMI",
    y = "Glucose"
  ) +
  theme_minimal()

print(plot)
28) Write an R program to implement a Support Vector Machine
Algorithm.

# Linear-kernel SVM classifier for iris species, evaluated on a 30% hold-out.

# Load e1071, installing it first when it is not available
if (!require(e1071)) {
  install.packages("e1071")
  library(e1071)
}

data(iris)
set.seed(123)

# Randomly pick 70% of the row numbers for training; the rest is the test set
sample_index <- sample(seq_len(nrow(iris)), 0.7 * nrow(iris))
train_data <- iris[sample_index, ]
test_data <- iris[-sample_index, ]

# Fit on the training rows using every other column as a predictor
svm_model <- svm(Species ~ ., data = train_data, kernel = "linear")

# Predicted species for the held-out rows
predictions <- predict(svm_model, newdata = test_data)

# Rows are predicted classes, columns are the true classes
conf_matrix <- table(Predicted = predictions, Actual = test_data[["Species"]])
print(conf_matrix)

# Share of test rows on the diagonal, i.e. classified correctly
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
29) Write an R program to implement a Decision Tree Algorithm.

# Decision tree classifier for the Outcome column of diabetes_data.csv,
# evaluated on a 30% hold-out.

# Load necessary libraries (installing them first when missing)
if (!require(rpart)) install.packages("rpart")
if (!require(rpart.plot)) install.packages("rpart.plot")
library(rpart)
library(rpart.plot)

# Load dataset (read from the working directory)
diabetes <- read.csv("diabetes_data.csv")

# Treat the target as a categorical class label before splitting, so the
# training data, the test data and the predictions all share one set of
# levels. Without this, a class missing from the test rows would make the
# confusion matrix non-square and the diag()-based accuracy meaningless.
diabetes$Outcome <- as.factor(diabetes$Outcome)

# Set seed so the random split is reproducible
set.seed(123)

# Split data into training (70%) and testing (30%)
sample_index <- sample(seq_len(nrow(diabetes)), 0.7 * nrow(diabetes))
train_data <- diabetes[sample_index, ]
test_data <- diabetes[-sample_index, ]

# Build decision tree model using every other column as a predictor
tree_model <- rpart(Outcome ~ ., data = train_data, method = "class")

# Plot the tree
rpart.plot(tree_model, main = "Decision Tree for Diabetes Prediction")

# Make predictions: one class label per test row
predictions <- predict(tree_model, test_data, type = "class")

# Confusion matrix: rows are predicted classes, columns are the true classes
conf_matrix <- table(Predicted = predictions, Actual = test_data$Outcome)
print(conf_matrix)

# Accuracy: share of test rows on the diagonal
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
30) Write an R program to implement a Naive Bayes Algorithm.
# Naive Bayes classifier predicting the `am` column of mtcars, evaluated on a
# 30% hold-out.

# Load e1071, installing it first when it is not available
if (!require(e1071)) {
  install.packages("e1071")
  library(e1071)
}

data(mtcars)

# The target must be categorical for classification
mtcars$am <- as.factor(mtcars$am)

set.seed(123)

# Randomly pick 70% of the row numbers for training; the rest is the test set
sample_index <- sample(seq_len(nrow(mtcars)), 0.7 * nrow(mtcars))
train_data <- mtcars[sample_index, ]
test_data <- mtcars[-sample_index, ]

# Fit on the training rows using every other column as a predictor
nb_model <- naiveBayes(am ~ ., data = train_data)

# Predicted class for each held-out row
predictions <- predict(nb_model, newdata = test_data)

# Rows are predicted classes, columns are the true classes
conf_matrix <- table(Predicted = predictions, Actual = test_data[["am"]])
print(conf_matrix)

# Share of test rows on the diagonal, i.e. classified correctly
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
31) Write an R program to implement a K-Nearest Neighbour
Algorithm.

# K-nearest-neighbour classifier for the `quality` column of
# redwinequality.csv, evaluated on a 30% hold-out.

# Install and load required libraries
if (!require(class)) {
  install.packages("class")
  library(class)
}

# Load the dataset (read from the working directory)
wine_data <- read.csv("redwinequality.csv")

# View the first few rows of the dataset
head(wine_data)

# Convert 'quality' to a factor (since it's the target variable)
wine_data$quality <- as.factor(wine_data$quality)

# Set seed for reproducibility
set.seed(123)

# Split the dataset into training (70%) and testing (30%) sets
sample_index <- sample(seq_len(nrow(wine_data)), 0.7 * nrow(wine_data))
train_data <- wine_data[sample_index, ]
test_data <- wine_data[-sample_index, ]

# Separate features and target variable for both train and test sets.
# The target is excluded by name rather than by position, so the split stays
# correct even if 'quality' is not the last column of the CSV.
feature_cols <- setdiff(names(wine_data), "quality")
train_x <- train_data[, feature_cols, drop = FALSE] # All columns except 'quality'
train_y <- train_data$quality                       # Target variable (quality)
test_x <- test_data[, feature_cols, drop = FALSE]   # All columns except 'quality'
test_y <- test_data$quality                         # Target variable (quality)

# NOTE(review): the features are used unscaled, so columns with larger
# numeric ranges dominate the distance; consider standardising them.

# Train the KNN model (k = 3)
k_value <- 3
knn_predictions <- knn(train_x, test_x, train_y, k = k_value)

# Confusion matrix: rows are predicted classes, columns are the true classes
conf_matrix <- table(Predicted = knn_predictions, Actual = test_y)
print(conf_matrix)

# Calculate accuracy: share of test rows on the diagonal
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
32) Write an R program to implement a K-Means Clustering
Algorithm.

# K-means clustering (3 clusters) of the USArrests dataset, visualised on the
# first two principal components.
data("USArrests")
head(USArrests)

# Drop any rows with missing values before clustering
USArrests <- na.omit(USArrests)

# Standardise every column so no variable dominates the distances
scaled_data <- scale(USArrests)

set.seed(123)

# nstart = 25 runs 25 random initialisations and keeps the best one
kmeans_result <- kmeans(scaled_data, centers = 3, nstart = 25)

print(kmeans_result)

# Attach each row's cluster label to the data as a factor
USArrests$Cluster <- as.factor(kmeans_result$cluster)

if (!require(ggplot2)) {
  install.packages("ggplot2")
  library(ggplot2)
}

# Project the scaled data onto principal components for a 2-D view
pca_result <- prcomp(scaled_data)

# Keep the first two component scores together with the cluster labels
pca_data <- data.frame(pca_result$x[, 1:2], Cluster = USArrests$Cluster)

# Axis labels are single-line strings; a line break inside the literal would
# be rendered as a line break in the label.
ggplot(pca_data, aes(x = PC1, y = PC2, color = Cluster)) +
  geom_point(size = 3) +
  labs(
    title = "K-means Clustering on USArrests Dataset",
    x = "Principal Component 1",
    y = "Principal Component 2"
  ) +
  theme_minimal()

File 2
No ratings yet
File 2
17 pages
M Bda
No ratings yet
M Bda
19 pages
BDA Lab Manual (12 Weeks)
No ratings yet
BDA Lab Manual (12 Weeks)
22 pages
WEEK
No ratings yet
WEEK
17 pages
R Lab Program
No ratings yet
R Lab Program
20 pages
R lab Manual
No ratings yet
R lab Manual
22 pages
Datamining Lab Record
No ratings yet
Datamining Lab Record
36 pages
R- language
No ratings yet
R- language
23 pages
R_language Lab Manual_pg 2024
No ratings yet
R_language Lab Manual_pg 2024
29 pages
R Lab Manual (1)_merged
No ratings yet
R Lab Manual (1)_merged
25 pages
R Program 2025,-1
No ratings yet
R Program 2025,-1
11 pages
DATAMINING
No ratings yet
DATAMINING
24 pages
r file code
No ratings yet
r file code
16 pages
Data_scinece_practical_file
No ratings yet
Data_scinece_practical_file
23 pages
Dav Pracs
No ratings yet
Dav Pracs
9 pages
1
No ratings yet
1
19 pages
Session Set Working Directory Choose Directlry
No ratings yet
Session Set Working Directory Choose Directlry
17 pages
Final Data Lab
No ratings yet
Final Data Lab
21 pages
R Examples
No ratings yet
R Examples
56 pages
DM Slip Solutions
100% (1)
DM Slip Solutions
24 pages
shahun term workR1
No ratings yet
shahun term workR1
34 pages
Worksheet Classification2
No ratings yet
Worksheet Classification2
14 pages
R Lab File Deepak
No ratings yet
R Lab File Deepak
27 pages
A Short List of Some Useful R Commands: Input and Display
No ratings yet
A Short List of Some Useful R Commands: Input and Display
2 pages
Aman DA 111
No ratings yet
Aman DA 111
14 pages
DA all (1)
No ratings yet
DA all (1)
15 pages
Rstudio Study Notes For PA 20181126
No ratings yet
Rstudio Study Notes For PA 20181126
6 pages
datamining
No ratings yet
datamining
20 pages
7708 - MBA PredAnanBigDataNov21
No ratings yet
7708 - MBA PredAnanBigDataNov21
11 pages
Datamining 2
No ratings yet
Datamining 2
54 pages
Toc ch1
No ratings yet
Toc ch1
9 pages
Final Cost Practical
No ratings yet
Final Cost Practical
29 pages
BAN5
No ratings yet
BAN5
2 pages
7406HW02-1
No ratings yet
7406HW02-1
3 pages
DM lab
No ratings yet
DM lab
18 pages
R Course
No ratings yet
R Course
7 pages
Record
No ratings yet
Record
23 pages
data science
No ratings yet
data science
15 pages
R Programs
No ratings yet
R Programs
12 pages
Lab file AD pdf
No ratings yet
Lab file AD pdf
25 pages
DA_Lab_Week-1
No ratings yet
DA_Lab_Week-1
7 pages
MY R
No ratings yet
MY R
11 pages
Final Practical
No ratings yet
Final Practical
53 pages
Workshop Activity: X Seq y Length
No ratings yet
Workshop Activity: X Seq y Length
3 pages
R Programming Practical File
No ratings yet
R Programming Practical File
38 pages
bd
No ratings yet
bd
12 pages
R Practicals
No ratings yet
R Practicals
32 pages
Da Exp9,10
No ratings yet
Da Exp9,10
9 pages
bi 5to 8
No ratings yet
bi 5to 8
6 pages
R Lab
No ratings yet
R Lab
15 pages
to edit data science
No ratings yet
to edit data science
18 pages
R Programing Bhagu
No ratings yet
R Programing Bhagu
40 pages
da lab file 2
No ratings yet
da lab file 2
13 pages
STAT-2450 Assignment 1: Name:, Student ID: B00
No ratings yet
STAT-2450 Assignment 1: Name:, Student ID: B00
9 pages
r-cheatsheet-ABCD
No ratings yet
r-cheatsheet-ABCD
3 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
8 pages
ML
No ratings yet
ML
11 pages
saurabh
No ratings yet
saurabh
22 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
11 pages
Profound Python Data Science
From Everand
Profound Python Data Science
Onder Teker
No ratings yet
Model Question Paper 2 21matcs41 - Cse Allied Branches
No ratings yet
Model Question Paper 2 21matcs41 - Cse Allied Branches
4 pages
9 AIML Question bank updated 5 units
No ratings yet
9 AIML Question bank updated 5 units
21 pages
Powerpoint Normality 30042019
No ratings yet
Powerpoint Normality 30042019
20 pages
Table Analysis
No ratings yet
Table Analysis
22 pages
Ee 451 Homework 7 Spring 2016
No ratings yet
Ee 451 Homework 7 Spring 2016
2 pages
EM 7 - EDA - Problem Set 2
No ratings yet
EM 7 - EDA - Problem Set 2
2 pages
Exam in Statistical Machine Learning Statistisk Maskininlärning (1RT700)
No ratings yet
Exam in Statistical Machine Learning Statistisk Maskininlärning (1RT700)
13 pages
MS Excel in The Manual Analysis of A 2k Factorial Design of Experiments Montgomery 8th Ed Solution To 6.7
No ratings yet
MS Excel in The Manual Analysis of A 2k Factorial Design of Experiments Montgomery 8th Ed Solution To 6.7
4 pages
Complex Random Variable 1
No ratings yet
Complex Random Variable 1
6 pages
Simple Linear Regression 2023
No ratings yet
Simple Linear Regression 2023
33 pages
Analisa Univariat: Tingkat Pengetahuan
No ratings yet
Analisa Univariat: Tingkat Pengetahuan
3 pages
Forecasting Nifty Bank Sectors Stock Price Using Arima Model
No ratings yet
Forecasting Nifty Bank Sectors Stock Price Using Arima Model
6 pages
LP Chapter 4 Lesson 3 Confidence Interval Estimate of The Population Mean
No ratings yet
LP Chapter 4 Lesson 3 Confidence Interval Estimate of The Population Mean
3 pages
PAFAS
No ratings yet
PAFAS
15 pages
Psychological Assessment Outline Summary
No ratings yet
Psychological Assessment Outline Summary
9 pages
Motion Detection and
No ratings yet
Motion Detection and
17 pages
Individual Assignment (MBA, 2012)
No ratings yet
Individual Assignment (MBA, 2012)
1 page
JUN 2020 S1 QP edx ial s1
No ratings yet
JUN 2020 S1 QP edx ial s1
24 pages
Estimator & Types of Estimators
No ratings yet
Estimator & Types of Estimators
30 pages
588-Article Text-2174-1-10-20201217 - Anna Maria Vasile
No ratings yet
588-Article Text-2174-1-10-20201217 - Anna Maria Vasile
15 pages
Hull OFOD11 e Solutions CH 31
No ratings yet
Hull OFOD11 e Solutions CH 31
7 pages
Regression Quiz
No ratings yet
Regression Quiz
2 pages
Chi Squared
No ratings yet
Chi Squared
2 pages
Continuous Random Variable Can Take Any One of An Unlimited Number
No ratings yet
Continuous Random Variable Can Take Any One of An Unlimited Number
73 pages
1694845437012_15EMAB203_988_15EMAB203_988_23kle632
No ratings yet
1694845437012_15EMAB203_988_15EMAB203_988_23kle632
2 pages
SBST3203 Elementary Data Analysis MAY 2020: Name: Arif Soebah Id No: 830811125679001 Phone Number: 013-8880791 Email
No ratings yet
SBST3203 Elementary Data Analysis MAY 2020: Name: Arif Soebah Id No: 830811125679001 Phone Number: 013-8880791 Email
9 pages
Powerpoint 3 (Confidence Intervals) 2425
No ratings yet
Powerpoint 3 (Confidence Intervals) 2425
50 pages
Sample 7620
No ratings yet
Sample 7620
11 pages
RM - Binary Logistic Regression Model - Estimation
No ratings yet
RM - Binary Logistic Regression Model - Estimation
19 pages
Chapter 3 Random Variables and Probability Distributions
No ratings yet
Chapter 3 Random Variables and Probability Distributions
20 pages

File 2

Uploaded by

File 2

Uploaded by

17) Write an R program to print, get information, print variable values,

# Load the built-in dataset

# 1. Print the dataset

# 2. Get basic information about the dataset

print("Summary of the dataset:")

# 3. Print variable values

print("Values of 'group' variable:")

# 4. Sort the 'weight' variable in ascending order

sorted_df <- PlantGrowth[order(PlantGrowth$weight), ]

# 5. Analyze the data

sd_weight <- sd(PlantGrowth$weight)

boxplot(weight ~ group, data = PlantGrowth,

group_counts <- table(PlantGrowth$group)

num <- as.numeric(readline("Enter a number: "))

22) Write an R Program to print the following pattern

spaces <- 2 * (rows - i)

23) Write an R Program to print the following pattern

cat(rep("*", i), sep = "")

cat(rep("* ", i), sep = "")

25) Write an R Program to print the following pattern

cat(rep("* ", i), sep = "")

for (i in (rows - 1):1) {

26) Write an R Program to process the dataset by using its functions.

summary_data <- data %>%

species_summary <- data %>%

dataset <- read_csv("preprocessed_diabetes_dataset.csv")

model <- lm(Glucose ~ BMI, data = dataset)

dataset$Predicted_Glucose <- predict(model, newdata = dataset)

plot <- ggplot(dataset, aes(x = BMI, y = Glucose)) +

geom_line(aes(y = Predicted_Glucose), color = "red", size = 1) + #

labs(title = "Linear Regression: Glucose vs BMI",

sample_index <- sample(1:nrow(iris), 0.7 * nrow(iris))

svm_model <- svm(Species ~ ., data = train_data, kernel = "linear")

conf_matrix <- table(Predicted = predictions, Actual = test_data$Species)

# Load necessary libraries

# Split data into training (70%) and testing (30%)

# Build decision tree model

# Plot the tree

sample_index <- sample(1:nrow(mtcars), 0.7 * nrow(mtcars))

nb_model <- naiveBayes(am ~ ., data = train_data)

conf_matrix <- table(Predicted = predictions, Actual = test_data$am)

accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)

# Install and load required libraries

# Load the dataset

# View the first few rows of the dataset

# Convert 'quality' to a factor (since it's the target variable)

# Set seed for reproducibility

# Train the KNN model (k = 3)

scaled_data <- scale(USArrests)

kmeans_result <- kmeans(scaled_data, centers = 3, nstart = 25)

USArrests$Cluster <- as.factor(kmeans_result$cluster)

pca_result <- prcomp(scaled_data)

pca_data <- data.frame(pca_result$x[, 1:2], Cluster = USArrests$Cluster)

ggplot(pca_data, aes(x = PC1, y = PC2, color = Cluster)) +

You might also like