R PROGRAMMING
1.#write a R program to use if else function on vectors
# Lab 1: read n integers from standard input and report, for each one,
# whether it is even or odd using the vectorised ifelse() function.
cat("U15IG21S0401\n")

# Open standard input once and reuse the connection for every read;
# re-opening "stdin" on each readLines() call can lose buffered lines when
# the input is piped in rather than typed.
input <- file("stdin")
open(input)

cat("enter the no of elements")
n <- as.integer(readLines(input, n = 1))
cat("Enter the elements\n")
x <- as.integer(readLines(input, n = n))
close(input)

# ifelse() evaluates the test for every element of x and picks the matching
# message element-wise. paste0() avoids the extra blank that paste() would
# insert between the number and the text.
y <- ifelse(x %% 2 == 0, paste0(x, " is even no\n"), paste0(x, " is odd no\n"))

# Each message already ends in a newline, so suppress cat()'s default
# separator to keep the following lines from starting with a space.
cat(y, sep = "")
output:
C:\R>Rscript lab1.R
U15IG21S0401
enter the no of elements5
Enter the elements
10
33
12
40
77
10 is even no
33 is odd no
12 is even no
40 is even no
77 is odd no
2.#write a R program to illustrate the for loop with stop on condition, to print
error message
# Lab 2: print each word with its length and abort with an error as soon as
# the word "stop" is encountered.
cat("U15IG21S0401\n")

# Open standard input once so consecutive reads continue where the previous
# one ended.
input <- file("stdin")
open(input)

cat("Enter the no of words")
n <- as.integer(readLines(input, n = 1))
cat("Enter the words one by one\n")
words <- readLines(input, n = n)
close(input)

for (w in words) {
  # stop() raises an error, which ends the loop and (under Rscript) the
  # whole program; words after "stop" are never printed.
  if (w == "stop") {
    stop("The program has been stoped due to word present as stop")
  }
  cat("word=", w, "length=", nchar(w), "\n")
}
Output:
c:\R>Rscript lab2.R
U15IG21S0401
Enter the no of words5
Enter the words one by one
hi
hello
welcome
stop
world
word= hi length= 2
word= hello length= 5
word= welcome length= 7
Error: The program has been stoped due to word present as stop
Execution halted
3.#write a R program to find a factorial of a number using recursion
cat("U15IG21S0401\n")

# Compute the factorial of n recursively.
#
# n: a single non-negative whole number.
# Returns n! as a number (fact(0) is 1).
# Raises an error for NA, negative, fractional or non-scalar input, which
# would otherwise never reach the base case.
fact <- function(n) {
  if (length(n) != 1 || is.na(n) || n < 0 || n != floor(n)) {
    stop("n must be a single non-negative whole number", call. = FALSE)
  }
  # Base case: 0! is defined as 1.
  if (n == 0) {
    return(1)
  }
  # Recursive case: n! = n * (n - 1)!
  n * fact(n - 1)
}
# Read one number from standard input and print its factorial.
input <- file("stdin")
open(input)
cat("Enter the no")
n <- as.integer(readLines(input, n = 1))
close(input)

result <- fact(n)
cat("The factorial of", n, "=", result)
output:
c:\R>Rscript lab3.R
U15IG21S0401
Enter the no5
The factorial of 5 = 120
4.#write an R program to implement T-test for Anova
employee_id department salary
1 Sales 55000
2 Marketing 62000
3 Engineering 70000
4 Sales 48000
5 Marketing 59000
6 Engineering 81000
7 Sales 52000
8 Marketing 65000
9 Engineering 75000
# Lab 4: one-way ANOVA of salary by department, followed by Tukey's HSD
# post hoc pairwise comparisons.
cat("U15IG21S0401\n")

# Load necessary libraries
library(tidyverse)
library(broom)

# Load the employee dataset.
# NOTE(review): the CSV file name was lost from the original listing;
# "employee_data.csv" is an assumed name - confirm it against the real file.
employee_data <- read.csv("employee_data.csv")

# Check the structure of the dataset
str(employee_data)

# 'salary' is the response variable and 'department' the grouping variable.
# Perform one-way ANOVA
anova_result <- aov(salary ~ department, data = employee_data)

# Print ANOVA summary
print(summary(anova_result))

# Pairwise comparisons between departments. TukeyHSD() is Tukey's honest
# significant difference test (not a plain t-test); it reports p-values
# adjusted for the family of comparisons.
pairwise_tukey <- TukeyHSD(anova_result)

# Print Tukey's post hoc test summary
print(pairwise_tukey)
output:
c:\R>Rscript lab4.R
U15IG21S0401
── Attaching core tidyverse packages
──────────────────────────────────────────────────────────────────
─────────── tidyverse 2.0.0 ──✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.4.4 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.0
✔ purrr 1.0.2
── Conflicts
──────────────────────────────────────────────────────────────────
───────────────────────────── tidyverse_conflicts() ──✖ dplyr::filter() masks
stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<[Link] to force all conflicts to
become errors
'[Link]': 9 obs. of 3 variables:
$ employee_id: int 1 2 3 4 5 6 7 8 9
$ department : chr "Sales" "Marketing" "Engineering" "Sales" ...
$ salary : int 55000 62000 70000 48000 59000 81000 52000 65000 75000
Df Sum Sq Mean Sq F value Pr(>F)
department 2 844666667 422333333 24.52 0.0013 **
Residuals 6 103333333 17222222
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = salary ~ department, data = employee_data)
$department
diff lwr upr p adj
Marketing-Engineering -13333.33 -23729.98 -2936.68951 0.0179999
Sales-Engineering -23666.67 -34063.31 -13270.02284 0.0010455
Sales-Marketing -10333.33 -20729.98 63.31049 0.0511645
5.#program compute mean values for factor aggregates defined by factors
tapply and sapply
# Lab 5: compute mean height per gender with tapply(), then pass the result
# through sapply().
cat("U15IG21S0401\n")

# Open standard input once so consecutive reads continue where the previous
# one ended.
input <- file("stdin")
open(input)

cat("enter the no of persons")
n <- as.integer(readLines(input, n = 1))
cat("enter the ", n, "persons gender\n")
gender <- readLines(input, n = n)
cat("enter the", n, "persons height\n")
heights <- as.numeric(readLines(input, n = n))
close(input)

# tapply() splits heights by the gender labels and applies mean() to each
# group, giving one mean per distinct gender value.
result <- tapply(heights, gender, mean)

# sapply() applies mean() to every element of result. Each element is
# already a single group mean, so the values are the same as in result.
# NOTE(review): the message printed below speaks of "all the persons", but
# this does not compute an overall mean - confirm the intended output.
result1 <- sapply(result, mean)

cat("The mean height of male and female using tapply is\n")
cat(result)
cat("\n the mean height of all the persons using sapply\n")
cat(result1)
output:
c:\R>Rscript lab5.R
U15IG21S0401
enter the no of persons2
enter the 2 persons gender
female
male
enter the 2 persons height
5.3
5.4
The mean height of male and female using tapply is
5.3 5.4
the mean height of all the persons using sapply
5.3 5.4
6.#write a R program to find stationary distribution of markov chain
# Lab 6: build a Markov chain from a user-supplied transition matrix and
# print its stationary distribution.
cat("U15IG21S0401\n")
library(markovchain)

# Open standard input once so consecutive reads continue where the previous
# one ended.
input <- file("stdin")
open(input)

cat("Enter the number of transition states")
n <- as.integer(readLines(input, n = 1))
cat("enter the ", n, ",transition state names one by one\n")
states <- readLines(input, n = n)
cat("Enter the probability distribution values of states\n")
# The n * n probabilities are entered row by row.
probs <- as.numeric(readLines(input, n = n * n))
close(input)

# byrow = TRUE fills the matrix in the order the values were typed; the
# state names label both the rows and the columns.
trans_matrix <- matrix(
  probs,
  nrow = n,
  ncol = n,
  byrow = TRUE,
  dimnames = list(states, states)
)

# Slots must be passed to new() by name; an unnamed argument is not
# interpreted as the 'states' slot.
mkc <- new("markovchain", states = states, transitionMatrix = trans_matrix)
stat_distb <- steadyStates(mkc)

cat("The stationary distribution is as follows")
cat(stat_distb)
output:
c:\R>Rscript lab6.R
U15IG21S0401
Package: markovchain
Version: 0.9.5
Date: 2023-09-24 [Link] UTC
BugReport: [Link]
Enter the number of transition states2
enter the 2 ,transition state names one by one
sunny
rainy
Enter the probability distribution values of states
0.2
0.8
0.4
0.6
The stationary distribution is as follows0.3333333 0.6666667
7.#write a R program to sort the elements using quick sort for binary search
cat("U15IG21S0401\n")

# Sort a vector in ascending order with a recursive quick sort.
#
# arr: vector of comparable values (the first element is used as pivot).
# Returns a vector with the same elements in ascending order.
quick_sort <- function(arr) {
  # A vector of length 0 or 1 is already sorted.
  if (length(arr) <= 1) {
    return(arr)
  }
  pivot <- arr[1]
  # Three-way partition; keeping the elements equal to the pivot in their
  # own group guarantees both recursive calls receive shorter vectors.
  lesser <- arr[arr < pivot]
  equal <- arr[arr == pivot]
  greater <- arr[arr > pivot]
  c(quick_sort(lesser), equal, quick_sort(greater))
}
# Recursive binary search in a vector sorted in ascending order.
#
# arr:  sorted vector to search.
# low:  first index of the current search range (1-based).
# high: last index of the current search range.
# key:  value to look for.
# Returns the index of key within arr, or 0 when key is not present.
binary_search <- function(arr, low, high, key) {
  # An empty range means the key is not in the vector.
  if (low > high) {
    return(0)
  }
  mid <- (low + high) %/% 2
  if (arr[mid] == key) {
    mid
  } else if (key < arr[mid]) {
    # Key can only be in the left half.
    binary_search(arr, low, mid - 1, key)
  } else {
    # Key can only be in the right half.
    binary_search(arr, mid + 1, high, key)
  }
}
# Read the elements, sort them with quick_sort(), then look up one key with
# binary_search() and report its position in the sorted vector.
input <- file("stdin")
open(input)

cat("Enter the no of elements")
n <- as.integer(readLines(input, n = 1))
cat("Enter the elements one by one\n")
x <- as.numeric(readLines(input, n = n))

# Binary search requires sorted input, so sort first.
y <- quick_sort(x)
cat("The sorted elements are\n")
cat(y)

cat("\nEnter element to be searched")
key <- as.numeric(readLines(input, n = 1))
close(input)

low <- 1
high <- length(y)
pos <- binary_search(y, low, high, key)

# binary_search() signals "not found" with position 0.
if (pos == 0) {
  cat("\nElement is not present in the vector")
} else {
  cat("\nElement", key, "is found at", pos, "position")
}
Output:
c:\R>Rscript lab7.R
U15IG21S0401
Enter the no of elements4
Enter the elements one by one
40
60
22
36
The sorted elements are
22 36 40 60
Enter element to be searched36
Element 36 is found at 2 position
8.#write a R program to demonstrate read and write files
# Lab 8: collect student records from standard input, write them to a CSV
# file, read the file back and print it.
cat("U15IG21S0401\n")

# Open standard input once so consecutive reads continue where the previous
# one ended.
input <- file("stdin")
open(input)

cat("enter the number of students")
n <- as.integer(readLines(input, n = 1))

# Preallocate the columns instead of growing them on every iteration.
rollno <- integer(n)
sname <- character(n)
marks <- numeric(n)

# seq_len(n) yields an empty sequence for n == 0, unlike 1:n.
for (i in seq_len(n)) {
  cat("enter the rollno of", i, "th student")
  rollno[i] <- as.integer(readLines(input, n = 1))
  cat("enter the student name")
  sname[i] <- readLines(input, n = 1)
  cat("enter the percentage")
  marks[i] <- as.numeric(readLines(input, n = 1))
}
close(input)

df <- data.frame(rollno = rollno, sname = sname, marks = marks)

# NOTE(review): the CSV file name was lost from the original listing;
# "student.csv" is an assumed name - confirm the intended one.
csv_path <- "student.csv"

# write.csv() also writes the row names by default; read.csv() then returns
# them as an extra first column named X.
write.csv(df, csv_path)
df1 <- read.csv(csv_path)

cat("the data from", csv_path, "file\n")
print(df1)
output:
c:\R>Rscript lab8.R
U15IG21S0401
enter the number of students3
enter the rollno of 1 th student401
enter the student nameRakshita
enter the percentage90
enter the rollno of 2 th student033
enter the student nameSpandana
enter the percentage92
enter the rollno of 3 th student6
enter the student nameBhagya
enter the percentage89
the data from [Link] file
  X rollno    sname marks
1 1    401 Rakshita    90
2 2     33 Spandana    92
3 3      6   Bhagya    89
9.#write a R program to demonstrate histogram,linechart,scatter plots, pie
chart, bar plot and box plot
# Lab 9: demonstrate the basic base-graphics chart types. Run with Rscript,
# the plots are drawn on the default non-interactive graphics device.
cat("U15IG21S0401\n")

# histogram
height <- seq(from = 3, to = 4, by = 0.1)
hist(height, xlab = "height", ylab = "number of persons", main = "histogram",
     col = "red")

# pie chart: one slice per fruit, sized by its sales figure
fruits <- c("Apple", "Banana", "Mango")
sales <- c(400, 300, 350)
pie(sales, fruits)

# bar plot: names.arg supplies the label drawn under each bar
ayear <- c("2018-19", "2019-20", "2020-21", "2021-22", "2022-23")
passout <- c(3500, 3550, 4000, 3900, 4100)
barplot(passout, names.arg = ayear, col = "green", xlab = "year of passout",
        ylab = "no of students passout", main = "Students passing rate")

# line chart: type = "l" joins the points with lines
plot(height, type = "l", pch = 17, cex = 2, col = "red", xlab = "height")

# scatter plot of price against sales
price <- c(300, 200, 270)
plot(sales, price, pch = 17, cex = 2, col = "black")

# box plot
boxplot(height)
output:
10.#write a R program to do basic data manipulation and analysis using data
frames
product      category     price  quantity
laptop       Electronics  1200   5
desk         furniture    250    2
headphones   Electronics  80     10
chair        furniture    150    4
smartphone   furniture    600    3
bookshelf    Electronics  120    3
mouse        furniture    20     8
Table        furniture    180    2
# Lab 10: basic data manipulation and analysis with a data frame that has
# the columns Product, Category, Price and Quantity.
cat("U15IG21S0006\n")

# step 1: load the data
# NOTE(review): the input file name was lost from the original listing;
# "products.csv" is an assumed name - confirm it against the real file.
df <- read.csv("products.csv")

# step 2: explore the data types of the data frame columns
str(df)

# step 3: print the first few rows of the data frame
head(df)

# step 4: summary statistics of the numeric columns
summary(df$Quantity)
summary(df$Price)

# step 5: keep only the rows with a quantity greater than 3
df <- df[df$Quantity > 3, ]
df

# step 6: create a new column holding the total value of each row
df$Total_Price <- df$Quantity * df$Price
df

# step 7: aggregate - sum the total price within each category
df1 <- aggregate(df$Total_Price, list(df$Category), sum)
df1

# step 8: select specific columns from the data frame
df2 <- df[, c("Product", "Quantity")]
df2

# step 9: visualise the data, one bar per product
barplot(df2$Quantity, names.arg = df2$Product, col = "red")

# step 10: export the new data frame
# NOTE(review): the output file name was lost from the original listing;
# "new_products.csv" is an assumed name - confirm the intended one.
write.csv(df2, "new_products.csv")
Output:
c:\R>Rscript lab10.R
U15IG21S0006
> #step1 load the data
> df=[Link]("[Link]")
> #step2 explore the datatypes of data frames columns
> str(df) '[Link]': 8 obs. of 4 variables:
$ Product : chr "Laptop" "Desk" "Headphone" "Chair" ...
$ Category: chr "Electronics" "Furniture" "Electronics" "Furniture" ...
$ Price : int 1200 250 80 150 600 120 20 180
$ Quantity: int 5 2 10 4 3 3 8 2
> #step3 print few rows of data frames
> head(df)
Product Category Price Quantity
1 Laptop Electronics 1200 5
2 Desk Furniture 250 2
3 Headphone Electronics 80 10
4 Chair Furniture 150 4
5 Smartphones Electronics 600 3
6 Bookshelf Furniture 120 3
> #step4 know the summary of each column
> summary(df$Quantity)
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.000 2.750 3.500 4.625 5.750 10.000
> summary(df$Price)
Min. 1st Qu. Median Mean 3rd Qu. Max
20.0 110.0 165.0 325.0 337.5 1200.0
> #step5 filter the rows
> df=df[df$Quantity>3,]
> df
Product Category Price Quantity
1 Laptop Electronics 1200 5
3 Headphone Electronics 80 10
4 Chair Furniture 150 4
7 Mouse Electronics 20 8
> #step6 create a new column
> df$Total_Price=df$Quantity*df$Price
> df
Product Category Price Quantity Total_Price
1 Laptop Electronics 1200 5 6000
3 Headphone Electronics 80 10 800
4 Chair Furniture 150 4 600
7 Mouse Electronics 20 8 160
> #step7 aggregate functions and grouping
> df1=aggregate(df$Total_Price,list(df$Category),sum)
> df1
Group.1 x
1 Electronics 6960
2 Furniture 600
> #step8 selecting the specific columns from data frame
> df2<-df[,c("Product","Quantity")]
> df2 Product Quantity
1 Laptop 5
3 Headphone 10
4 Chair 4
11.#Lab 11: write an R program to create any application of linear
regression
employee_id  years_experience  education_level  job_title                  salary
1            5                 Bachelor's       Software Engineer          80000
2            3                 Master's         Data Scientist             95000
3            8                 PhD              Senior Research Scientist  120000
4            2                 Bachelor's       Marketing Analyst          65000
5            1                 Associate's      Sales Associate            40000
6            6                 Master's         Project Manager            90000
7            10                PhD              Chief Technology Officer   150000
8            4                 Bachelor's       Software Engineer          85000
9            2                 Master's         Data Scientist             70000
10           7                 Bachelor's       Senior Research Scientist  100000
# Lab 11: multiple linear regression predicting salary from years of
# experience, education level and job title.
cat("U15IG21S0401\n")

# Load the dataset.
# NOTE(review): the CSV file name was lost from the original listing;
# "employees.csv" is an assumed name - confirm it against the real file.
employees <- read.csv("employees.csv")

# View the structure of the dataset
str(employees)

# Per-column summary (also shows missing values for numeric columns)
summary(employees)

# Visualise the relationships between the variables to see how they relate
# to each other.
# Scatter plot of salary vs years of experience
plot(salary ~ years_experience, data = employees,
     main = "Scatter Plot: Salary vs Years of Experience")

# Salary vs education level (a categorical predictor, so use box plots)
boxplot(salary ~ education_level, data = employees,
        main = "Box Plot: Salary by Education Level")

# Predict salary from years_experience, education_level and job_title
model <- lm(salary ~ years_experience + education_level + job_title,
            data = employees)

# Get a summary of the model's output
summary(model)

# Extract and print specific information.
coefs <- coef(model)
cat("Intercept:", coefs[1], "\n")
cat("Coefficient for years_experience:", coefs[2], "\n")
# Label coefficients 3 and 4 with the names R gave them. The model has one
# coefficient per non-reference factor level, so fixed labels such as
# "job_title" would not describe what is stored at these positions.
cat("Coefficient for", names(coefs)[3], ":", coefs[3], "\n")
cat("Coefficient for", names(coefs)[4], ":", coefs[4], "\n")
cat("R-squared:", summary(model)$r.squared, "\n")
output:
c:\R>Rscript lab11.R
U15IG21S0401
'[Link]': 10 obs. of 5 variables:
$ employee_id : int 1 2 3 4 5 6 7 8 9 10
$ years_experience: int 5 3 8 2 1 6 10 4 2 7
$ education_level : chr "Bachelor's" "Master's" "PhD"
"Bachelor's" ...
$ job_title : chr "Software Engineer" "Data Scientist"
"Senior Research Scientist" "Marketing Analyst" ...
$ salary : int 80000 95000 120000 65000 40000 90000
150000 85000 70000 100000
employee_id years_experience education_level job_title
Min. : 1.00 Min. : 1.00 Length:10 Length:10
1st Qu.: 3.25 1st Qu.: 2.25 Class :character Class :character
Median : 5.50 Median : 4.50 Mode :character
Mode :character
Mean : 5.50 Mean : 4.80
3rd Qu.: 7.75 3rd Qu.: 6.75
Max. :10.00 Max. :10.00
salary
Min. : 40000
1st Qu.: 72500
Median : 87500
Mean : 89500
3rd Qu.: 98750
Max. :150000
Call:
lm(formula = salary ~ years_experience + education_level +
job_title,
data = employees)
Residuals:
1 2 3 4 5 6 7
-7.500e+03 7.500e+03 2.274e-13 -9.095e-13 -1.819e-12 -
2.046e-12 2.046e-12
8 9 10
7.500e+03 -7.500e+03 2.046e-12
Coefficients: (2 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.000e+04 2.121e+04 1.414 0.392
years_experience 1.000e+04 1.500e+04 0.667
0.626
education_levelBachelor's 1.000e+04 1.237e+05 0.081
0.949
education_levelMaster's -4.352e-10 7.794e+04 0.000
1.000
education_levelPhD 2.000e+04 1.367e+05 0.146
0.907
job_titleData Scientist 2.750e+04 5.562e+04 0.494
0.708
job_titleMarketing Analyst 5.000e+03 1.092e+05 0.046
0.971
job_titleProject Manager NA NA NA NA
job_titleSales Associate NA NA NA NA
job_titleSenior Research Scientist -1.000e+04 3.674e+04 -
0.272 0.831
job_titleSoftware Engineer -2.500e+03 7.310e+04 -0.034
0.978
Residual standard error: 15000 on 1 degrees of freedom
Multiple R-squared: 0.9728, Adjusted R-squared: 0.7552
F-statistic: 4.471 on 8 and 1 DF, p-value: 0.3511
Intercept: 30000
Coefficient for years_experience: 10000
Coefficient for educational_level (Bachelor's as reference):
10000
Coefficient for job_title (Software Engineer as reference): -
4.352441e-10
R-squared: 0.9728015
12.#write a R program to find mean, median and mode
# Lab 12: compute the mean, median and mode of the family incomes read from
# standard input.
cat("U15IG21S0401\n")

# Open standard input once so consecutive reads continue where the previous
# one ended.
input <- file("stdin")
open(input)

cat("Enter the number of students")
n <- as.integer(readLines(input, n = 1))
cat("Enter the family income of", n, "students\n")
income <- as.numeric(readLines(input, n = n))
close(input)

avg <- mean(income)
med <- median(income)

# table() counts how often each distinct income occurs; its names are the
# income values ordered by value, not by frequency. The mode is therefore
# the name(s) whose count equals the maximum count, not simply the first
# name.
counts <- table(income)
max_count <- max(counts)
modes <- names(counts)[counts == max_count]

# Report the most frequent value(s) followed by how often they occur.
mod <- paste(paste(modes, collapse = ", "), max_count)

cat("The mean of family income=", avg, "\n")
cat("The median of family income or middle value=", med, "\n")
cat("The mode or most frequent income is=", mod)
output:
c:\R>Rscript lab12.R
U15IG21S0401
Enter the number of students5
Enter the family income of 5 students
2000
3000
5600
3000
560
The mean of family income= 2832
The median of family income or middle value= 3000
The mode or most frequent income is= 560 2