0% found this document useful (0 votes)
2 views

Lab 02 - Compound Data Structures

The document provides a comprehensive overview of compound data structures in R, including vectors, matrices, lists, factors, and data frames. It covers creation, indexing, slicing, and operations on these structures, along with functions for data manipulation and analysis. Additionally, it discusses importing external datasets and performing data simulation.

Uploaded by

testingcode44
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
2 views

Lab 02 - Compound Data Structures

The document provides a comprehensive overview of compound data structures in R, including vectors, matrices, lists, factors, and data frames. It covers creation, indexing, slicing, and operations on these structures, along with functions for data manipulation and analysis. Additionally, it discusses importing external datasets and performing data simulation.

Uploaded by

testingcode44
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 12

Fundamentals-02 Compound Data Structures

Created by H. M. Samadhi Chathuranga Rathnayake


# Compound data structures ----
# Vectors: building them with c()

# A numeric vector.
a <- c(12, 22, 23, 34, 35)
a
class(a)

# A character vector.
b <- c("Dog", "Cat", "Rat")
b
class(b)

# Mixing numbers and strings: c() coerces every element to a single type.
# Coercion order: logical < integer < numeric < complex < character
x <- c(12, 22, 23, "Man", "Car")
x
class(x)

# Logical values combined with a number are coerced to numeric.
y <- c(TRUE, FALSE, 12)
y
class(y)

# Other ways of creating vectors

# Integer sequences with the colon operator (ascending and descending).
d <- 1:10
d

p <- 15:5
p

# seq() with a fixed step size ...
k <- seq(from = 10, to = 50, by = 5)
k

# ... or with a fixed number of equally spaced points.
h <- seq(from = 10, to = 50, length.out = 20)
h

# seq_len(n) gives 1, 2, ..., n.
f <- seq_len(20)
f

# Repeat a pattern 10 times. rep() returns a plain vector, whereas
# replicate(10, c("Dog", "Cow")) would bind the copies into a 2 x 10 matrix.
h <- rep(c("Dog", "Cow"), times = 10)
h

# Indexing & slicing vector elements

a <- c(23, 33, 34, 32, 45, 50, 65)

a[1]              # single element (R indexes from 1)
a[c(1, 3, 6)]     # several positions at once
a[-c(1, 3, 6)]    # everything except these positions
a[1:3]            # a contiguous slice

# Boolean masking: keep the positions flagged TRUE.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
a[c(TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE)]

# Masks are usually produced by a condition on the vector itself.
a[a > 40]
a[a %% 2 == 0]

# Element-wise operations on numeric vectors

a <- c(2, 3, 4, 6, 3)
b <- c(3, 4, 1, 6, 4)

# Vector with a scalar: the scalar is applied to every element.
a + 10
a * 2
a / 3
a^2
a > 3

# Vector with a vector of equal length: elements are paired by position.
a + b
a * b
a / b
a^b
a > b

# Membership: for each element of a, is it present anywhere in b?
a %in% b

# Element-wise operations with recycling
# When the lengths differ, the shorter vector is reused from its start.
# R also warns here because 5 is not a multiple of 3.
p <- c(10, 20, 30, 40, 50)
q <- c(100, 200, 300)

p + q

# Vector properties with functions

a <- c(2, 3, 5, 3, 6, 7, 4, 5, 8, 1, 3, 4, 2)

# Basic summary functions
length(a)    # number of elements
summary(a)   # minimum, quartiles, mean, maximum
str(a)       # compact display of type and first values

min(a)
max(a)
sum(a)
mean(a)
median(a)
var(a)       # sample variance
sd(a)        # sample standard deviation
range(a)     # c(min, max)

# Cumulative functions: running result up to each position
cumsum(a)
cumprod(a)
cummin(a)
cummax(a)

# Vector operations with functions

# append(): add values at the end, or after a given position.
a <- c(20, 30, 40)
b <- append(a, 100)
b

d <- append(x = a, values = 200, after = 2)
d

k <- append(x = a, values = c(100, 200, 300), after = 2)
k

# c() is another way of appending at the end.
h <- c(a, 100, 200, 300)
h

# rep(): repeat the whole vector.
a <- c(20, 30, 40)
rep(a, 3)

# all() / any(): does every / at least one element satisfy the condition?
b <- c(10, 20, 25, 30, 35, 40)
all(b > 20)
all(b < 50)
any(b > 20)
any(b > 50)

# Ordering helpers.
b <- c(100, 50, 25, 30, 15, 40, 15, 40)
sort(b)     # the values in ascending order
order(b)    # the positions that would sort the vector
rev(b)      # reversed
unique(b)   # duplicates removed

# Random sampling

# Without a seed the draw differs from run to run.
h <- c(12, 22, 23, 34, 45, 43, 32, 33, 21, 23, 45, 56, 67, 56, 70)
s <- sample(h, 5)
s

# Fixing the seed makes the draws that follow reproducible.
set.seed(1000)
h <- c(12, 22, 23, 34, 45, 43, 32, 33, 21, 23, 45, 56, 67, 56, 70)
s <- sample(h, 5)
s

# Sampling with replacement: the same element may be drawn more than once.
# (TRUE is spelled out; T is a reassignable variable.)
p <- sample(h, 5, replace = TRUE)
p

# With replacement the sample may be larger than the source vector.
f <- c(12, 22, 23)
q <- sample(f, 10, replace = TRUE)
q

# which(): positions where a condition holds
k <- c(20, 12, 25, 30, 32, 22, 33)
which(k >= 25)
which.min(k)   # position of the smallest value
which.max(k)   # position of the largest value

# Dropping all occurrences of some values.
# A logical mask is used instead of a[-which(...)]: when nothing matches,
# which() returns integer(0) and a[-integer(0)] is an EMPTY vector rather
# than the untouched original.
a <- c(12, 22, 23, 34, 45, 56, 43, 43, 33, 34)
keep <- !(a %in% c(22, 23, 34))
b <- a[keep]
b

# table(): frequency of each distinct value
g <- c("Male", "Female", "Female", "Female", "Male")
table(g)

# replace(): returns a copy with the given positions overwritten
g <- c("Male", "Female", "Female", "Female", "Male")
replace(x = g, list = c(1, 5), values = "M")

# Matrices ----
# Matrix creation

# matrix(data, nrow, ncol) fills column by column by default.
a <- matrix(c(12, 23, 32, 33, 45, 43), 3, 2)
a

# byrow = TRUE fills row by row instead.
b <- matrix(c(12, 23, 32, 33, 45, 43), 3, 2, byrow = TRUE)
b

# array() with a two-element dim vector also produces a matrix.
c <- array(c(12, 23, 32, 33, 45, 43), c(3, 2))
c

# A single value is repeated to fill the whole matrix.
a <- matrix(10, 3, 2)
a

# Matrix properties
b <- matrix(c(12, 23, 32, 33, 45, 43), 3, 2, byrow = TRUE)
b

dim(b)      # c(rows, columns)
nrow(b)
ncol(b)
str(b)
summary(b)  # summarised column by column

# Merging matrices
a <- matrix(c(12, 23, 32, 33), 2, 2)
b <- matrix(c(22, 26, 37, 43), 2, 2)

# cbind() places the matrices side by side.
c <- cbind(a, b)
c

# rbind() stacks them on top of each other.
d <- rbind(a, b)
d

# Indexing & slicing matrices

a <- matrix(c(12, 23, 32, 33, 45, 43, 34, 55, 56), 3, 3)
a

a[1, 3]         # row 1, column 3
a[1, ]          # the whole first row
a[, 2]          # the whole second column
a[c(1, 3), 2]   # rows 1 and 3 of column 2
a[, 2:3]        # columns 2 to 3

# A condition yields a logical matrix of the same shape ...
a > 40

# ... which can be used as a mask; the result is a plain vector.
a[a > 40]

# Matrix operations

a <- matrix(c(12, 23, 32, 33), 2, 2)
b <- matrix(c(22, 26, 37, 43), 2, 2)
c <- matrix(c(12, 23, 32, 33, 45, 43, 34, 55, 56), 3, 3)

b

# Arithmetic operators work element by element.
a + b
a - b
a * b
a / b

# %*% is the true matrix product.
a %*% b

t(c)       # transpose
det(c)     # determinant
solve(c)   # inverse
diag(c)    # main diagonal

# Lists ----
# Creating a list: items may differ in type and size.
a <- c(12, 22, 23, 34)
b <- c("Cat", "Dog")
c <- 100
d <- "Man"
m <- matrix(c(12, 22, 23, 34, 45, 32), 3, 2)

L <- list(a, b, c, d, m)
L

# Each item of the list can be given a name.
names(L) <- c("Part1", "Part2", "Part3", "Part4", "Part5")
L

# Accessing items, and elements inside items
L[[1]]      # the first item, by position
L$Part1     # the same item, by name
L[[1]][2]   # the second element of the first item

# Adding an item: assign to a new name.
L$Part6 <- "Cow"
L

# Removing an item: assign NULL to it.
L$Part3 <- NULL
L

# Convert a list into a vector (everything is flattened to one common type).
v1 <- unlist(L)
v1

# Merge lists: c() concatenates the items of both lists.
L1 <- list("Man", c(12, 22, 23), TRUE)
L2 <- list(matrix(1:20, 4, 5), c("Cat", "Dog"))

L1
L2

L3 <- c(L1, L2)
L3

# Splitting a list
a <- c(12, 22, 23, 34)
b <- c("Cat", "Dog")
c <- 100
d <- "Man"
m <- matrix(c(12, 22, 23, 34, 45, 32), 3, 2)

L <- list(a, b, c, d, m)
L

# Single brackets return a sub-list holding the selected items.
L1 <- L[1:2]
L2 <- L[3:5]
L1
L2

# Factors ----
# Unordered factors: levels are the distinct values, sorted alphabetically.
g <- c("Male", "Female", "Male", "Male", "Male", "Female")
g

fg <- factor(g)
fg

# Factors with an explicit level order
# (the last value is "fourth"; the literal must stay on one line, otherwise
# it contains a newline, matches no level and becomes NA)
h <- c("first", "first", "fifth", "fourth", "second", "fifth", "third",
       "second", "fourth")
h

# Default: levels in alphabetical order, which is not the natural order here.
fh <- factor(h)
fh

# Supplying `levels` fixes the order in which the levels are stored and shown.
# NOTE(review): this alone does not make the factor ordinal (is.ordered() is
# FALSE); pass ordered = TRUE as well if comparisons such as < are needed.
ofh <- factor(h, levels = c("first", "second", "third", "fourth", "fifth"))
ofh
# Data Frames ----
# Each vector becomes one column; all vectors must have the same length.
# ("Lilly" must stay on one line, otherwise the name contains a newline.)
name <- c("Kane", "Jane", "David", "Harry", "Larry", "Mary", "John", "Jessy",
          "Anne", "Lilly", "Julia", "Pale")
age <- c(23, 33, 34, 32, 21, 22, 23, 34, 32, 18, 21, 23)
marks <- c(89, 78, 88, 59, 67, 78, 88, 90, 59, 75, 77, 69)

df <- data.frame(name, age, marks)
df

# Accessing columns in a data frame
df["name"]   # single brackets give a one-column data frame
df["age"]
df$name      # $ gives the column as a vector

# with() evaluates an expression with the columns visible by name.
# It is used here instead of attach()/detach(): attach() changes the search
# path, and variables of the same name in the workspace (name, age, marks
# above) would mask the attached columns anyway.
with(df, name)
with(df, age)
with(df, marks)

# Selecting several columns
df[c("name", "age")]

# Accessing the elements in a data frame
df

df[1, 1]             # first row, first column
df[1, "age"]         # first row, age value
df[c(1, 5), "age"]   # first and fifth rows, age value
df[2, ]              # all values in the 2nd row
df[, 3]              # all values in the 3rd column
df[, -3]             # all columns except the 3rd
df[2:6, 2]           # 2nd to 6th row of the 2nd column
df$name[1:5]         # 1st to 5th entry of the name column

# Change elements in a data frame
df

# Overwrite a single cell.
df[1, 1] <- "Sammie"
df

# Overwrite rows 2 to 6 of the second column.
df[2:6, 2] <- rep(50, 5)
df

# Adding a new column: assign a vector with one value per row.
df$class <- rep(c("C1", "C2", "C3", "C4", "C5", "C6"), times = 2)
df

# Removing an existing column: assign NULL to it.
df$age <- NULL
df

# Basic column operations are vectorised over the rows.
df$marks_new <- df$marks + 5
df

df$marks_diff <- df$marks_new - df$marks
df

# Checking conditions: TRUE where marks are greater than or equal to 70.
df$marks >= 70

# Boolean masking for data frames: keep the rows where the condition is TRUE.
df_new <- df[df$marks >= 70, ]
df_new

# Data frame functions

# View() opens a spreadsheet-style viewer, so it is only called when a user
# is present; this keeps the script runnable in batch mode.
if (interactive()) {
  View(df)
}

head(df)              # top rows
head(x = df, n = 8)
tail(df)              # bottom rows
tail(x = df, n = 8)

dim(df)               # dimensions
nrow(df)
ncol(df)

# Row and column names
row.names(df)
colnames(df)

# Rename all columns at once; one name per existing column, in order.
# (Each name must be a single unbroken string literal.)
colnames(df) <- c("Student_Name", "Previous_Marks", "Class", "New_Marks",
                  "Marks_Difference")
df

# Column and row sums
colSums(df[c("Previous_Marks", "New_Marks")])
rowSums(df[c("Previous_Marks", "New_Marks")])

# Column and row means
colMeans(df[c("Previous_Marks", "New_Marks")])
rowMeans(df[c("Previous_Marks", "New_Marks")])

# edit() can be used for editing a data frame manually. It needs an
# interactive editor, so it is skipped in batch mode.
if (interactive()) {
  df1 <- edit(df)
  print(df1)
}

# str() gives the data type of every column.
str(df)

# summary() gives a per-column summary of the data frame.
summary(df)

# which() gives the row indexes that satisfy a condition.
which(df$Previous_Marks >= 70)

# table() gives the frequencies of a categorical column.
table(df$Class)

# Factorize the categorical columns

# (The last age is 21; a number literal cannot be split across lines.)
df <- data.frame(
  Name = c("Sam", "Kane", "Jane"),
  Gender = c("M", "M", "F"),
  Age = c(23, 32, 21),
  University_Year = c(2, 3, 1)
)
df

# Before conversion Gender and University_Year are not stored as factors.
str(df)
summary(df)

# Convert the categorical columns; `levels` fixes the order of the year levels.
df$Gender <- factor(df$Gender)
df$University_Year <- factor(df$University_Year, levels = c(1, 2, 3))

# After conversion str() shows factors and summary() shows counts per level.
str(df)
summary(df)

# Working with external data sets ----

# CSV files
# Importing CSV files

# Option 1: read the file through its full path.
# (The path is kept as one unbroken string literal; a line break inside the
# quotes would become part of the path.)
data <- read.csv("D:\\Workshops\\R Programming for Data Science Workshop\\Part 01 - Fundamentals of R Programming\\Datasets\\default.CSV")

# Option 2: make the dataset folder the working directory and use the bare
# file name. getwd() shows the current working directory first.
getwd()

setwd("D:\\Workshops\\R Programming for Data Science Workshop\\Part 01 - Fundamentals of R Programming\\Datasets")
data <- read.csv("default.CSV")
data

# Now we can treat this as a data frame.
head(data)
dim(data)
colnames(data)
str(data)

table(data$Loan.Offered)
table(data$Own.house)

# Convert every categorical column to a factor in one step.
factor_cols <- c("Gender", "Loan.Offered", "Job", "Status",
                 "Credit.History", "Own.house", "Purpose")
data[factor_cols] <- lapply(data[factor_cols], factor)

str(data)
head(data)
summary(data)

# We can perform any data frame operation

# Row subsets by gender.
data_male <- data[data$Gender == "Male", ]
data_female <- data[data$Gender == "Female", ]
# The same subset expressed as "not Male"; this overwrites the line above.
data_female <- data[data$Gender != "Male", ]

summary(data_female)

# Derived column: credit score per year of work experience.
data_female$CS_Ex_Ratio <- data_female$Credit.Score / data_female$Work.Exp
head(data_female)

summary(data_male)

# Start everybody at "Low" ...
data_male$Exp_Level <- "Low"
head(data_male)

# ... then upgrade the rows with at least 15 years of experience.
# The column is indexed directly (instead of data_male[rows, ]$Exp_Level) so
# the assignment also works when no row qualifies; rows with a missing
# Work.Exp are left at "Low".
is_experienced <- !is.na(data_male$Work.Exp) & data_male$Work.Exp >= 15
data_male$Exp_Level[is_experienced] <- "High"
head(data_male)

# Data simulation ----

# 100 draws from the standard normal distribution (mean 0, sd 1).
datanorm <- rnorm(100)
datanorm

# 100 draws from a normal distribution with mean 20 and sd 5.
datanorm2 <- rnorm(n = 100, mean = 20, sd = 5)
datanorm2

# 100 draws from the uniform distribution on [10, 20].
datauni <- runif(n = 100, min = 10, max = 20)
datauni

# 100 draws from a Poisson distribution with rate 5.
datapois <- rpois(n = 100, lambda = 5)
datapois

# 100 draws from a binomial distribution with one trial each (0/1 outcomes).
databin <- rbinom(n = 100, size = 1, prob = 0.5)
databin

# Further generators: rnbinom(), rgamma(), rhyper(), rbeta()

You might also like