0% found this document useful (0 votes)
52 views

Exercise-9: Study and Implementation of Data Visualization With ggplot

This document summarizes the process of creating various data visualizations using the ggplot2 package in R. It demonstrates how to create histograms, density plots, bar charts, box plots, scatter plots, and faceted plots. It also shows how to customize visualizations by modifying titles, labels, themes, color scales, axis ranges, and more. The goal is to study and implement different types of data visualization using ggplot2.

Uploaded by

Sri Ram
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
52 views

Exercise-9: Study and Implementation of Data Visualization With ggplot

This document summarizes the process of creating various data visualizations using the ggplot2 package in R. It demonstrates how to create histograms, density plots, bar charts, box plots, scatter plots, and faceted plots. It also shows how to customize visualizations by modifying titles, labels, themes, color scales, axis ranges, and more. The goal is to study and implement different types of data visualization using ggplot2.

Uploaded by

Sri Ram
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 1

Experiment-9.

Study and implementation of


Data
visualization with ggplot
Radhika Sureshbhai MAtholiya
_MCA_SET28(67220200055)
2022-07-05
######Experiment No. 9##########

#Aim: Study and implementation of Data Visualization with ggplot

#Install and Load Package

#installing package

#install.packages("ggplot2")

library(ggplot2)

library(plyr)

library(dplyr)

##

## Attaching package: 'dplyr'

## The following objects are masked from 'package:plyr':

##

## arrange, count, desc, failwith, id, mutate, rename, summarise,

## summarize

## The following objects are masked from 'package:stats':

##

## filter, lag

## The following objects are masked from 'package:base':

##

## intersect, setdiff, setequal, union

# Histograms, density plots and box plots all visualise a continuous variable.

# ---- Histograms ----

# Inspect the iris data frame in the data viewer before plotting it.
View(iris)

# Default histogram of sepal length.
ggplot(iris, aes(Sepal.Length)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# `binwidth` sets the width of each bin explicitly.
ggplot(iris, aes(Sepal.Length)) +
  geom_histogram(binwidth = 1)

# Alternatively, ask for a number of bins and let the bin width follow
# from it.
ggplot(iris, aes(Sepal.Length)) +
  geom_histogram(bins = 10, fill = "white", color = "black")

# Mapping the outline colour to Species shows each group in the histogram.
ggplot(iris, aes(Sepal.Length, color = Species)) +
  geom_histogram(binwidth = 1, fill = "white")

# ---- Density plots ----
# A density plot is another way to present the distribution of a continuous
# variable.
ggplot(iris, aes(Sepal.Length)) +
  geom_density()

# Mapping colour to Species draws a separate density curve for each group.
ggplot(iris, aes(Sepal.Length, color = Species)) +
  geom_density()

# ---- Bar and column charts ----
# The mpg data set is used for the charts in this section.

# With only x mapped, geom_bar() draws one bar per class whose height is the
# number of rows in that class.
ggplot(mpg, aes(x = class)) +
  geom_bar()

# coord_flip() interchanges the x and y axes.
ggplot(mpg, aes(x = class)) +
  geom_bar() +
  coord_flip()

# Adding or modifying the main title and the axis labels.
p <- ggplot(mpg, aes(x = class)) +
  geom_bar()

p + labs(
  title = "Number of Cars in each type",
  x = "Type of car",
  y = "Number of cars"
)

# Adding data labels on top of the bars.
p <- ggplot(mpg, aes(x = class)) +
  geom_bar()

p <- p + labs(
  title = "Number of Cars in each type",
  x = "Type of car",
  y = "Number of cars"
)

# after_stat(count) reads the per-bar count computed by stat = "count"; it is
# the supported replacement for the deprecated `..count..` notation.
# A negative vjust lifts each label just above its bar.
p + geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.25)

# ---- Reordering bars ----
# With stat = "identity", geom_bar() plots the supplied y values instead of
# counting rows, so pre-computed summaries can be drawn directly.

# Count the cars per class, sort by descending count, then fix the factor
# levels in that sorted order so the bars are drawn from largest to smallest.
count(mpg, class) %>%
  arrange(desc(n)) %>%
  mutate(class = factor(class, levels = class)) %>%
  ggplot(aes(x = class, y = n)) +
  geom_bar(stat = "identity")

# ---- Mean of a continuous variable by a categorical variable ----
# Mean displacement per class, ordered from the largest mean to the smallest.
df <- mpg %>%
  group_by(class) %>%
  summarise(mean = mean(displ)) %>%
  arrange(desc(mean)) %>%
  mutate(class = factor(class, levels = class))

p <- ggplot(df, aes(x = class, y = mean)) +
  geom_bar(stat = "identity")

# Print each mean with two decimals inside the top of its bar (a positive
# vjust moves the label down, white text keeps it readable on the bar).
p + geom_text(
  aes(label = sprintf("%0.2f", round(mean, digits = 2))),
  vjust = 1.6, color = "white", fontface = "bold", size = 4
)

# ---- Stacked bar chart ----
# Base plot: displacement by class, with the fill colour taken from the
# drive type.
p <- ggplot(mpg, aes(class, displ, fill = drv))

# Default position: the drv groups are stacked on top of each other.
p + geom_bar(stat = "identity")

# position_dodge() places the drv groups side by side instead.
p + geom_bar(position = position_dodge(), stat = "identity")

# ---- Box plots ----
# Convert cyl to a factor so that each cylinder count gets its own box.
# Note: this changes the cyl column of mtcars for the rest of the script.
mtcars$cyl <- factor(mtcars$cyl)

ggplot(mtcars, aes(x = cyl, y = disp)) +
  geom_boxplot()

# notch = TRUE draws a notched box plot.
ggplot(mtcars, aes(x = cyl, y = disp)) +
  geom_boxplot(notch = TRUE)
## notch went outside hinges. Try setting notch=FALSE.
## notch went outside hinges. Try setting notch=FALSE.

# ---- Scatter plots ----

# Sepal length against sepal width; both point shape and colour identify
# the species.
ggplot(
  iris,
  aes(Sepal.Length, Sepal.Width, shape = Species, color = Species)
) +
  geom_point()

# Only the cars with am == 0 (automatic transmission), coloured by their
# number of cylinders.
ggplot(
  subset(mtcars, am == 0),
  aes(mpg, disp, colour = factor(cyl))
) +
  geom_point()

# geom_smooth() overlays a smoothed trend to help reveal the pattern.
ggplot(mtcars, aes(mpg, disp, colour = hp)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# geom_line() with its own y mapping draws horsepower as a line on the same
# axes as the disp points.
ggplot(mtcars, aes(mpg, disp, colour = hp)) +
  geom_point(size = 2.5) +
  geom_line(aes(y = hp))

# ---- Axis labels, title and subtitle ----

# Adding a title with labs().
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point() +
  labs(title = "Scatter plot")

# Alternatively, ggtitle() sets the title ...
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point() +
  ggtitle(label = "Scatter plot")

# ... and can add a subtitle as well. The subtitle is kept on a single line
# so that no line breaks end up inside the string.
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point() +
  ggtitle(label = "Scatter plot", subtitle = "mtcars data in R")

# Base plot for the label examples: points coloured by number of cylinders.
a <- ggplot(mtcars, aes(x = mpg, y = disp, color = factor(cyl))) +
  geom_point()

# labs(color = ...) renames the legend of the colour scale.
a + labs(color = "Cylinders")

# xlab() and ylab() rename the axes.
a + labs(color = "Cylinders") +
  xlab("Mileage") +
  ylab("Displacement")

# Combining it all: legend title, axis labels, title and subtitle.
a + labs(color = "Cylinders") +
  xlab("Mileage") +
  ylab("Displacement") +
  ggtitle(label = "Scatter plot", subtitle = "mtcars data in R")

# ---- Themes ----
# Base scatter plot reused by every theme example below.
b <- ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point() +
  labs(title = "Scatter Plot")

# Title colour and size, plus the background colour of the whole plot.
b + theme(
  plot.title = element_text(size = 17, color = "blue"),
  plot.background = element_rect(fill = "orange")
)

# A complete built-in theme.
b + theme_minimal()

# Blank out the panel background.
b + theme(panel.background = element_blank())

# Remove the tick-label text: on both axes, on x only, then on y only.
b + theme(axis.text = element_blank())
b + theme(axis.text.x = element_blank())
b + theme(axis.text.y = element_blank())

# ---- Legend position ----
# Base plot with a continuous colour scale (hp), reused in later sections.
# NOTE(review): the variable name `c` shadows base::c() as a variable; calls
# such as c(2, 4) still resolve to the function, but a more descriptive name
# would be clearer. It is kept because later sections refer to `c`.
c <- ggplot(mtcars, aes(x = mpg, y = disp, color = hp)) +
  labs(title = "Scatter Plot") +
  geom_point()

# Move the legend above the plot.
c + theme(legend.position = "top")

# Combining everything: legend at the bottom, no axis text, styled title and
# coloured plot background. The colour name is kept on a single line, since a
# line break inside the string would make it an invalid colour.
c + theme(legend.position = "bottom", axis.text = element_blank()) +
  theme(
    plot.title = element_text(color = "Forest Green", size = 17),
    plot.background = element_rect("Yellow")
  )

# ---- Colour scales in the legend ----
# Two-colour gradient from the lowest to the highest hp value.
c + scale_color_gradient(low = "yellow", high = "red")

# Three colours: scale_color_gradient2() builds a diverging scale that runs
# from `low` through `mid` to `high`, with `mid` placed at `midpoint`.
# `midpoint` defaults to 0; every hp value is positive, so with the default
# the `low` colour would never be shown. Centring the midpoint on the hp range
# shades the low values red, the central values green and the high values
# blue.
c + scale_color_gradient2(
  low = "red", mid = "green", high = "blue",
  midpoint = mean(range(mtcars$hp))
)

# scale_color_gradientn() accepts any number of colours.
c + theme(legend.position = "bottom") +
  scale_color_gradientn(colours = c("red", "forest green", "white", "blue"))

# ---- Legend breaks ----
# Custom legend title, break points every 75 hp from 50 to 350, and labels
# that append the unit to each break.
c + scale_color_continuous(
  name = "horsepower",
  breaks = seq(50, 350, 75),
  labels = paste(seq(50, 350, 75), "hp")
)

# ---- Changing break points and colours of the legend together ----

# Trial 1 (wrong): the second colour scale replaces the first one, so the
# breaks are lost.
c + scale_color_continuous(breaks = seq(50, 350, 75)) +
  scale_color_gradient(low = "blue", high = "red")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

# Trial 2 (also wrong): same problem in the other order; now the colours
# are lost.
c + scale_color_gradient(low = "blue", high = "red") +
  scale_color_continuous(breaks = seq(50, 350, 75))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

# The correct way: pass every argument to a single scale function.
c + scale_color_continuous(
  name = "horsepower",
  breaks = seq(50, 350, 75),
  low = "red",
  high = "black"
) +
  theme(
    panel.background = element_rect("green"),
    plot.background = element_rect("orange")
  )

# ---- Axis cut points ----
# Base plot for the axis examples: points coloured by transmission type (am),
# custom axis labels, a black panel and a pink plot background.
d <- ggplot(mtcars, aes(mpg, disp)) +
  geom_point(aes(color = factor(am))) +
  xlab("Mileage") +
  ylab("Displacement") +
  theme(
    panel.background = element_rect("black"),
    plot.background = element_rect("pink")
  )

# To change the axis cut points we use scale_(axisname)_continuous.
# Observations outside `limits` are removed with a warning, so the limits
# must cover the plotted variables: limits of c(2, 4) on mpg and c(15, 30) on
# disp would exclude every value and remove all 32 rows. The limits below
# span the mpg values on x and the disp values on y.
d + scale_x_continuous(limits = c(10, 35)) +
  scale_y_continuous(limits = c(50, 500))

# `breaks` additionally controls where the tick marks are placed: every 5
# units of mpg on the x axis and every 50 units of disp on the y axis.
d + scale_x_continuous(limits = c(10, 35), breaks = seq(10, 35, 5)) +
  scale_y_continuous(limits = c(50, 500), breaks = seq(50, 500, 50))

# ---- Faceting with facet_wrap() ----
# Inspect the data and list the distinct carburettor counts used as facets.
View(mtcars)
unique(mtcars$carb)
## [1] 4 1 2 3 6 8

# One panel per value of carb, given as a formula ...
ggplot(mtcars, aes(mpg, disp)) +
  geom_point() +
  facet_wrap(~carb)

# ... or, alternatively, as a character string.
ggplot(mtcars, aes(mpg, disp)) +
  geom_point() +
  facet_wrap("carb")

# nrow and ncol control the layout of the panels; here three rows are
# requested.
ggplot(mtcars, aes(mpg, disp)) +
  geom_point() +
  facet_wrap(~carb, nrow = 3)

# The same call again: the number of rows needed for faceting is given
# through nrow.
ggplot(mtcars, aes(mpg, disp)) +
  geom_point() +
  facet_wrap(~carb, nrow = 3)

## Use the `labeller` option to control how labels are printed:
ggplot(mtcars, aes(mpg, disp)) +
  geom_point() +
  facet_wrap(~carb + am, labeller = "label_both")

# ---- Faceting with facet_grid() ----
# facet_grid() facets in two dimensions (rows ~ columns).

# The basic plot is stored in `z` so that different facet layouts can be
# added to it.
z <- ggplot(mtcars, aes(mpg, disp)) +
  geom_point()

# One column of panels per cyl value.
z + facet_grid(. ~ cyl)

# One row of panels per cyl value.
z + facet_grid(cyl ~ .)

# Rows by gear and columns by cyl, with each strip labelled by variable name
# and value.
z + facet_grid(gear ~ cyl, labeller = "label_both")

# ---- Adding text to the points ----
# geom_text() prints the value of am at each point.
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point() +
  geom_text(aes(label = am))

# library() stops with an error if ggrepel is not installed, whereas
# require() would only return FALSE and let the next call fail instead.
library(ggrepel)

# geom_text_repel() comes from ggrepel and is used here in place of
# geom_text() for the same labels.
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point() +
  geom_text_repel(aes(label = am))

You might also like