-
Notifications
You must be signed in to change notification settings - Fork 0
/
twoFactorVisualizeDataset.R
73 lines (55 loc) · 1.84 KB
/
twoFactorVisualizeDataset.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Working directory ----
# Report the current directory, switch to the folder that holds the data
# file, then report again to confirm the change.
# NOTE(review): the path below is absolute and machine-specific; the script
# only runs where this directory exists.
getwd()
setwd(dir = "/Users/xinan/Documents/program/R")
getwd()

# Import ----
# The file name is resolved relative to the working directory set above.
data <- read.csv(file = "P2-Section5-Homework-Data.csv")

# Explore ----
data               # print the whole data frame
head(data, n = 6)  # first 6 rows
tail(data, 7)      # last 7 rows
str(data)          # structure of the data frame
summary(data)      # per-column summary
# Filter the data frame by year ----
# which() keeps only the rows where the comparison is TRUE. Indexing with the
# bare logical vector would additionally return an all-NA row for every
# record whose Year is NA, inflating the row counts below.
data1960 <- data[which(data$Year == 1960), ]
head(data1960)
data2013 <- data[which(data$Year == 2013), ]
head(data2013)

# Check row counts ----
# The original author recorded 187 rows for each year (an equal split).
nrow(data1960)
nrow(data2013)
# Create the additional data frames ----
# Country_Code and the two Life_Expectancy_At_Birth_* vectors are not defined
# in this script; presumably they are loaded into the session by a companion
# script beforehand -- TODO confirm.
#
# data.frame() silently recycles a shorter vector when the lengths divide
# evenly, which would pair life expectancies with the wrong country codes.
# Require matching lengths before building the frames.
stopifnot(
  length(Life_Expectancy_At_Birth_1960) == length(Country_Code),
  length(Life_Expectancy_At_Birth_2013) == length(Country_Code)
)

add1960 <- data.frame(
  Code = Country_Code,
  Life.Exp = Life_Expectancy_At_Birth_1960
)
add2013 <- data.frame(
  Code = Country_Code,
  Life.Exp = Life_Expectancy_At_Birth_2013
)

# Check summaries ----
summary(add1960)
summary(add2013)
# Join each year's rows to its life-expectancy table ----
# The key column is named "Country.Code" in the year subsets and "Code" in
# the add-on frames.
merged1960 <- merge(
  x = data1960, y = add1960,
  by.x = "Country.Code", by.y = "Code"
)
merged2013 <- merge(
  x = data2013, y = add2013,
  by.x = "Country.Code", by.y = "Code"
)

# Inspect the merged structures
str(merged1960)
str(merged2013)

# Each merged frame was built from a single-year subset, so the "Year" column
# is no longer required. Remove it.
merged1960[["Year"]] <- NULL
merged2013[["Year"]] <- NULL

# Inspect the structures again to confirm the column is gone
str(merged1960)
str(merged2013)
# Visualization ----
library("ggplot2")

# Scatter plot of life expectancy against fertility rate, coloured by region.
#
# qplot() is deprecated as of ggplot2 3.4.0, so the plot is assembled from
# ggplot() + geom_point() instead. Both years share this one helper so the
# two figures cannot drift apart.
#
# Arguments:
#   df    - data frame with Fertility.Rate, Life.Exp and Region columns
#   title - plot title
# Returns the ggplot object; it is drawn when auto-printed at top level.
plot_life_vs_fertility <- function(df, title) {
  ggplot(
    data = df,
    mapping = aes(x = Fertility.Rate, y = Life.Exp, colour = Region)
  ) +
    # Constant (unmapped) point size and transparency, as in the qplot()
    # calls this replaces, which passed I(5) and I(0.6).
    geom_point(size = 5, alpha = 0.6) +
    ggtitle(title)
}

# Visualize the 1960 dataset
plot_life_vs_fertility(merged1960, "Life Expectancy vs Fertility (1960)")

# Visualize the 2013 dataset
plot_life_vs_fertility(merged2013, "Life Expectancy vs Fertility (2013)")