Data Load

######## [LIBRARY] ########
library(readxl)
library(tidyverse)
library(ggplot2)
library(gridExtra)


######## [DATA] ########
## Read the two sheets of the datathon workbook:
##   sheet 1 -> `datathon`, sheet 2 -> `country`.
## NOTE(review): the workbook path is an absolute path on one machine.
datathon <- read_xlsx(
  path = 'C:\\Users\\jeong\\Desktop\\datathon.xlsx',
  sheet = 1
)
country <- read_xlsx(
  path = 'C:\\Users\\jeong\\Desktop\\datathon.xlsx',
  sheet = 2
)

Data Preprocessing

######## [CODE] complete-case data for years 2003-2017 ########
## NOTE(review): this header used to read "ages 0-4 (year: 2001-2015)";
## no age filter is applied in this section and the year range actually
## used below is 2003-2017 -- confirm the intended scope.
datathon2 <- datathon
## latest 15 years of data (2003-2017), as character so the values can be
## used as column names
myyear <- as.character(2003:2017)

## Keep only the rows of `data` that have no NA in any of `desiredCols`.
##   data        : data frame (or tibble) to filter
##   desiredCols : columns (names or positions) that must all be non-missing
## Returns `data` restricted to the complete rows; all columns are kept.
completeFun <- function(data, desiredCols) {
  has_all_values <- complete.cases(data[, desiredCols])
  data[has_all_values, ]
}
## Drop every row that has a missing value in any of the `myyear` columns.
mydata <- completeFun(data = datathon2, desiredCols = myyear)
## Remove columns 5-47 and 63 by position.
## NOTE(review): presumably this leaves the four id columns plus the
## 2003-2017 year columns -- confirm against the sheet's column layout.
mydata2 <- mydata[, -c(5:47, 63)]

Demographic Dividend

######## [DATA] data_all ########
## Reshape from wide to long: every column except the four id columns is
## stacked, with the original column name stored in `year` and the cell
## content in `value`.
## pivot_longer() replaces the superseded gather(). The resulting columns
## are the same; rows come out ordered by source row instead of by source
## column, which the filter/join/plot steps that follow do not depend on.
data_all <- mydata2 %>%
  pivot_longer(
    cols = -c(`Country Name`, `Country Code`,
              `Indicator Code`, `Indicator Name`),
    names_to = 'year',
    values_to = 'value'
  )

######## [DATA] data_all2 / all_single / all_group ########
## Attach the Region of each entry from the `country` sheet.
## The join key is spelled out so the join does not depend on dplyr
## guessing the shared columns (and no "Joining with by = ..." message).
data_all2 <- left_join(
  data_all,
  country %>% select(`Country Code`, Region),
  by = 'Country Code'
)

## entries WITHOUT continental information (Region is NA)
Non_region <- data_all2 %>%
  distinct(`Country Name`, `Country Code`, Region) %>%
  filter(is.na(Region)) %>%
  select(`Country Name`)

## entries WITH continental information (Region is present)
region <- data_all2 %>%
  distinct(`Country Name`, `Country Code`, Region) %>%
  filter(!is.na(Region)) %>%
  select(`Country Name`)

## rows whose `Country Name` has a Region
all_single <- data_all2 %>%
  filter(`Country Name` %in% region$`Country Name`)
## rows whose `Country Name` has no Region (e.g. the demographic-dividend
## aggregates); the Region column is dropped for these
all_group <- data_all2 %>%
  filter(`Country Name` %in% Non_region$`Country Name`) %>%
  select(-Region)
######## [DATA] demo ########
## Keep only the four demographic-dividend aggregates and store
## `Country Name` as a factor ordered Pre -> Early -> Late -> Post.
demo <- all_group %>%
  filter(`Country Name` %in% c('Early-demographic dividend',
                               'Late-demographic dividend',
                               'Post-demographic dividend',
                               'Pre-demographic dividend')) %>%
  mutate(`Country Name` = factor(`Country Name`,
                                 levels = c('Pre-demographic dividend',
                                            'Early-demographic dividend',
                                            'Late-demographic dividend',
                                            'Post-demographic dividend')))


######## [DATA] demo_f / demo_m ########
## Split the primary/secondary net-enrollment indicators by sex
## (codes ending in .FE and .MA respectively).
demo_f <- filter(
  demo,
  `Indicator Code` %in% c('SE.PRM.NENR.FE', 'SE.SEC.NENR.FE')
)
demo_m <- filter(
  demo,
  `Indicator Code` %in% c('SE.PRM.NENR.MA', 'SE.SEC.NENR.MA')
)

## Reduce each subset to the plotting columns and tag it with its gender.
ff <- demo_f %>%
  select(`Country Name`, `Indicator Name`, year, value) %>%
  mutate(gender = 'Female')
mm <- demo_m %>%
  select(`Country Name`, `Indicator Name`, year, value) %>%
  mutate(gender = 'Male')

## Stack the female and male rows into a single plotting table.
tt <- bind_rows(ff, mm)

## Enrollment over time, one facet per `Country Name` in `tt`.
## One line per (indicator, gender) combination: colour separates primary
## from secondary school, line type separates female (solid) from male
## (dotted).
##
## The legend labels are positional and are shared by the colour and the
## linetype scale, so they are defined once.
## NOTE(review): the order assumes the plotted levels of
## interaction(`Indicator Name`, gender) come out as primary/female,
## secondary/female, primary/male, secondary/male -- confirm against the
## actual indicator names.
enrollment_legend_labels <- c('Primary School enrollment, Female',
                              'Secondary School enrollment, Female',
                              'Primary School enrollment, Male',
                              'Secondary School enrollment, Male')

ggplot(tt, aes(year, value,
               group = interaction(`Indicator Name`, gender),
               color = interaction(`Indicator Name`, gender),
               linetype = interaction(`Indicator Name`, gender))) +
  geom_line(lwd = 1) +
  theme_bw() +
  labs(y = '(% net)') +
  theme(legend.position = 'bottom',
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  # `year` holds character values, so the breaks are given as character too
  scale_x_discrete(breaks = as.character(seq(2003, 2017, by = 3))) +
  scale_color_manual('',
                     values = c('tomato', 'royalblue1',
                                'tomato', 'royalblue1'),
                     labels = enrollment_legend_labels) +
  scale_linetype_manual('',
                        values = c(1, 1, 3, 3),
                        labels = enrollment_legend_labels) +
  # TRUE instead of T: T is an ordinary variable that can be reassigned
  guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
  facet_grid(~`Country Name`)

The plot shows that net primary and secondary school enrollment rates rise as countries move into later stages of the demographic transition (from pre- to early-, late- and post-demographic dividend). This is consistent with recent research arguing that the demographic dividend is an education-triggered dividend.