Data Importing: 736575

Data Importing

rm(list=ls())
library(dplyr)
library(‘tseries’)
library(‘forecast’)

#Importing Data
dataUSDeaths = read.table(“C:/Users/User/Documents/dataUSDeaths.txt”, header=T, dec=”.”)
dataUSExposures = read.table(“C:/Users/User/Documents/dataUSExposures.txt”, header=T, sep=””, dec=”.”)
Deaths_data = select(dataUSDeaths, Year, Age, Total)
Exposures_data = select(dataUSExposures, Year, Age, Total)
Deaths_data_1 = Deaths_data
Exposures_data_1 = Exposures_data
Deaths_data_2 = Deaths_data
Exposures_data_2 = Exposures_data

#Question 1
#Filtering The Age
Deaths_data = filter(Deaths_data, Age!=”110+”, Age!=”109″, Age!=”108″, Age!=”107″, Age!=”106″,Age!=”105″,
Age!=”104″, Age!=”103″, Age!=”102″, Age!=”101″)
Exposures_data = filter(Exposures_data, Age!=”110+”, Age!=”109″, Age!=”108″, Age!=”107″, Age!=”106″,Age!=”105″,
Age!=”104″, Age!=”103″, Age!=”102″, Age!=”101″)
Age_Data = as.numeric(paste(unique(Deaths_data$Age)))
nAge = length(unique(Age_Data))
nAge
Year_Data = unique(Deaths_data$Year)
nYear = length(unique(Year_Data))
nYear

#Mortality
Mortality_Rates = log(matrix(Deaths_data$Total/Exposures_data$Total, nYear, nAge, byrow=T))
Mortality_Rates

#LeeCarter MODEL
#LeeCarter MODEL Parameter Estimation
alpha_hat = colMeans(Mortality_Rates)
Z=matrix(0,nYear, nAge)
for(i in 1:nAge){
Z[,i] = Mortality_Rates[,i]-alpha_hat[i]
}
SVD_Z = svd(Z,1,1)
kapaa_hat = SVD_Z$u*sum(SVD_Z$v)*SVD_Z$d[1]
beta_hat = SVD_Z$v/sum(SVD_Z$v)
par(mfrow=c(1,3))
plot(Year_Data, kapaa_hat, type=’l’, main=”kapaa”)
plot(Age_Data, alpha_hat, type=’l’, main= “alpha”)
plot(Age_Data,beta_hat, type=’l’, main=”beta”)

#LeeCarter Residuals
Mortality_RatesHat = rep(1,1,nYear)%*%t(alpha_hat)+kapaa_hat%*%t(beta_hat)
ResidualRates = Mortality_Rates – Mortality_RatesHat
ResidualRates

#ARMA MODEL
AIC = c()
LL = c()
BICC = c()

Model = arima(kapaa_hat, order=c(1,0,1))
Model
AIC[1] = Model$aic
LL[1] = Model$loglik
BICC[1] = BIC(Model)
par(mfrow = c(1,2))
plot(Model$residuals, col = 1)
acf(Model$residuals)
Box.test(Model$residuals, lag = 4, type = c(“Ljung-Box”))

#Forecasting The Mortality Rate for the year 2017
ModelPred = forecast(Model, h = 2)
par(mfrow = c(1,1))
plot(ModelPred, main = “Model”)

#Poisson MODEL
Yt = Deaths_data$Year
Ax = Deaths_data$Age
Dxt = as.integer(Deaths_data$Total)
Ext = Exposures_data$Total

Model2=glm(Dxt~Ext + Ax + Yt, offset=log(Ext), poisson(link=log) )
summary(Model2)
Model2.values=log(exp(predict.glm(Model2))/Ext)
Model2.valuesMatrix=matrix(Model2.values, nYear, nAge,byrow=T)
par(mfrow = c(1,1))
plot(Model2.valuesMatrix[4,], type = “l”, main = “PoissonModel”)

#Question 2
d=(kapaa_hat[nYear]-kapaa_hat[1])/nYear
Predkapaa_hat=c()
Residualskp=c()
Predkapaa_hat[1]=kapaa_hat[1]
for(i in 2:nYear){
Predkapaa_hat[i]=kapaa_hat[i-1]+d
Residualskp[i-1]=kapaa_hat[i]-Predkapaa_hat[i]
}
N_Sim=10000
N_period=100
Residuals_k = Residualskp
Residuals_y = ResidualRates
par(mfrow = c(1,2))

#normal simulation
x=20
AA_x=alpha_hat[Age_Data==x]
BB_x=beta_hat[Age_Data==x]
kp_sim=c()
y_sim=c()
kp_sim[1]=kapaa_hat[nYear]
y_sim[1]=Mortality_Rates[nYear, Age_Data==x]
for(i in 2:N_Sim){
kp_sim[i]=kp_sim[i-1]+d+rnorm(1,0,sd(Residuals_k))
y_sim[i]=AA_x+BB_x*kp_sim[i]+rnorm(1,0,sd(Residuals_y))
}

plot(Year_Data,Mortality_Rates[,Age_Data==x],type=’l’,xlim=c(1935,2035),ylim=c(-9,-3), main = “normal simulation”)

#Sample Simulation
x=10
AA_x=alpha_hat[Age_Data==x]
BB_x=beta_hat[Age_Data==x]
kp_sim=c()
y_sim=c()
kp_sim[1]=kapaa_hat[nYear]
y_sim[1]=Mortality_Rates[nYear, Age_Data==x]
for(i in 2:N_Sim){
kp_sim[i]=kp_sim[i-1]+d+sample(Residuals_k,1,replace=F)
y_sim[i]=AA_x+BB_x*kp_sim[i]+sample(Residuals_y,1,replace=F)
}
plot(Year_Data,Mortality_Rates[,Age_Data==x],type=’l’,xlim=c(1935,2035),ylim=c(-8,-5), main = “Sample Method”)

#Question 3
#Pensioners aged 65 and above
Deaths_data$Age=as.numeric(as.character(Deaths_data$Age))
Deaths_data.3 = Deaths_data
Deaths_data.3$Age[Deaths_data.3$Age < 65] = NA
Deaths_data.3=na.omit(Deaths_data.3)
Exposures_data$Age=as.numeric(as.character(Exposures_data$Age))
Exposures_data.3 = Exposures_data
Exposures_data.3$Age[Exposures_data.3$Age < 50] = NA
Exposures_data.3=na.omit(Exposures_data.3)

#Ages in >65 dataset
Age_Data1 = as.numeric(paste(unique(Exposures_data.3$Age)))
nAge1 = length(unique(Age_Data1))
#Years in >65 dataset
Year_Data1 = unique(Exposures_data.3$Year)
nYear1 = length(unique(Year_Data1))

#Mortality
Mortality_Rates1 = log(matrix(Deaths_data.3$Total/Exposures_data.3$Total, nYear1, nAge1,byrow=T))
Mortality_Rates1

#Using the LeeCarter Model
alpha_hat1 = colMeans(Mortality_Rates1)
Zn=matrix(0,nYear1, nAge1)
for(i in 1:nAge1){
Zn[,i] = Mortality_Rates1[,i]-alpha_hat1[i]
}
SVD_Z1 = svd(Zn,1,1)

kapaa_hat1 = SVD_Z1$u*sum(SVD_Z1$v)*SVD_Z1$d[1]
beta_hat1 = SVD_Z1$v/sum(SVD_Z1$v)
par(mfrow=c(1,3))
plot(Year_Data1, kapaa_hat1, type=’l’, main=”kappa1″)
plot(Age_Data1, alpha_hat1, type=’l’, main= “alpha1″)
plot(Age_Data1,beta_hat1, type=’l’, main=”beta1”)

#Question 4
Deaths_data_1$Year[Deaths_data_1$Year > 1990] = NA
Deaths_data.Train = na.omit(Deaths_data_1)
Exposures_data_1$Year[Exposures_data_1$Year > 1990] = NA
Exposures_data.Train = na.omit(Exposures_data_1)

#Ages in the train data
AgeTrain = as.numeric(paste(unique(Exposures_data.Train$Age)))
nAgeTrain = length(unique(AgeTrain))
nAgeTrain
#Years in the train data
YearTrain = unique(Exposures_data.Train$Year)
nYearTrain = length(unique(YearTrain))
nYearTrain

#Fitting ARMA model
Yt1 = Deaths_data.Train$Year
Ax1= Deaths_data.Train$Age
Dxt1 = as.integer(Deaths_data.Train$Total)
Ext1 = Exposures_data.Train$Total

Model2Train = glm(Dxt1~Ext1 + Ax1 + Yt1, offset=log(Ext1), poisson(link=log) )
summary(Model2Train)
Model2.values1 = log(exp(predict.glm(Model2Train))/Ext1)
Model2.valuesMatrix1 = matrix(Model2.values1, nYearTrain, nAgeTrain,byrow=T)

Deaths_data_2$Year[Deaths_data_1$Year < 1991] = NA
Deaths_data.Test = na.omit(Deaths_data_2)
Exposures_data_2$Year[Exposures_data_2$Year < 1991] = NA
Exposures_data.Test = na.omit(Exposures_data_2)

#Ages in Test data
AgeTest = as.numeric(paste(unique(Exposures_data.Test$Age)))
nAgeTest = length(unique(AgeTest))
nAgeTest
#Years in Test data
YearTest = unique(Exposures_data.Test$Year)
nYearTest = length(unique(YearTest))
nYearTest

Yt2 = Deaths_data.Test$Year
Ax2= Deaths_data.Test$Age
Dxt2= as.integer(Deaths_data.Test$Total)
Ext2 = Exposures_data.Test$Total
data_Test = matrix(c(Dxt2,Ext2,Ax2,Yt2), ncol = 4, byrow = T)
colnames(data_Test) = c(“Dxt2″,”Ext2″,”Ax2″,”Yt2”)
data_Test

Modelp = predict(Model2Train, data = data_Test)
Modelp = log(exp(Modelp)/Ext2)
par(mfrow=c(1,2))
Mortality_RatesTrain = log(matrix(Deaths_data.Test$Total/Exposures_data.Test$Total, nYearTest, nAgeTest,byrow=T))
plot(Mortality_RatesTrain[4,], type = “l”, main = “Test Data”)
Model2.valuesMatrix2 = matrix(Modelp, nYearTest, nAgeTest,byrow=T)
plot(Model2.valuesMatrix2[4,], type = “l”, main = “Modelled Data”)