#####################################################################################################
####            
##################################################################################################
library(mvtnorm)
library(Matrix)
library(foreach)
library(glmnet)
library(ISLR)
library(graphics)

NUM<-1000
T<-300                            #sample size, we consider 200,300,400
p1<- 0.3                          #u_V2 correlation coefficients, 0 for no endogeneity case, 0.3 for endogenous case
KR<-seq(0,1.1,by=0.05)            #the range of k in k-class estimator, in theory k can be any arbitrary number, in empirical study, the meaningful value of k is usually in [0,LIML]
##############################################################################################
##                           model setting                                       ##
##############################################################################################
set.seed(111)
options(digits = 4)
n<-51                        #n-1 number of endogenous variables, set to 50, 100 in the paper
M<-80                        #no of instruments 80, 150 in the paper
beta<-matrix(0,n-1,1) 
beta[1:5]<-runif(5,1,3)
idbeta<-rbinom(5,1,0.5)
for(i in 1:5){
        if(idbeta[i]>0.5)   beta[i]<-(-beta[i])
}
trueparameter<-beta  
####   # structural equation coefficients C1
C1<-matrix(0,M,1)                                      
C1[1:10]<-runif(10,0.5,1)
idC1<-rbinom(10,1,0.5)
for(i in 1:10){
        if(C1[i]>0.5)   C1[i]<-(-C1[i])
}
####    # reduced form equation coefficients C2
C2<-matrix(rep(0,M*(n-1)),nrow =M,ncol =(n-1) )       
idC2<-matrix(rbinom(10*(n-1),1,0.5),nrow =M,ncol =(n-1) )
for(i in 1:10){
        for(j in 1:(n-1)){
                C2[i,j]<-runif(1,0.5,1)
                if(idC2[i,j]>0.5)   C2[i,j]<-(-C2[i,j])
        }
} 
####                          
cor_V2_u<-matrix(0,n,n) 
for(i in 1:(n-1)) {
        for(j in 1:(n-1)){
                cor_V2_u[i,j]<-0.1^abs(j-i)
        }
} 
cor_V2_u[1:5,n]<-p1
cor_V2_u[n,1:5]<-p1
id<-sample(c(6:(n-1)),5,replace = F)
cor_V2_u[id,n]<-p1
cor_V2_u[n,id]<-p1
cor_V2_u[n,n]<-1
cor_x_x<-matrix(rep(2,M*M),nrow =M,ncol =M)      
for(i in 1:M){
        for(j in 1:M){
                cor_x_x[i,j]<-0.5^abs(j-i)
        }
}   
X<-rmvnorm( T,rep(1,M),cor_x_x)      #generate IVs 
V2_u<-rmvnorm(T,rep(0,n),cor_V2_u) 
u<-V2_u[, n]                          #random error u
V2<-V2_u[,-n]  
Y2<-X%*%C2+V2                                        
y1<-u+Y2%*%beta  

PX<-X%*%solve(t(X)%*%X)%*%t(X)
R<-(diag(T)-PX)%*%Y2
Qhat<-t(Y2)%*%Y2-t(R)%*%R
Ghat<-(t(R)%*%R)/T
beta2sls<-solve(t(Y2)%*%PX%*%Y2)%*%(t(Y2)%*%PX%*%y1)
qhat<-t(Y2)%*%(y1-Y2%*%beta2sls)/T
sig2hat<-t(y1-Y2%*%beta2sls)%*%(y1-Y2%*%beta2sls)/T
sig2hat

G1hat<-qhat%*%t(qhat)/sig2hat[1,1]   
G2hat<-Ghat-G1hat
a<-M-2*(n-1)-3-sum(diag(G2hat%*%Qhat))/sum(diag(G1hat%*%Qhat))
kbesthat<-1+a/T
kbesthat


####################################################################################
##                     k besthat estimation             ##
###################################################################################
W<-diag(T)-kbesthat*(diag(T)-PX)
W.eigen=eigen(W,symmetric = T)
A<-diag(W.eigen$values)
U<-W.eigen$vectors
cv.out<-cv.glmnet(t(U)%*%Y2,t(U)%*%y1,alpha=1,weights = diag(abs(A)),family="gaussian")
bestlam<-cv.out$lambda.min
parameterhat<-predict(cv.out,type="coefficients",s=bestlam) [-1]
MSEbesthat<-sum((abs(trueparameter-parameterhat))^2)
MADbesthat<-sum(abs(trueparameter-parameterhat))
TPbesthat<-0
for (z in 1 : (n-1)){
        ifelse(trueparameter[z]!=0&parameterhat[z]!=0,TPbesthat<-1+TPbesthat,TPbesthat)
}
TPbesthat<-TPbesthat/sum(trueparameter!=0)
VSbesthat<-0
for (z in 1 : (n-1)){
        ifelse( (trueparameter[z]==0&parameterhat[z]==0) | (trueparameter[z]!=0&parameterhat[z]!=0),
                VSbesthat<-1+VSbesthat,VSbesthat)
}
VSbesthat<-VSbesthat/(n-1)
##############################################################################################
##                                    LIML                                          ##
##############################################################################################
y1hat<-X%*%solve(t(X)%*%X)%*%t(X)%*%y1               #OLS 
Y2hat<-X%*%solve(t(X)%*%X)%*%t(X)%*%Y2

v1hat<-y1-y1hat                                                               
V2hat<-Y2-Y2hat            
e1hat<-y1
E2hat<-Y2
eE1<-cbind(t(e1hat)%*%e1hat,t(e1hat)%*%E2hat)
eE2<-cbind(t(E2hat)%*%e1hat,t(E2hat)%*%E2hat)
eE<-rbind(eE1,eE2)
vV1<-cbind(t(v1hat)%*%v1hat,t(v1hat)%*%V2hat)
vV2<-cbind(t(V2hat)%*%v1hat,t(V2hat)%*%V2hat)
vV<-rbind(vV1,vV2)
k.LIML<-min(eigen(eE%*%solve(vV))$value)                    #LIMI
k.LIML

W.LIML<-diag(T)-k.LIML*(diag(T)-PX)
W.eigen.LIML=eigen(W.LIML,symmetric = T)
A.LIML<-diag(W.eigen.LIML$values)
U.LIML<-W.eigen.LIML$vectors
cv.out.LIML<-cv.glmnet(t(U.LIML)%*%Y2,t(U.LIML)%*%y1,alpha=1,weights = diag(abs(A.LIML)),family="gaussian")
bestlam.LIML<-cv.out.LIML$lambda.min
parameterhat.LIML<-predict(cv.out.LIML,type="coefficients",s=bestlam.LIML) [-1]
MSE.LIML<-sum((abs(trueparameter-parameterhat.LIML))^2)
MAD.LIML<-sum(abs(trueparameter-parameterhat.LIML))
TP.LIML<-0
for (z in 1 : (n-1)){
        ifelse(trueparameter[z]!=0&parameterhat.LIML[z]!=0,TP.LIML<-1+TP.LIML,TP.LIML)
}
TP.LIML<-TP.LIML/sum(trueparameter!=0)
VS.LIML<-0
for (z in 1 : (n-1)){
        ifelse( (trueparameter[z]==0&parameterhat.LIML[z]==0) | (trueparameter[z]!=0&parameterhat.LIML[z]!=0),
                VS.LIML<-1+VS.LIML,VS.LIML)
}
VS.LIML<-VS.LIML/(n-1)

###############################################################################################
##                       LASSO——high dimensional k-class estimation                          ##
##############################################################################################
coef_LASSO<-function(x,y) {
        cv.out<-cv.glmnet(x,y,alpha=1) 
        bestlam<-cv.out$lambda.min
        coef<-predict(cv.out,type="coefficients",s=bestlam) [-1]
        coef
}
y1pred_LASSO<-function(x,y) {                                             
        cv.out<-cv.glmnet(x,y,alpha=1) 
        bestlam<-cv.out$lambda.min
        y1pred<-predict(cv.out,s=bestlam,newx=x)
        y1pred
}  
#####
kclasskrange<-function(krange,num){
        MAD<-matrix(0,num,length(krange))        
        MSE<-matrix(0,num,length(krange))
        TN<-matrix(0,num,length(krange))
        TP<-matrix(0,num,length(krange))
        VS<-matrix(0,num,length(krange))
        for(i in 1:num){
                for(j in 1:length(krange)){
                        W<-diag(T)-krange[j]*(diag(T)-PX)
                        W.eigen=eigen(W,symmetric = T)
                        A<-diag(W.eigen$values)
                        U<-W.eigen$vectors
                        cv.out<-cv.glmnet(t(U)%*%Y2,t(U)%*%y1,alpha=1,weights = diag(abs(A)),family="gaussian")
                        bestlam<-cv.out$lambda.min
                        parameterhat<-predict(cv.out,type="coefficients",s=bestlam) [-1]
                        
                        MAD[i,j]<-sum(abs(trueparameter-parameterhat))
                        
                        MSE[i,j]<-sum((abs(trueparameter-parameterhat))^2)
                        
                        for (z in 1 : (n-1)){
                                ifelse(trueparameter[z]==0&parameterhat[z]==0,TN[i,j]<-1+TN[i,j],TN[i,j])
                        }
                        TN[i,j]<-TN[i,j]/sum(trueparameter==0)
                        
                        for (z in 1 : (n-1)){
                                ifelse(trueparameter[z]!=0&parameterhat[z]!=0,TP[i,j]<-1+TP[i,j],TP[i,j])
                        }
                        TP[i,j]<-TP[i,j]/sum(trueparameter!=0)
                        
                        for (z in 1 : (n-1)){
                                ifelse( (trueparameter[z]==0&parameterhat[z]==0) | (trueparameter[z]!=0&parameterhat[z]!=0),
                                        VS[i,j]<-1+VS[i,j],VS[i,j] )
                        }
                        VS[i,j]<-VS[i,j]/(n-1)
                        
                }
        }
        return(list(MAD,MSE,TN,TP,VS))
}

##############################################################################################
##                                report the statistics in the tables of the paper          ##
##############################################################################################

kclasskrange_sum<-kclasskrange(krange=KR,num=NUM)
MAD<-kclasskrange_sum[[1]]
MSE<-kclasskrange_sum[[2]]
TN <-kclasskrange_sum[[3]]
TP <-kclasskrange_sum[[4]]
VS <-kclasskrange_sum[[5]]
boxplot(MAD,names=KR,xlab = "the value of k",ylab = "MAD")
boxplot(MSE,names=KR,xlab = "the value of k",ylab = "MSE")

MAD_median<-apply(MAD, 2, median)
MSE_mean<-apply(MSE, 2, mean)
TN_median <-apply(TN,  2, median)
TP_median <-apply(TP,  2, median)
VS_median <-apply(VS,  2, median)

par(lty=1,pch=19)
plot(KR,MAD_median,xlab = "the value of k",ylab = "MAD",pch=20)
curve(splinefun(KR,MAD_median, method="mono")(x), add=TRUE, col=1, n=1001)
min_MAD_median= min(MAD_median)
min_MAD_k <- KR[which.min(MAD_median)]
points(x=kbesthat, y=MADbesthat,col="blue",pch=17)
legend("top",inset = .05,legend = c("the estimator of the optimal k "),
       lty=c(2),pch = c(17),col=c("blue"))

plot(KR, MSE_mean,xlab = "the value of k",ylab = "MSE",pch=20)
min_MSE_mean= min(MSE_mean)
min_MSE_k <- KR[which.min(MSE_mean)]
curve(splinefun(KR,MSE_mean, method="mono")(x), add=TRUE, col=1, n=101)
points(x=min_MSE_k, y=min_MSE_mean,col="red",pch=15)
points(x=kbesthat, y=MSEbesthat,col="blue",pch=17)
legend("top",inset = .05,legend = c("the optimal k ","the estimator of the optimal k"),
       lty=c(1,2),pch = c(15,17),col=c("red","blue"))

plot(KR, TP_median,xlab = "the value of k",ylab = "TP",ylim=c(0,1),pch=20)
max_TP_median= max(TP_median)
max_TP_k <- KR[which.max(TP_median)]
curve(splinefun(KR,TP_median, method="mono")(x), add=TRUE, col=1, n=101)


plot(KR, VS_median,xlab = "the value of k",ylab = "VS",ylim=c(0,1),pch=20)
max_VS_median= max(VS_median)
max_VS_k <- KR[which.max(VS_median)]
curve(splinefun(KR,VS_median, method="mono")(x), add=TRUE, col=1, n=101)

##############################################################################################
##                    show the OLS 2SLS LIML and adaptive k-class estimation statistics    ##
##############################################################################################
which(KR==0)        ##OLS
MAD_median[1]
MSE_mean[1]
TP_median[1]
VS_median[1]

which(KR==1)       ##2SLS
MAD_median[21]
MSE_mean[21]
TP_median[21]
VS_median[21]


k.LIML                  ##LIMI
MAD.LIML
MSE.LIML
TP.LIML
VS.LIML

which.min(MSE_mean)   ## optimal k
MAD_median[which.min(MSE_mean) ]
MSE_mean[which.min(MSE_mean) ]
TP_median[which.min(MSE_mean) ]
VS_median[which.min(MSE_mean) ]

kbesthat               ##estimated k value
MADbesthat
MSEbesthat
TPbesthat
VSbesthat