##############################################################################
# S-Plus / R code for linear regression.
# Originally written by Karel Kupka, TriloByte.cz (kupka@trilobyte.cz)
# without use of pre-defined s-plus functions.
# This code can be run in the S-PLUS statistical package or in the free
# R programming environment.  All routines and diagnostic functions are
# also implemented in commercial software QCExpert(R) (www.trilobyte.cz).
##############################################################################

#############################################################
## D A T A   S E C T I O N
## (choose one of the 4 data sets; NOTE(review): as written all
##  four run in sequence, so the LAST active set is the one used)
#############################################################

###### DATA SET pK PALLAS ########
xx <- as.matrix(c(5.45, 10.23, 9.25, 8.28, 7.48, 2.86, 8.31, 8.35, 3.29,
                  9.47, 6.05, 9, 0.33, 7.3, 2.5, 4.34, 3.44, 1.8, 2.6,
                  6.65, 2.1, 2.68, 4.06, 2.63, 3.74))
dimnames(xx) <- list(NULL, "pKexp")
y <- c(6.26, 11.23, 9.77, 7.84, 7.48, 2.89, 4.93, 8.72, 3.28, 9.24, 7.07,
       8.32, 0.38, 6.82, 2.86, 4.36, 3.34, 1.82, 2.3, 4.45, 1.99, 2.67,
       2.93, 3.13, 3.98)
yname <- "pK(Pallas)"
###### END OF DATA SET pK PALLAS ########

###### DATA SET 01: random ########
n <- 60    # number of rows
m <- 10    # number of independent variables
# Independent variables (random, uniform distribution (0,1)):
xx <- matrix(runif(n * m), ncol = m)
dimnames(xx) <- list(NULL, paste("SL", 1:m, sep = ""))
# Parameters:
atest <- round(runif(m) * 100 - 50) / 10
atestm <- as.matrix(atest[1:m])
# Generate dependent variable with normal errors:
y <- xx %*% atestm + rnorm(n)
y <- as.vector(y)
###### End of random data generation ##########

###### DATA SET 02: multivariate testing data ########
x1 <- c(1, 5, 3, 7, 11, 5, 3, 9, 12, 16)
x2 <- c(4, 6, 3, 6, 8, 4, 9, 4, 3, 0)
x3 <- c(46, 63, 21, 84, 32, 26, 65, 33, 46, 52)
xx <- cbind(x1, x2, x3)
dimnames(xx) <- list(NULL, c("SL1", "SL2", "SL3"))
# xx <- as.matrix(x1); dimnames(xx) <- list(NULL, "SL1")
y <- c(3.3, 1, 12.3, 19.2, 6.8, 4.7, 0.8, 14.4, 35.4, 68.4)
nazevY <- "Sloupec Y"
###### DATA SET 02 - END ##########

###### DATA SET 03: data for polynomial ########
xx <- as.matrix(c(1, 2, 3, 4, 5, 6, 7))
dimnames(xx) <- list(NULL, "SL1")
y <- c(1, 3, 5, 9, 14, 28, 33)
nazevY <- "Sloupec Y"
###### End of data for polynomial ##########

######################################################
## END OF DATA SECTION
######################################################

####### General parameters for regression ############
alfa <- 0.05          # significance level, 0 < alfa < 1
omez <- 0.00001       # threshold, 0 <= omez <= 1
absolut <- TRUE       # include absolute (intercept) term
polynom <- FALSE      # polynomial model
stupenpol <- 2        # polynomial degree, >= 2
user <- FALSE         # user-supplied model transform
metoda <- "NC"        # "NC" "L1" "MINIMAX" "FAIR" "EXPE" "EXPE2" "BIR" "QUAN" "STEP+" "STEP-" "STEPALL"
vahy <- "1"           # WEIGHTS: "1" "Y" "1/Y" or a column name
transfy <- FALSE
kvazilin <- FALSE
predikce <- FALSE
if (polynom | user) notransf <- FALSE else notransf <- TRUE
####### End of parameter definition ############

#####################################################
#
# Data precalculations
#
#####################################################
n <- length(y)        # number of rows
m <- dim(xx)[2]       # number of columns of X
data <- xx
pocsloup <- m
w <- rep(0, n)        # declare weight vector w[n]
if (vahy == "1")      # uniform weights
  w <- rep(1, n)
if (vahy == "Y") {
  for (i in 1:n) if (y[i] <= 0) stop("Při váhy = Y musí být Y>0!")
  w <- y / sum(y)
}
if (vahy == "1/Y") {
  for (i in 1:n) if (y[i] <= 0) stop("Při váhy = 1/Y musí být Y>0!")
  w <- (1 / y) / sum(1 / y)
}
if (kvazilin) {
  # Quasi-linearization weights from the derivative of the y-transform.
  # (transformyD is expected to be defined by the user elsewhere.)
  for (i in 1:n) w[i] <- 1 / transformyD(y[i])
  w <- w / sum(w)
}
if (polynom & (pocsloup == 1)) {
  # Expand the single column into polynomial terms x, x^2, ..., x^stupenpol.
  dname <- dimnames(xx)[[2]]
  dn <- dname
  for (i in 2:stupenpol) {
    xx <- cbind(xx, data^i)
    dname <- c(dname, paste(dn, "^", i, sep = ""))
  }
  m <- m + stupenpol - 1
  dimnames(xx) <- list(NULL, dname)
}
if (user) {
  # User-supplied model matrix transform (usermodel defined elsewhere).
  umod <- usermodel(xx)
  xx <- umod$X
  m <- umod$M
  pocsloup <- umod$pocsl
}
if (absolut) {
  # Prepend the intercept column.
  x0 <- rep(1, n)
  xx <- cbind(x0, xx)
  m <- m + 1
  dimnames(xx)[[2]][1] <- "(Abs)"
}
yname <- "Y"
if (transfy) {
  # Optional transformation of Y (TRANY defined elsewhere).
  y <- TRANY(y)$YT
  yname <- TRANY(y)$Name
}
tkvant <- qt(1 - alfa / 2, n - m)
fkvant <- qf(1 - alfa, m - 1, n - m)
#####################################################
#
# End of data precalculations
#
#####################################################

#######################################################
# REGRESSION SECTION
# (choose one of the NC, IRWLS, BIR procedures)
#######################################################

############ LEAST SQUARES "NC" ###########
#
# REQUIRED INPUT:
#   xx, y, w, n, m
# OUTPUT:
#   cc, a, pred, res, rsc, prumch, sres, sigmaa, hatdiag,
#   eigval, eigvec, ccstd
#
############################################
## NC - LEAST SQUARES FUNCTION DEFINITION:
############################################
NC <- function(xx, y, w, n, m) {
  # Weighted least squares via eigendecomposition of X'WX.
  w <- w / sum(w) * n
  w2 <- (w * w)
  # Apply weights to X and y.
  wx <- xx
  wy <- y
  for (i in 1:m) wx[, i] <- xx[, i] * w2
  wy <- y * w2
  xtwx <- t(xx) %*% wx
  # Invert X'WX from its spectral decomposition (instead of solve()).
  va <- eigen(xtwx)$values
  ve <- eigen(xtwx)$vectors
  if (min(va) <= 0) stop("Singular Characteristic Matrix XTX!")
  cc <- matrix(rep(0, m * m), ncol = m)   # cc[m,m] = 0
  for (i in 1:m) cc <- cc + (1 / va[i]) * ve[, i] %*% t(ve[, i])
  # Parameter estimates.
  xtwy <- t(xx) %*% wy
  a <- cc %*% xtwy
  # Fit statistics.
  pred <- xx %*% a
  res <- y - pred
  rsc <- sum(res * res)
  prumch <- mean(abs(res))
  sres <- sqrt(rsc / (n - m))
  ccdiag <- diag(cc)
  sigmaa <- sres * sqrt(ccdiag)
  # Hat-matrix diagonal for the weighted design.
  hatdiag <- y
  xw1 <- xx
  w1 <- sqrt(w2)
  for (i in 1:m) xw1[, i] <- xx[, i] * w1
  xxtx <- xw1 %*% cc
  for (i in 1:n) hatdiag[i] <- sum(xxtx[i, ] * xw1[i, ])
  eigval <- va   # vector 1 x m
  eigvec <- ve   # matrix m x m
  ccstd <- cc
  list(cc = cc, a = a, pred = pred, res = res, rsc = rsc, prumch = prumch,
       sres = sres, sigmaa = sigmaa, hatdiag = hatdiag, eigval = eigval,
       eigvec = eigvec, ccstd = ccstd)
}
#######################################################
# END OF FUNCTION DEFINITION
#######################################################
## FUNCTION CALL:
###########################################
if (metoda == "NC") {
  temp <- NC(xx, y, w, n, m)
  cc <- temp$cc; a <- temp$a; pred <- temp$pred; res <- temp$res
  rsc <- temp$rsc; prumch <- temp$prumch; sres <- temp$sres
  sigmaa <- temp$sigmaa
  hatdiag <- temp$hatdiag; eigval <- temp$eigval
  eigvec <- temp$eigvec; ccstd <- temp$ccstd
}
######### LEAST SQUARES - END ################

############ Least Squares with Classical Inversion "NC1" ###########
# Input:
#   xx, y, w, n, m
# Output:
#   cc, a, pred, res, rsc, prumch, sres, sigmaa, hatdiag,
#   eigval, eigvec, ccstd
############
NC1 <- function(xx, y, w, n, m) {
  # Weighted least squares using solve() for the inverse of X'WX.
  w <- w / sum(w) * n
  w2 <- (w * w)
  wx <- xx
  wy <- y
  for (i in 1:m) wx[, i] <- xx[, i] * w2
  wy <- y * w2
  xtwx <- t(xx) %*% wx
  cc <- solve(xtwx)
  xtwy <- t(xx) %*% wy
  a <- cc %*% xtwy
  pred <- xx %*% a
  res <- y - pred
  rsc <- sum(res * res)
  prumch <- mean(abs(res))
  sres <- sqrt(rsc / (n - m))
  ccdiag <- diag(cc)
  sigmaa <- sres * sqrt(ccdiag)
  hatdiag <- y
  xw1 <- xx
  w1 <- sqrt(w2)
  for (i in 1:m) xw1[, i] <- xx[, i] * w1
  xxtx <- xw1 %*% cc
  for (i in 1:n) hatdiag[i] <- sum(xxtx[i, ] * xw1[i, ])
  # BUG FIX: the original assigned eigval <- va and eigvec <- ve, but
  # va/ve are never computed inside NC1 (only inside NC) -- they were
  # undefined here.  Compute the eigendecomposition locally instead.
  ev <- eigen(xtwx)
  eigval <- ev$values    # vector 1 x m
  eigvec <- ev$vectors   # matrix m x m
  ccstd <- cc
  list(cc = cc, a = a, pred = pred, res = res, rsc = rsc, prumch = prumch,
       sres = sres, sigmaa = sigmaa, hatdiag = hatdiag, eigval = eigval,
       eigvec = eigvec, ccstd = ccstd)
}
######### END of Least Squares Regression ################

##### Robust Regression IRWLS ( exp(-E) "EXPE" ) ###########
if ((metoda == "EXPE") | (metoda == "EXPE2")) {
  if (metoda == "EXPE") expo <- 1
  if (metoda == "EXPE2") expo <- 2
  sres <- 1
  delta <- 1
  delta0 <- 0
  # w <- rnorm(n)^2
  w <- w / sum(w) * n
  # Iteratively reweighted least squares; stop when the weight change
  # becomes negligible relative to the residual scale.
  while (abs(delta) > 0.00001 * sres) {
    delta0 <- delta
    w2 <- w * w
    wx <- xx
    wy <- y
    for (i in 1:m) wx[, i] <- xx[, i] * w2
    wy <- y * w2
    xtwx <- t(xx) %*% wx
    # Inverse of X'WX via eigendecomposition.
    va <- eigen(xtwx)$values
    ve <- eigen(xtwx)$vectors
    if (min(va) <= 0) stop("Singular Characteristic Matrix XTX!")
    cc <- matrix(rep(0, m * m), ncol = m)
    for (i in 1:m) cc <- cc + (1 / va[i]) * ve[, i] %*% t(ve[, i])
    xtwy <- t(xx) %*% wy
    a <- cc %*% xtwy
    pred <- xx %*% a
    res <- y - pred
    prumch <- mean(res)
    rsc <- sum(res * res * w2)
    sres <- sqrt(rsc / (n - m))
    resn <- res / sres
    # New weights: exp(-|e|^expo), rescaled to sum to n.
    rr <- as.vector(exp(-(abs(resn^expo))))
    rr <- rr / sum(rr) * n
    delta <- sqrt(sum((w - rr)^2))
    w <- rr
    cat(delta, " ... ")
  }
  ccdiag <- diag(cc)
  sigmaa <- sres * sqrt(ccdiag)
  hatdiag <- y
  xxtx <- xx %*% cc
  for (i in 1:n) hatdiag[i] <- sum(xxtx[i, ] * xx[i, ])
  eigval <- eigen(xtwx)$values
  eigvec <- eigen(xtwx)$vectors
  ccstd <- cc
}
########### EXP_E END ###############################
##### BIR ########### Bounded influence regression
if (metoda == "BIR") {
  # Initial (unweighted) inverse of X'X via eigendecomposition.
  xtx <- t(xx) %*% xx
  va <- eigen(xtx)$values
  ve <- eigen(xtx)$vectors
  if (min(va) <= 0) stop("Singular Characteristic Matrix XTX!")
  cc0 <- matrix(rep(0, m * m), ncol = m)
  for (i in 1:m) cc0 <- cc0 + (1 / va[i]) * ve[, i] %*% t(ve[, i])
  # Leverage-based weights bounding the influence of high-leverage points.
  hatw <- y
  xxtx <- xx %*% cc0
  for (i in 1:n) hatw[i] <- sum(xxtx[i, ] * xx[i, ])
  shft <- 0.05
  hatw <- shft / (hatw * hatw + shft)
  hatw <- hatw / sum(hatw)
  sres <- 1
  delta <- 1
  delta0 <- 0
  # w <- rnorm(n)^2
  w <- w / sum(w) * n
  # IRWLS with the leverage weights folded into the working weights.
  while (abs(delta) > 0.00001 * sres) {
    delta0 <- delta
    w2 <- w * w * hatw * hatw
    wx <- xx
    wy <- y
    for (i in 1:m) wx[, i] <- xx[, i] * w2
    wy <- y * w2
    xtwx <- t(xx) %*% wx
    va <- eigen(xtwx)$values
    ve <- eigen(xtwx)$vectors
    if (min(va) <= 0) stop("Singularni char. matice!")
    cc <- matrix(rep(0, m * m), ncol = m)
    for (i in 1:m) cc <- cc + (1 / va[i]) * ve[, i] %*% t(ve[, i])
    xtwy <- t(xx) %*% wy
    a <- cc %*% xtwy
    pred <- xx %*% a
    res <- y - pred
    prumch <- mean(res)
    rsc <- sum(res * res)
    sres <- sqrt(rsc / (n - m))
    resn <- res / sres
    rr <- as.vector(exp(-(abs(resn^2))))
    rr <- rr / sum(rr) * n
    delta <- sqrt(sum((w - rr)^2))
    w <- rr
    cat(delta, " ... ")
  }
  # Final pass with the combined (robust * leverage) weights.
  w <- w * hatw
  w <- w / sum(w) * n
  w2 <- w * w
  for (i in 1:m) wx[, i] <- xx[, i] * w2
  wy <- y * w2
  xtwx <- t(xx) %*% wx
  va <- eigen(xtwx)$values
  ve <- eigen(xtwx)$vectors
  if (min(va) <= 0) stop("Singularni char. matice!")
  cc <- matrix(rep(0, m * m), ncol = m)
  for (i in 1:m) cc <- cc + (1 / va[i]) * ve[, i] %*% t(ve[, i])
  ccstd <- cc
  rsc <- sum(res * res * w2)
  sres <- sqrt(rsc / (n - m))
  resn <- res / sres
  ccdiag <- diag(cc)
  sigmaa <- sres * sqrt(ccdiag)
  hatdiag <- y
  xxtx <- xx %*% cc
  for (i in 1:n) hatdiag[i] <- sum(xxtx[i, ] * xx[i, ])
  eigval <- eigen(xtwx)$values
  eigvec <- eigen(xtwx)$vectors
}
########### BIR END ###############################
###########################################
# END OF REGRESSION SECTION
###########################################

######################################################################
################  G R A P H I C A L   O U T P U T S  #################
######################################################################
## Use this to divide the drawing area:   par(mfrow=c(3,4))
## Use this to restore the drawing area:  par(mfrow=c(1,1))

######### Regression curve ###########
## Only for ONE independent variable (pocsloup == 1)!
if (pocsloup == 1) {
  jemnost <- 200                  # grid resolution
  xgrafmin <- min(data)           # ZOOM X
  xgrafmax <- max(data)           # ZOOM X
  xrange <- xgrafmax - xgrafmin
  xgraf <- seq(from = xgrafmin, to = xgrafmax, length = jemnost)
  ygraf <- rep(0, jemnost)
  xpred <- as.matrix(xgraf)
  if (polynom) {
    for (i in 2:stupenpol) xpred <- cbind(xpred, xgraf^i)
  }
  if (user) {
    umod <- usermodel(xpred)
    xpred <- umod$X
  }
  x0 <- rep(1, jemnost)
  if (absolut) xpred <- cbind(x0, xpred)
  for (i in 1:jemnost) ygraf[i] <- xpred[i, ] %*% a
  ygrafmin <- min(c(ygraf, y))
  ygrafmax <- max(c(ygraf, y))
  yconfL <- ygraf
  yconfU <- ygraf
  ## Continuous weights: confidence band widened according to locally
  ## smoothed observation weights.
  w1 <- w
  tune <- diff(range(data))
  wpred <- xgraf
  fkvantM <- qf(1 - alfa, m, n - m)
  for (i in 1:jemnost) {
    kk <- sqrt(m * fkvantM) * sres
    wpred[i] <- 0
    for (j in 1:n) {
      wpred[i] <- wpred[i] + 1 -
        exp(-(tune / 100) * abs(xgraf[i] - data[j])) * (1 - w1[j])
    }
    wpred[i] <- wpred[i] / n
    kk1 <- sqrt(abs(t(xpred[i, ] * wpred[i]) %*% ccstd %*% xpred[i, ] * wpred[i]))
    yconfL[i] <- ygraf[i] - kk * kk1
    yconfU[i] <- ygraf[i] + kk * kk1
  }
  ## Continuous weights END
  ygrafmin <- min(c(ygrafmin, yconfL))
  ygrafmax <- max(c(ygrafmax, yconfU))
  # NOTE(review): [2] assumes the intercept occupies column 1 -- with
  # absolut == FALSE this picks the wrong label; confirm intent.
  popisx <- dimnames(xx)[[2]][2]
  popisy <- paste("Y=", yname, sep = "")
  popis <- "Regression Curve"
  plot(data, y, xlim = c(xgrafmin, xgrafmax), ylim = c(ygrafmin, ygrafmax),
       xlab = popisx, ylab = popisy, main = popis)
  lines(xgraf, ygraf)
  lines(xgraf, yconfL, lty = 4, col = 6)
  lines(xgraf, yconfU, lty = 4, col = 6)
}
######### Regression curve END ###########

#############################################
# Residuals
#############################################

############ Y-Prediction ##################
popisx <- "Y - predicted"
popisy <- "Y - measured"
popis <- "Y - predicted"
plot(pred, y, xlab = popisx, ylab = popisy, main = popis)
abline(0, 1)
############ Y-Prediction END ##################

############ Residuals vs. prediction ##################
popisx <- "Prediction"
popisy <- "E"
popis <- "Residuals - Prediction"
plot(pred, res, xlab = popisx, ylab = popisy, main = popis)
abline(h = 0)
############ Residuals vs. prediction - END ##################

############ Abs. residuals ##################
popisx <- "Index"
popisy <- "abs(E)"
popis <- "Abs. residuals"
plot(1:n, abs(res), xlab = popisx, ylab = popisy, main = popis)
abline(h = prumch)
############ Abs. residuals - END ##################
############ Squared Residuals ##################
popisx <- "Index"
popisy <- "E^2"
popis <- "Squared Residuals"
plot(1:n, res^2, xlab = popisx, ylab = popisy, main = popis)
abline(h = mean(res^2))
############ Squared Residuals - END ##################

############ Q-Q plot of residuals ##################
popisx <- "Q-Normal"
popisy <- "Q-residuals"
popis <- "Q-Q plot of residuals"
qi <- (1:n) / (n + 1)
qres <- qnorm(qi)
sortres <- sort(res)
plot(qres, sortres, xlab = popisx, ylab = popisy, main = popis)
abline(0, sres)   # line y = sres * x
############ Q-Q plot of residuals END ##################

############ Autocorrelation ##################
popisx <- "E(i)"
popisy <- "E(i-1)"
popis <- "Autocorrelation of residuals"
x00 <- res[2:n]
x01 <- res[1:(n - 1)]
plot(x01, x00, xlab = popisx, ylab = popisy, main = popis)
############ Autocorrelation - END ##################

############ Heteroscedasticity ##################
popisx <- "X"
popisy <- "Y"
popis <- "Heteroscedasticity"
# BUG FIX: x00/x01 carried length n-1 over from the autocorrelation
# plot above; re-declare them with length n before filling.
x00 <- rep(0, n)
x01 <- rep(0, n)
for (i in 1:n) {
  x00[i] <- (1 - hatdiag[i]) * pred[i]
  x01[i] <- res[i] / sqrt(1 - hatdiag[i]) / sres
}
plot(x00, x01, xlab = popisx, ylab = popisy, main = popis)
############ Heteroscedasticity END ##################

############ Jack-Knife residuals ##################
popisx <- "index"
popisy <- "Ej"
popis <- "Jack-Knife residuals"
x00 <- 1:n
for (i in 1:n) {
  esi <- res[i] / sqrt(1 - hatdiag[i]) / sres
  x01[i] <- esi * sqrt((n - m - 1) / (n - m - esi * esi))
}
plot(x00, x01, xlab = popisx, ylab = popisy, main = popis)
abline(h = 0)
############ Jack-Knife residuals END ##################

############ Predicted Residuals ##################
popisx <- "index"
popisy <- "Epred"
popis <- "Predicted Residuals"
x00 <- 1:n
for (i in 1:n) {
  epi <- res[i] / (1 - hatdiag[i])
  x01[i] <- epi
}
plot(x00, x01, xlab = popisx, ylab = popisy, main = popis)
abline(h = 0)
############ Predicted Residuals END ##################

#########################################################
# Partial regression and residual plots
#########################################################
############## Partial regression plot ###############
## Only for m > 1 !!
zz <- xx
if (absolut) prvni <- 2 else prvni <- 1
for (cis in prvni:m) {
  popisx <- paste("Component", cis)
  popisy <- "Py"
  popis <- paste("Partial regression plot Y' - X'", cis, sep = "")
  xxj <- xx[, -cis]                 # leave out the cis-th column of xx
  xtxj1 <- solve(t(xxj) %*% xxj)
  xxtxj <- xxj %*% xtxj1
  vv <- rep(0, n)                   # vector n x 1
  uu <- rep(0, n)                   # vector n x 1
  for (i in 1:n) {
    vv[i] <- 0
    uu[i] <- 0
    for (j in 1:n) {
      # pij = (I - Hj)[i, j]: projection orthogonal to the other columns.
      if (i == j) kron <- 1 else kron <- 0
      pij <- kron - sum(xxtxj[i, ] * xxj[j, ])
      vv[i] <- vv[i] + pij * xx[j, cis]
      uu[i] <- uu[i] + pij * y[j]
    }
    zz[i, cis] <- a[cis] * uu[i] + res[i]   # NOTE(review): zz is never plotted
  }
  plot(vv, uu, xlab = popisx, ylab = popisy, main = popis)
  abline(0, a[cis])
}
######### Partial regression plot - END ########

######### Partial Residual plot ########
## Only for m > 1 !!
if (absolut) prvni <- 2 else prvni <- 1
for (cis in prvni:m) {
  popisx <- paste("X", cis)
  popisy <- "Res parc."
  popis <- "Partial Residual plot"
  ccparc <- (xx[, cis] - mean(xx[, cis])) * a[cis]
  ssparc <- ccparc + res
  ccabs <- -a[cis] * mean(xx[, cis])
  plot(xx[, cis], ssparc, xlab = popisx, ylab = popisy, main = popis)
  abline(ccabs, a[cis])
}
######### Partial Residual plot END ########

###################################################
# Influential Points Diagnostics plots
###################################################

############ Projection Matrix Plot ##################
popisx <- "index"
popisy <- "Hat-diagonal"
popis <- "Projection Matrix H"
x00 <- 1:n
ymax <- max(c(hatdiag, 2 * m / n))
plot(x00, hatdiag, xlab = popisx, ylab = popisy, ylim = c(0, ymax), main = popis)
lines(x00, hatdiag, col = 4)
points(x00, hatdiag)
abline(h = 2 * m / n, col = 8, lty = 4)
############ Projection Matrix END ##################

############ Prediction of Residuals ##################
popisx <- "E"
popisy <- "E pred"
popis <- "Prediction of Residuals"
x01 <- rep(0, n)
for (i in 1:n) {
  epi <- res[i] / (1 - hatdiag[i])
  x01[i] <- epi
}
plot(x01, res, xlab = popisx, ylab = popisy, main = popis)
abline(0, 1)
############ Prediction of Residuals END ##################

############ Pregibon Plot ##################
popisx <- "Hat-diagonal"
popisy <- "E2 norm"
popis <- "Pregibon Plot"
x00 <- hatdiag
x01 <- res * res / rsc
ymax <- max(x01, 3 * (m + 1) / n)
plot(x00, x01, xlab = popisx, ylab = popisy, ylim = c(0, ymax), main = popis)
abline(2 * (m + 1) / n, -1)
abline(3 * (m + 1) / n, -1, col = 8)
############ Pregibon Plot - END ##################

############ Williams Plot ##################
tq095 <- qt(0.95, n - m - 1)
popisx <- "Hat-diagonal"
popisy <- "E jack"
popis <- "Williams Plot"
x00 <- hatdiag
x01 <- rep(0, n)
for (i in 1:n) {
  esi <- res[i] / sqrt(1 - hatdiag[i]) / sres
  x01[i] <- abs(esi * sqrt((n - m - 1) / (n - m - esi * esi)))
}
ymin <- min(x01)
ymax <- max(x01, tq095)
xmax <- max(x00, 2 * m / n)
plot(x00, x01, xlab = popisx, ylab = popisy, xlim = c(0, xmax),
     ylim = c(ymin, ymax), main = popis)
abline(h = tq095, col = 8)
abline(v = 2 * m / n, col = 8)
############ Williams Plot - END ##################

############ McCulloh-Meter plot ##################
popisx <- "LN(Hat-diagonal)n"
popisy <- "E std"
popis <- "McCulloh-Meter plot"
x00 <- rep(0, n)
x01 <- rep(0, n)
for (i in 1:n) {
  x00[i] <- log(hatdiag[i] / (m * (1 - hatdiag[i])))
  x01[i] <- res[i] / sqrt(1 - hatdiag[i]) / sres
  x01[i] <- log(x01[i] * x01[i])
}
tq095 <- qt(0.95, n - m - 1)
fq090 <- qf(0.9, n - m, m)
aa0 <- 2 / (n - 2 * m)
if (aa0 <= 0) aa0 <- 2
bb0 <- log((n - m) * tq095^2 * (tq095^2 + n - m))
xmin <- min(x00)
xmax <- max(x00, log(aa0))
ymin <- min(x01)
ymax <- max(x01, bb0)
plot(x00, x01, xlab = popisx, ylab = popisy, xlim = c(xmin, xmax),
     ylim = c(ymin, ymax), main = popis)
abline(h = bb0)
abline(v = log(aa0))
abline(-log(fq090), -1, col = 8)
############ McCulloh-Meter plot END ##################

############ L-R plot ##################
popisx <- "Hat-diagonal"
popisy <- "E2 norm"
popis <- "L-R plot"
x00 <- hatdiag
x01 <- res * res / rsc
plot(x00, x01, xlim = c(0, 1), ylim = c(0, 1), xlab = popisx,
     ylab = popisy, main = popis)
abline(1, -1, lty = 4)
abline(h = 0)
abline(v = 0)
njem <- 50
xlr <- (0:njem) / njem
for (cc0 in c(2, 4, 8)) {
  kk0 <- n * (n - m - 1) / (cc0 * cc0 * m)
  ylr <- (2 * xlr - xlr * xlr - 1) / (xlr * (1 - kk0) - 1)
  lines(xlr, ylr, col = cc0)
}
############ L-R plot END ##################

#---- Pre-calculations for Cook, Atkinson and Likelihood plots ----
resstd <- res / (sres * sqrt(1 - hatdiag))
resjack <- rep(0, n)   # declaration
for (i in 1:n) {
  esi <- res[i] / sqrt(1 - hatdiag[i]) / sres
  resjack[i] <- esi * sqrt((n - m - 1) / (n - m - esi * esi))
}
# Predicted residuals -- the original wrapped this single vectorized
# assignment in an n-iteration loop that recomputed the same vector.
respred <- res / (1 - hatdiag)
res2norm <- res * res / rsc
atkind <- abs(resjack) * sqrt((n - m) / m * hatdiag / (1 - hatdiag))  # 6.110
cookd <- abs((resstd / m)) * hatdiag / (1 - hatdiag)                  # 6.108
ldb <- rep(0, n)    # declaration
lds <- rep(0, n)    # declaration
ldbs <- rep(0, n)   # declaration
for (i in 1:n) {
  di <- (resstd[i]^2) / (n - m)
  hi <- hatdiag[i]
  ldb[i] <- n * log((di * hi) / (1 - hi) + 1)
  lds[i] <- n * log(n / (n - 1)) + n * log(1 - di) + di * (n - 1) / (1 - di) - 1
  ldbs[i] <- n * log(n / (n - 1)) + n * log(1 - di) +
    di * (n - 1) / ((1 - di) * (1 - hi)) - 1
}
atkinkrit <- 2 * sqrt((m * (n - m)) / (m * n))
cookkrit <- 1
ldkrit <- qchisq(1 - alfa, m + 1)   # same for ldb, lds, ldbs
#-------------------------------------------------

############ Cook Distance Plot ##################
popisx <- "Index"
popisy <- "Cook"
popis <- "Cook Distance Plot"
x00 <- 1:n
ymin <- 0
ymax <- max(cookd, cookkrit)
plot(x00, cookd, ylim = c(ymin, ymax), xlab = popisx, ylab = popisy, main = popis)
abline(h = cookkrit, col = 8, lty = 4)
############ Cook Distance Plot END ##################

############ Atkinson Distance Plot ##################
popisx <- "Index"
popisy <- "Atkins"
popis <- "Atkinson Distance Plot"
x00 <- 1:n
ymin <- 0
ymax <- max(atkind, atkinkrit)
plot(x00, atkind, ylim = c(ymin, ymax), xlab = popisx, ylab = popisy, main = popis)
abline(h = atkinkrit, col = 8, lty = 4)
############ Atkinson Distance Plot END ##################

############ Likelihood Distance Plots ##################
popisx <- "Index"
popisy <- "LD"
popis <- "Likelihood Distance Plots"
x00 <- 1:n
ymin <- 0
ymax <- max(c(ldb, lds, ldbs, ldkrit))
plot(x00, ldb, ylim = c(ymin, ymax), type = "l", xlab = popisx,
     ylab = popisy, main = popis, col = 1)
lines(x00, ldb, col = 3)
points(x00, ldb, col = 3)
lines(x00, lds, col = 4)
points(x00, lds, col = 4)
lines(x00, ldbs, col = 6)
points(x00, ldbs, col = 6)
abline(h = ldkrit, col = 8, lty = 4)
#
# Plot legend:
#   col 3 ... LD(b)
#   col 4 ... LD(s)
#   col 6 ... LD(b,s)
#
########## Likelihood Distance Plots END ###############

###################################################
# RANKIT PLOTS
###################################################

############ Q-Q for Normalized Residuals ##################
popisx <- "Q-Normal"
popisy <- "Q-Residuals"
popis <- "Q-Q for Normalized Residuals"
qi <- (1:n) / (n + 1)
qres <- qnorm(qi)
sortres <- sort(res / sres)
plot(qres, sortres, xlab = popisx, ylab = popisy, main = popis)
abline(0, 1)   # line y = x
############ Q-Q for Normalized Residuals END ##################

############ Q-Q Predicted Residuals ##################
popisx <- "Q-Normal"
popisy <- "Q-Residuals"
popis <- "Q-Q Predicted Residuals"
qi <- (1:n) / (n + 1)
qres <- qnorm(qi)
for (i in 1:n) {
  epi <- res[i] / (1 - hatdiag[i])
  x01[i] <- epi
}
sortres <- sort(x01 / sres)
ss <- sqrt(var(sortres))
plot(qres, sortres, xlab = popisx, ylab = popisy, main = popis)
abline(0, ss)   # line y = ss*x
############ Q-Q Predicted Residuals END ##################

############ Q-Q Jack-Knife Residuals ##################
popisx <- "Q-Teor"
popisy <- "Q-Residual"
popis <- "Q-Q, Jackknife"
qi <- (1:n) / (n + 1)
qres <- qnorm(qi)
for (i in 1:n) {
  esi <- res[i] / sqrt(1 - hatdiag[i]) / sres
  x01[i] <- esi * sqrt((n - m - 1) / (n - m - esi * esi))
}
ss <- sqrt(var(x01))
sortres <- sort(x01)
# BUG FIX: the original passed main=popis twice, which is an error in R
# ("formal argument matched by multiple actual arguments").
plot(qres, sortres, xlab = popisx, ylab = popisy, main = popis)
abline(0, ss)   # line y = ss*x
############ Q-Q Jack-Knife Residuals END ##################

###################################################################
########  E N D   O F   G R A P H I C A L   O U T P U T
###################################################################

######################################################################
###############  TEXT OUTPUT / RESULT TABLES  ########################
######################################################################
# Used global variables:
#   n, m, metoda, alfa, absolut, pocsloup, xx, y
#   tkvant  = qt(1-alfa/2, n-m)
#   fkvant  = qf(1-alfa, m-1, n-m)
#   jmenasl = dimnames(xx)[[2]]   # column names
cat("Linear Regression Results", "\n")
cat("\n")
smetoda <- ""
if (metoda == "NC") smetoda <- "Classical Least Squares"
# Human-readable label for each regression method code.
if (metoda == "HOD") smetoda <- paste("Corrected Rank", "treshold=", omez)
if (metoda == "L1") smetoda <- "L1, Least Absolute Error Method"
if (metoda == "LP") smetoda <- paste("Lp-regression, p=", LP)
if (metoda == "MINIMAX") smetoda <- "Minimal Maximal Error Method"
if (metoda == "FAIR") smetoda <- "Robust M-estimate Fair"
if (metoda == "EXPE") smetoda <- "Robust M-estimate W=EXP(-e)"
if (metoda == "EXPE2") smetoda <- "Robust M-estimate Welsch"
if (metoda == "BIR") smetoda <- "Resistent estimate Bounded Influence Regression (BIR)"
if (metoda == "KVANT") smetoda <- paste("Quantile Regression, q=", QUAN)
if (metoda == "STEP+") smetoda <- "Stepwise Forward"
if (metoda == "STEP-") smetoda <- "Stepwise Backward"
if (metoda == "STEPALL") smetoda <- "Stepwise-All Subsets Regression"
cat("Significance Level", "\t", alfa, "\n")
cat("Quantile t(1-alfa/2,n-m)", "\t", tkvant, "\n")
cat("Quantile F(1-alfa,m,n-m)", "\t", fkvant, "\n")
if (absolut) cat("Absolute Term:", "\t", "YES", "\n") else
  cat("Absolute Term:", "\t", "NO", "\n")
cat("No of Rows", "\t", n, "\n")
cat("No of Parameters", "\t", m, "\n")
cat("Method", "\t", smetoda, "\n")
cat("Columns", "\n")
for (i in 1:pocsloup) cat("\t", "NazevSloupce", i, "\n")
stransf <- ""
# BUG FIX: "No transormation" -> "No transformation" (output typo).
if (notransf) stransf <- "No transformation"
if (polynom) stransf <- paste("Polynomial of ", stupenpol, " degree", sep = "")
if (user) stransf <- "User Transform"
cat("Transformation", "\t", stransf, "\n")
jmenasl <- dimnames(xx)[[2]]   # names of columns
if (user) for (i in 1:m)
  cat(paste(i, ". variable:", "\t", jmenasl[i], "\n", sep = ""))
# Basic Analysis ##################
prumy <- mean(y)
prumx <- rep(0, m)    # declaration
korxy <- rep(0, m)    # declaration
vyzn <- rep(0, m)     # declaration
smox <- rep(0, m)     # declaration
for (i in 1:m) prumx[i] <- mean(xx[, i])
for (i in 1:m) smox[i] <- sqrt(var(xx[, i]))
if (absolut) prvni <- 2 else prvni <- 1
for (i in prvni:m) {
  # Correlation of each regressor with Y and its t-test significance.
  kova <- sum((xx[, i] - prumx[i]) * (y - prumy))
  varx <- sum((xx[, i] - prumx[i]) * (xx[, i] - prumx[i]))
  vary <- sum((y - prumy) * (y - prumy))
  korxy[i] <- kova / sqrt(varx * vary)
  vyzn[i] <- 2 * (1 - pt(abs(korxy[i]) * sqrt(n - 2) /
                           sqrt(1 - korxy[i] * korxy[i]), n - 2))
}
cat("\n", "Section 1. Basic Analysis", "\n", "\n")   # BOLD #
cat("Variable Characteristics", "\n")
cat("Variable", "\t", "Mean", "\t", "StdDev", "\t", "Corr. vs. Y", "\t",
    "Significance", "\n")
for (i in prvni:m) cat(jmenasl[i], "\t", prumx[i], "\t", smox[i], "\t",
                       korxy[i], "\t", vyzn[i], "\n")
#----------------------------------------------------
korxx <- matrix(rep(0, m * m))   # declaration: korxx[m,m]
xxn <- xx

cat("\n", "Section 2. Pair Correlations (Xi, Xj)", "\n")   # BOLD #
cat("Variable", "\t", "Corr.Coeff", "\t", "Significance", "\n")
for (i in 1:m) for (j in 1:n) xxn[j, i] <- xx[j, i] - prumx[i]
korxx <- (t(xxn) %*% xxn) / (n - 1)
# 1e-20 guards against division by zero for constant columns.
for (i in 1:m) for (j in 1:m)
  korxx[i, j] <- korxx[i, j] / (smox[i] * smox[j] + 1e-20)
if (absolut) korxx[1, 1] <- 1
for (i in 1:(m - 1)) for (j in (i + 1):m) {
  vyznam <- 2 - 2 * pt(abs(korxx[i, j]) * sqrt(n - 2) /
                         sqrt(1 - korxx[i, j] * korxx[i, j]), n - 2)
  cat(jmenasl[i], " - ", jmenasl[j], "\t", korxx[i, j], "\t", vyznam, "\n")
}
#-------------------------------------------------------
if (m > 1) {   # only for m > 1
  korxxi <- rep(0, m)       # declaration korxxi[m]
  d0 <- det(korxx)          # determinant of korxx
  for (i in 1:m) {
    kor1 <- korxx[-i, -i]   # leave out i-th row and column of korxx
    di <- det(as.matrix(kor1))
    korxxi[i] <- sqrt(1 - d0 / di)   # multiple correlation
  }
  vif <- diag(solve(korxx))        # diagonal of INV(korxx)
  vlcisxx <- eigen(korxx)$values   # eigenvalues
  minvlcis <- min(vlcisxx)
  cat("\n", "Section 3. Multicollinearity Detection", "\n")   # BOLD #
  cat("Variable", "\t", "Eigenvalue kor. m.", "\t",
      "Condition number kappa", "\t", "VI factor", "\t",
      "Multiple corr.", "\n")
  for (i in 1:m) cat(jmenasl[i], "\t", vlcisxx[i], "\t",
                     vlcisxx[i] / minvlcis, "\t", vif[i], "\t",
                     korxxi[i], "\n")
  # Red flags: vif[i] > 10, vlcisxx[i]/minvlcis > 1000
}
# Parameter significance flags for Section 5.
vyzn <- rep(0, m)     # declaration: vector [m]
vyzns <- rep("", m)   # declaration: vector of strings [m]
for (i in 1:m) {
  vyzn[i] <- 2 * (1 - pt(abs(a[i] / sigmaa[i]), n - m))
  if (vyzn[i] > alfa) vyzns[i] <- "Insignificant" else vyzns[i] <- "Significant"
}
# Section 4: analysis of variance.
yprumer <- mean(y)
sumy2 <- sum(y^2)
csc <- sum((y - yprumer)^2)
cscprum <- mean((y - yprumer)^2)
cscvar <- var(y - yprumer)
tsc <- csc - rsc
tscprum <- mean((yprumer - pred)^2)
tscvar <- var(yprumer - pred)
rscprum <- mean(res^2)
rscvar <- var(res)
cat("\n", "Section 4. Analysis of Variance", "\n")   # BOLD #
cat("Mean Y", "\t", yprumer, "\n")
cat("Source", "\t", "Square Sum", "\t", "Mean Square", "\t", "Variance", "\n")
cat("Overall Variability", "\t", csc, "\t", cscprum, "\t", cscvar, "\n")
cat("Explained Variability", "\t", tsc, "\t", tscprum, "\t", tscvar, "\n")
cat("Residual variability", "\t", rsc, "\t", rscprum, "\t", rscvar, "\n")
fis <- (csc - rsc) * (n - m) / (rsc * (m - 1))
# BUG FIX: "Midel is insignificant" -> "Model is insignificant".
if (fis > fkvant) fistext <- "Model is Significant" else
  fistext <- "Model is insignificant"
fispr <- 1 - pf(fis, m - 1, n - m)
cat("F-statistic", "\t", fis, "\n")
cat("Critical F-quantile (1-alfa, m-1, n-m)", "\t", fkvant, "\n")
cat("p-value", "\t", fispr, "\n")
cat("Conclusion", "\t", fistext, "\n", "\n")
##################################################
cat("\n", "Section 5. Parameter Estimates", "\n")   # BOLD #
cat("Variable", "\t", "Estimate", "\t", "StdDev.", "\t", "Conclusion", "\t",
    "p-value", "\t", "Lower limit", "\t", "Upper limit", "\n")
for (i in 1:m) cat(jmenasl[i], "\t", a[i], "\t", sigmaa[i], "\t", vyzns[i],
                   "\t", vyzn[i], "\t", a[i] - tkvant * sigmaa[i], "\t",
                   a[i] + tkvant * sigmaa[i], "\n")
# Section 6: overall statistical characteristics.
yprumer <- mean(y)
sumy2 <- sum(y^2)
csc <- sum((y - yprumer)^2)
tsc <- csc - rsc
R <- sqrt(1 - (rsc / csc))
R2 <- R^2
aic <- n * log(rsc / n) + 2 * m
mep <- sum(res^2 / ((1 - hatdiag)^2)) / n   # mean error of prediction
RP <- 1 - ((n * mep) / (sumy2 - n * yprumer^2))
RP <- 1 - (n * mep) / csc   # NOTE(review): overwrites the previous RP formula
cat("\n", "Section 6. Statistical Characteristics ", "\n")   # BOLD #
cat("Multiple Correlation Coefficient R", "\t", R, "\n")
cat("Determination Ceofficient R^2", "\t", R2, "\n")
cat("Predicted Correlation Coefficient Rp", "\t", RP, "\n")
cat("Mean Error of Prediction MEP", "\t", mep, "\n")
cat("Akaike Information Criterion", "\t", aic, "\n")
epsy <- sqrt(hatdiag * sres^2)   # standard deviation of the prediction
cat("\n", "Section 7. Analysis of Classical Residuals", "\n", "\n")   # (BOLD)
cat("Index", "\t", "Y measured", "\t", "Y predicted", "\t", "StdDev. Y",
    "\t", "Residual", "\t", "Rel. Residual [%Y]", "\n")
for (i in 1:n) {
  # Relative residual is undefined when y == 0; print "---" instead.
  if (y[i] == 0)
    cat(i, "\t", y[i], "\t", pred[i], "\t", epsy[i], "\t", res[i], "\t",
        "---", "\n")
  else
    cat(i, "\t", y[i], "\t", pred[i], "\t", epsy[i], "\t", res[i], "\t",
        100 * res[i] / y[i], "\n")
}
# Residual moments.
u1 <- sum(res) / n
u2 <- sum(res * res) / n
u3 <- sum(res * res * res) / n
u4 <- sum(res * res * res * res) / n
cat("Residual Sum of Squares", "\t", rsc, "\n")
cat("Mean of Absolute Residuals", "\t", sum(abs(res)) / n, "\n")
cat("Residual StdDev", "\t", sres, "\n")
cat("Residual Variance", "\t", sres^2, "\n")
# NOTE(review): u3^2/u2^3 is the SQUARED skewness (g1^2); this matches
# its later use in the Jarque-Berra statistic -- confirm the label.
cat("Residual Skewness", "\t", (u3 * u3) / (u2 * u2 * u2), "\n")
# BUG FIX: this line was labelled "Residual Variance" a second time,
# although u4/u2^2 is the kurtosis.
cat("Residual Kurtosis", "\t", u4 / (u2 * u2), "\n")
cat("\n", "Section 8. Regression Triplet Analysis", "\n", "\n")   # (BOLD)
#----- Fisher-Snedecor test of significance ---- 6.39
fis <- (csc - rsc) * (n - m) / (rsc * (m - 1))
if (fis > fkvant) fistext <- "Model is Significant" else
  fistext <- "Model is Insignificant"
fispr <- 1 - pf(fis, m - 1, n - m)
cat("Fisher-Snedecor test of Significance", "\n")
cat("F-Statistic", "\t", fis, "\n")
cat("Critical F- (1-alfa, m-1, n-m)", "\t", fkvant, "\n")
cat("p-value", "\t", fispr, "\n")
cat("Conclusion", "\t", fistext, "\n", "\n")
#----- Scott criterion of multicollinearity ---- 6.49
ftest <- R2 / (m - 1) / ((max(1 - R2, 1e-30)) / (n - m))
sco <- sum((a / sigmaa)^2) / (m - 1)
# sco <- (ftest/mstat-1)/(ftest/mstat+1)
scotext <- "Model is correct."
if (sco > 0.33) scotext <- "Model Shows Multicollinearity!"
if (sco > 0.8) scotext <- "Multicollinearity is too high, model is incorrect!"
cat("Scott Criterion of Multicollinearity", "\n")
cat("SC Statistic", "\t", sco, "\n")
cat("Conclusion", "\t", scotext, "\n", "\n")
#----- Cook-Weisberg test of heteroscedasticity ----- 6.131
cw <- sum((pred - yprumer) * res * res)
cw <- cw * cw
cw2 <- 2 * sum((pred - yprumer)^2) * sres * sres * sres * sres
cw <- cw / cw2
chikvant1 <- qchisq(1 - alfa, 1)
cwpr <- 1 - pchisq(cw, 1)
cwtext <- "Residuals are homoscedastic."
if (cw > chikvant1) cwtext <- "Residuals are heteroscedastic!"
cat("Cook-Weisberg test of heteroscedasticity", "\n")
cat("CW-statistic", "\t", cw, "\n")
cat("Critical quantile Chi^2(1-alfa,1)", "\t", chikvant1, "\n")
cat("p-value", "\t", cwpr, "\n")
cat("Conclusion", "\t", cwtext, "\n", "\n")
#----- Jarque-Berra test of normality ---- 6.133
g1 <- u3 * u3 / (u2 * u2 * u2)   # squared skewness
g2 <- u4 / (u2 * u2)             # kurtosis
jb <- n * (g1 / 6 + (g2 - 3)^2 / 24)
chikvant2 <- qchisq(1 - alfa, 2)
jbpr <- 1 - pchisq(jb, 2)
jbtext <- "Residuals are normally distributed."
if (jb > chikvant2) jbtext <- "Residual are NOT normally distributed!"
cat("Jarque-Berra test of normality", "\n")
cat("JB-statistic ", "\t", jb, "\n")
cat("Critical quantile Chi^2(1-alfa,2)", "\t", chikvant2, "\n")
cat("p-value", "\t", jbpr, "\n")
cat("Conclusion", "\t", jbtext, "\n", "\n")
#---- Wald test of autocorrelation -------- 6.177
ro <- 0
for (i in 1:(n - 1)) ro <- ro + res[i] * res[i + 1]
ro <- (ro / (n - 1)) / (sres * sres)   # first-order autocorrelation
wald <- n * ro * ro / (1 - ro * ro)
chikvant1 <- qchisq(1 - alfa, 1)
waldpr <- 1 - pchisq(wald, 1)
if (wald > chikvant1) waldtext <- "Autocorrelation is significant" else
  waldtext <- "Autocorrelation is insignificant"
cat("Wald test of autocorrelation", "\n")
cat("WA-statistic ", "\t", wald, "\n")
cat("Critical quantile Chi^2(1-alfa,1)", "\t", chikvant1, "\n")
cat("p-value", "\t", waldpr, "\n")
cat("Conclusion", "\t", waldtext, "\n", "\n")
#---- Durbin-Watson test of autocorrelation -------- 6.178
dw <- 0
for (i in 1:(n - 1)) dw <- dw + (res[i] - res[i + 1])^2
dw <- dw / rsc
dwtext <- "Residuals are not correlated."
if (dw > 2.5) dwtext <- "Residuals are negatively correlated!"
if (dw < 1.5) dwtext <- "Residuals are positively correlated!"
cat("Durbin-Watson test of autocorrelation", "\n")
cat("DW-statistic ", "\t", dw, "\n")
cat("Conclusion", "\t", dwtext, "\n", "\n")
# ---- Sign test of fit ----- 6.130
nuni <- 0     # number of sign changes (runs)
nplus <- 0    # number of non-negative residuals
nminus <- 0   # number of negative residuals
znam <- 0
eps <- sres * 1.43038e-10   # guard against exactly-zero residuals
for (i in 1:n) {
  if (res[i] >= 0) nplus <- nplus + 1
  if (res[i] < 0) nminus <- nminus + 1
  if (sign(res[i] + eps) != znam) {
    znam <- sign(res[i] + eps)
    nuni <- nuni + 1
  }
}
nt0 <- 1 + (2 * nplus * nminus) / (nplus + nminus)
dt01 <- 2 * nplus * nminus * (2 * nplus * nminus - nplus - nminus)
dt02 <- (nplus + nminus) * (nplus + nminus) * (nplus + nminus - 1)
dt0 <- abs(dt01 / dt02)
uu <- abs((nuni - nt0 + 0.5) / sqrt(dt0))
nkvant <- qnorm(1 - alfa / 2)
znapr <- 2 * (1 - pnorm(uu))
znatext <- "There is no trend in residuals."
if (uu > nkvant) znatext <- "There IS TREND in residuals!"
cat("Sign test of fit", "\n")
cat("Sgn-statistic ", "\t", uu, "\n")
cat("Critical Quantile N(1-alfa/2)", "\t", nkvant, "\n")
cat("p-value", "\t", znapr, "\n")
cat("Conclusion", "\t", znatext, "\n", "\n")
#-- Influential point diagnostics --------------
resstd <- res / (sres * sqrt(1 - hatdiag))
resjack <- rep(0, n)   # variable declaration
for (i in 1:n) {
  esi <- res[i] / sqrt(1 - hatdiag[i]) / sres
  resjack[i] <- esi * sqrt((n - m - 1) / (n - m - esi * esi))
}
# Predicted residuals (the original wrapped this vectorized assignment
# in a pointless n-iteration loop).
respred <- res / (1 - hatdiag)
res2norm <- res * res / rsc
hatdiagy <- hatdiag + res2norm
atkind <- abs(resjack) * sqrt((n - m) / m * hatdiag / (1 - hatdiag))  # 6.110
# NOTE(review): here Cook's distance is computed WITHOUT abs() around
# resstd/m, unlike the plotting section earlier -- confirm which form
# is intended.
cookd <- (resstd / m) * hatdiag / (1 - hatdiag)                       # 6.108
andrewd <- 1 - hatdiag - res2norm                                     # 6.113
vlivy <- resjack * sqrt(hatdiag / (1 - hatdiag))
ldb <- rep(0, n)    # variable declaration
lds <- rep(0, n)    # variable declaration
ldbs <- rep(0, n)   # variable declaration
for (i in 1:n) {
  di <- (resstd[i]^2) / (n - m)
  hi <- hatdiag[i]
  ldb[i] <- n * log((di * hi) / (1 - hi) + 1)
  lds[i] <- n * log(n / (n - 1)) + n * log(1 - di) + di * (n - 1) / (1 - di) - 1
  ldbs[i] <- n * log(n / (n - 1)) + n * log(1 - di) +
    di * (n - 1) / ((1 - di) * (1 - hi)) - 1
}
# Critical values:
hatdiagkrit <- 2 * m / n
hatdiagykrit <- 2 * (m + 1) / n
atkinkrit <- 2 * sqrt((m * (n - m)) / (m * n))
cookkrit <- 1
andrewkrit <- 2 * (m + 1) / n - 1   # (lower bound)
vlivykrit <- 2 * sqrt(m / n)
ldkrit <- qchisq(1 - alfa, m + 1)   # same for ldb, lds, ldbs
#---------- Influential data indication -----------------------
cat("\n", "Section 9. Influential Points Diagnostics", "\n", "\n")   # (BOLD)
cat(" Subsection A. Residual Analysis", "\n")
cat("Index", "\t", "Standard", "\t", "Jackknife", "\t", "Predicted", "\t",
    "Diag(Hii)", "\t", "Diag(H*ii)", "\t", "Cook Dist.", "\n")
for (i in 1:n) cat(i, "\t", resstd[i], "\t", resjack[i], "\t", respred[i],
                   "\t", hatdiag[i], "\t", hatdiagy[i], "\t", cookd[i], "\n")
cat("Subsection B. Influence Analysis", "\n")
cat("Index", "\t", "Atkinson Dist.", "\t", "Andrews-Pregibon stat", "\t",
    "Infl on Y^", "\t", "Infl on parameters LD(b)", "\t",
    "Infl on variance LD(s)", "\t", "Total Influence LD(b,s)", "\n")
for (i in 1:n) cat(i, "\t", atkind[i], "\t", andrewd[i], "\t", vlivy[i],
                   "\t", ldb[i], "\t", lds[i], "\t", ldbs[i], "\n")
####################################################################
#########  E N D   O F   F I L E
####################################################################