mpca <- function (cdata, threshold) { # standardize cdata.mean = vector() cdata.stddev = vector() for (j in 1:ncol(cdata) ) { cdata.mean[j] = mean(cdata[,j]); cdata.stddev[j] = sqrt(var(cdata[,j])); } Z = cdata for (j in 1:ncol(cdata) ) { Z[,j] = (cdata[,j] - cdata.mean[j]) / cdata.stddev[j] } # compute correlation matrix (vector operators) rho = matrix(nrow = ncol(Z), ncol = ncol(Z)) for (k in 1:ncol(Z) ) { for (j in 1:ncol(Z) ) { rho[k,j] = (Z[,k] %*% Z[,j]) / nrow(Z) } } # compute eigenvalues and eigenvectors of correlation matrix rho.eigen = eigen(rho) e = rho.eigen$vectors lambda = rho.eigen$values # compute a transformed dataset (optional) # Y = matrix(nrow = nrow(Z), ncol = ncol(Z)) # for (j in 1:ncol(Y)) Y[,j] = Z * e[,j]; Y = Z * e # compute the correlation matrix between *principal components* and *origina attributes* pca_corr = matrix (nrow = ncol(Y), ncol = ncol(Z)) for (k in 1:ncol(Y) ) { for (j in 1:ncol(Z) ) { pca_corr[k,j] = e[j,k] * sqrt(lambda[k]) } } # Eigenvalue Criterion for (k in 1:ncol(Y)) if (lambda[k] > 1) print(c("EC suggests to keep ",k)) else print(c("EC suggests to drop ",k)) # Proportion of Variance Explained pv = 0.0 for (k in 1:ncol(Y)) { pv = pv + lambda[k] / ncol(Y); if (pv < threshold) print(c("PVE suggests to keep ",k)) else print(c("PVE suggests to drop ",k)) } # scree plot critetion plot(lambda, type="l") # communality computation communality = matrix(nrow = ncol(Y), ncol = ncol(Z)) p = 0 for (p in 1:ncol(Y)) { for (j in 1:ncol(Z)){ #communality at "level" p, attribute j communality[p,j] = 0.0 for (k in 1:p) communality[p,j] = communality[p,j] + pca_corr[k,j]^2; } allcomm = TRUE; for (j in 1:ncol(Z)) if (communality[p,j] < threshold) allcomm = FALSE; if (allcomm) { print(c("Communality criterion suggests to keep ",1:p)); break; } } # build a result record res = NULL res$p = p res$Y = Y res$e = e res$lambda = lambda res }