# Validity index R script for Bhatta.Lung.filtered.norm using PMO projections
# and hierarchical clustering.
# December 2005
#
# Steps performed by this script:
#   1. Load the expression data and label the columns with the sample names.
#   2. Cluster the samples hierarchically (ward and average linkage) in the
#      original space and cut each tree at every number of clusters in range.c.
#   3. Build trees from PMO random projections for each subspace dimension
#      derived from epsilon.set.
#   4. Compute validity and AC indices for every (number of clusters,
#      subspace dimension) pair.
#   5. Save all resulting objects to a single file.

library(Biobase)
library(clusterv)

###########################################
# Loading Bhatta.Lung.filtered.norm
set.seed(100)
load("Bhatta.Lung.filtered.norm")
M <- exprs(Bhatta.Lung.filtered.norm)

# The dataset includes 203 specimens histologically defined:
# 127 lung adenocarcinoma (AD);
# 21 squamous cell lung adenocarcinoma (SQ)
# 20 pulmonary carcinoids (COID)
# 6 small-cell lung adenocarcinoma (SCLC)
# 17 normal lung (NL)
# NOTE(review): the five counts listed above sum to 191, not 203 -- confirm
# which specimens are missing from this list.
colnames(M) <- rownames(Bhatta.Lung.filtered.norm@phenoData@pData)
num.examples <- ncol(M)

############################################
# Range of the number of clusters
range.c <- c(2, 3, 4, 5, 6, 7, 8, 9, 10, 20)
# range.c <- c(2, 10)

# Set of epsilon values to be considered
epsilon.set <- c(0.5, 0.4, 0.3, 0.2, 0.1)
# epsilon.set <- c(0.5, 0.3)

# Number of projections
n.projections <- 50

###########################################
# Hierarchical clustering in the original space
# NOTE(review): hclust() in R >= 3.1.0 reports that method "ward" has been
# renamed to "ward.D"; the original method name is kept here so that it stays
# consistent with the hmethod="ward" passed to PMO.hclustering.tree below.
sample.dist <- dist(t(M))
tree.Bhatta.Lung.filtered.norm.ward <- hclust(sample.dist, method = "ward")
tree.Bhatta.Lung.filtered.norm.average <- hclust(sample.dist, method = "average")

# Lists of the lists of clusterings, one entry per element of range.c
cl.Bhatta.Lung.filtered.norm.ward <- vector("list", length(range.c))
cl.Bhatta.Lung.filtered.norm.average <- vector("list", length(range.c))

for (k.idx in seq_along(range.c)) {
  n.clusters <- range.c[k.idx]

  # rect.hclust() draws on the current plot, so each tree is plotted first;
  # its return value (the list of clusters) is what is stored.
  plot(tree.Bhatta.Lung.filtered.norm.ward, main = "")
  cl.Bhatta.Lung.filtered.norm.ward[[k.idx]] <-
    rect.hclust(tree.Bhatta.Lung.filtered.norm.ward, k = n.clusters)

  plot(tree.Bhatta.Lung.filtered.norm.average, main = "")
  cl.Bhatta.Lung.filtered.norm.average[[k.idx]] <-
    rect.hclust(tree.Bhatta.Lung.filtered.norm.average, k = n.clusters)
}
print("Hierarchical clustering in the original space done.")

# Computation of the set of the subspace dimensions corresponding to the
# desired epsilon values (w.r.t. JL lemma).
subspace.dim <- ceiling(JL.predict.dim(num.examples, epsilon.set))

###########################################
# Hierarchical clustering in the subspaces
# Lists of the lists of trees generated by clustering random projections,
# one entry per element of subspace.dim
PMOtree.Bhatta.Lung.filtered.norm.ward <- vector("list", length(subspace.dim))
PMOtree.Bhatta.Lung.filtered.norm.average <- vector("list", length(subspace.dim))

# Generating the list of the lists of trees by clustering random projections
for (dim.idx in seq_along(subspace.dim)) {
  PMOtree.Bhatta.Lung.filtered.norm.ward[[dim.idx]] <-
    PMO.hclustering.tree(M, subspace.dim[dim.idx], hmethod = "ward",
                         n = n.projections, scale = TRUE, seed = 100)
  PMOtree.Bhatta.Lung.filtered.norm.average[[dim.idx]] <-
    PMO.hclustering.tree(M, subspace.dim[dim.idx], hmethod = "average",
                         n = n.projections, scale = TRUE, seed = 100)
  cat("Hierarchical clustering in the subspace ", subspace.dim[dim.idx], "done.\n")
}
print("Hierarchical clustering in the subspaces done.")

###########################################
# Validity and AC indices

# Lists of the validity matrices. Each matrix has the number of rows equal to
# the number of clusters and the number of columns equal to the number of the
# different subspace dimensions.
# The elements m[i,j] refer to the validity index of the ith cluster computed
# with the jth dimension.
# The elements of the lists are ordered according to range.c
list.PMO.Bhatta.Lung.filtered.norm.ward.validity <- vector("list", length(range.c))
list.PMO.Bhatta.Lung.filtered.norm.average.validity <- vector("list", length(range.c))

# Lists of the overall validity vectors. Each vector has length equal to the
# number of the different subspace dimensions.
# The elements of the lists are ordered according to range.c
list.PMO.Bhatta.Lung.filtered.norm.ward.ov.validity <- vector("list", length(range.c))
list.PMO.Bhatta.Lung.filtered.norm.average.ov.validity <- vector("list", length(range.c))

# Matrices of lists. Each element of the matrix is a list of the PMO
# clusterings (rows follow range.c, columns follow subspace.dim)
PMOcl.Bhatta.Lung.filtered.norm.ward <-
  matrix(list(), nrow = length(range.c), ncol = length(subspace.dim))
PMOcl.Bhatta.Lung.filtered.norm.average <-
  matrix(list(), nrow = length(range.c), ncol = length(subspace.dim))

# Data structure for the AC indices: a matrix of lists. The lists are composed
# of only 1 element: the matrix of the AC indices
AC.PMO.Bhatta.Lung.filtered.norm.ward <-
  matrix(list(), nrow = length(range.c), ncol = length(subspace.dim))
AC.PMO.Bhatta.Lung.filtered.norm.average <-
  matrix(list(), nrow = length(range.c), ncol = length(subspace.dim))

for (k.idx in seq_along(range.c)) {
  n.clusters <- range.c[k.idx]

  # Matrices of the validity indices for the different subspace dimensions
  # (one row per cluster, one column per subspace dimension)
  val.ward.matrix <- matrix(0, nrow = n.clusters, ncol = length(subspace.dim))
  val.average.matrix <- matrix(0, nrow = n.clusters, ncol = length(subspace.dim))
  ov.validity.ward <- numeric(length(subspace.dim))
  ov.validity.average <- numeric(length(subspace.dim))

  for (dim.idx in seq_along(subspace.dim)) {
    # Clusterings obtained by cutting the projected trees at n.clusters
    PMOcl.Bhatta.Lung.filtered.norm.ward[k.idx, dim.idx] <-
      list(Generate.clusters(PMOtree.Bhatta.Lung.filtered.norm.ward[[dim.idx]],
                             c = n.clusters))
    PMOcl.Bhatta.Lung.filtered.norm.average[k.idx, dim.idx] <-
      list(Generate.clusters(PMOtree.Bhatta.Lung.filtered.norm.average[[dim.idx]],
                             c = n.clusters))

    # Ward linkage: validity of the original-space clustering with respect to
    # the projected clusterings, then the AC indices from the similarity matrix
    validity.res <- Cluster.validity(
      cl.Bhatta.Lung.filtered.norm.ward[[k.idx]],
      PMOcl.Bhatta.Lung.filtered.norm.ward[[k.idx, dim.idx]]
    )
    val.ward.matrix[, dim.idx] <- validity.res$validity
    ov.validity.ward[dim.idx] <- validity.res$overall.validity
    AC.PMO.Bhatta.Lung.filtered.norm.ward[k.idx, dim.idx] <-
      list(AC.index(cl.Bhatta.Lung.filtered.norm.ward[[k.idx]], n.clusters,
                    validity.res$similarity.matrix))

    # Average linkage: same computation
    validity.res <- Cluster.validity(
      cl.Bhatta.Lung.filtered.norm.average[[k.idx]],
      PMOcl.Bhatta.Lung.filtered.norm.average[[k.idx, dim.idx]]
    )
    val.average.matrix[, dim.idx] <- validity.res$validity
    ov.validity.average[dim.idx] <- validity.res$overall.validity
    AC.PMO.Bhatta.Lung.filtered.norm.average[k.idx, dim.idx] <-
      list(AC.index(cl.Bhatta.Lung.filtered.norm.average[[k.idx]], n.clusters,
                    validity.res$similarity.matrix))
  }

  list.PMO.Bhatta.Lung.filtered.norm.ward.validity[[k.idx]] <- val.ward.matrix
  list.PMO.Bhatta.Lung.filtered.norm.average.validity[[k.idx]] <- val.average.matrix
  list.PMO.Bhatta.Lung.filtered.norm.ward.ov.validity[[k.idx]] <- ov.validity.ward
  list.PMO.Bhatta.Lung.filtered.norm.average.ov.validity[[k.idx]] <- ov.validity.average
  cat("Validity indices for ", n.clusters, "-clusters clustering done.\n")
}
print("Validity computation done.")

###########################################
# Saving objects
print("Saving objects.")
save(
  list.PMO.Bhatta.Lung.filtered.norm.ward.validity,
  list.PMO.Bhatta.Lung.filtered.norm.average.validity,
  list.PMO.Bhatta.Lung.filtered.norm.ward.ov.validity,
  list.PMO.Bhatta.Lung.filtered.norm.average.ov.validity,
  PMOtree.Bhatta.Lung.filtered.norm.ward,
  PMOtree.Bhatta.Lung.filtered.norm.average,
  PMOcl.Bhatta.Lung.filtered.norm.ward,
  PMOcl.Bhatta.Lung.filtered.norm.average,
  cl.Bhatta.Lung.filtered.norm.ward,
  cl.Bhatta.Lung.filtered.norm.average,
  tree.Bhatta.Lung.filtered.norm.ward,
  tree.Bhatta.Lung.filtered.norm.average,
  AC.PMO.Bhatta.Lung.filtered.norm.average,
  AC.PMO.Bhatta.Lung.filtered.norm.ward,
  file = "Bhatta.Lung.filtered.norm.validityPMO.objects"
)
print("Done with Bhatta.Lung.filtered.norm.")