# Validity index R script for Bhatta.Lung.filtered.norm
# using PMO projections and kmeans clustering.
# December 2005
#
# For every combination of a candidate number of clusters and a projection
# subspace dimension, the script calls Random.kmeans.validity() on the
# expression matrix and stores the returned object. The complete grid of
# results is saved to disk at the end.

library(Biobase)
library(clusterv)

###########################################
# Loading Bhatta.Lung.filtered.norm
set.seed(100)
load("Bhatta.Lung.filtered.norm")
M <- exprs(Bhatta.Lung.filtered.norm)

# The dataset includes 203 specimens histologically defined:
# 127 lung adenocarcinoma (AD);
# 21 squamous cell lung adenocarcinoma (SQ)
# 20 pulmonary carcinoids (COID)
# 6 small-cell lung adenocarcinoma (SCLC)
# 17 normal lung (NL)
# NOTE(review): the classes itemized above account for 191 of the 203
# specimens; the remaining 12 are not described here.

# Label the columns (examples) of M with the row names of the phenotype table.
# The pData() accessor is used rather than reaching into the S4 slots, so the
# script does not depend on the internal layout of the phenoData object.
colnames(M) <- rownames(pData(Bhatta.Lung.filtered.norm))
num.examples <- ncol(M)

############################################
# Range of the number c of clusters
range.c <- c(2, 3, 4, 5, 6, 7, 8, 9, 10, 20)

# Set of epsilon values to be considered
epsilon.set <- c(0.5, 0.4, 0.3, 0.2, 0.1)

# Subspace dimensions corresponding to the desired epsilon values
# (w.r.t. the JL lemma), one per entry of epsilon.set.
subspace.dim <- ceiling(JL.predict.dim(num.examples, epsilon.set))

# Number of projections
n.projections <- 30
initial.seed <- 100

###########################################
# Computing validity of the kmeans clustering for different numbers of
# clusters and different subspace dimensions.
#
# The results are held in a matrix of lists: each element is the object
# returned by Random.kmeans.validity. Rows correspond to the entries of
# range.c, columns to the entries of subspace.dim.
kmeans.Bhatta.Lung.filtered.norm <- matrix(
  list(),
  nrow = length(range.c),
  ncol = length(subspace.dim)
)

# seq_along() keeps the loops empty (instead of iterating over c(1, 0)) if a
# parameter vector is ever empty; the indices are named i / j so that the
# base function c() is not masked by a loop variable.
for (i in seq_along(range.c)) {
  for (j in seq_along(subspace.dim)) {
    kmeans.Bhatta.Lung.filtered.norm[i, j] <- list(
      Random.kmeans.validity(
        M = M,
        dim = subspace.dim[j],
        pmethod = "PMO",
        c = range.c[i],
        it.max = 1000,
        n = n.projections,
        scale = TRUE,
        seed = initial.seed,
        AC = TRUE
      )
    )
    cat("Validity indices for", range.c[i], " clusters clustering and epsilon = ", epsilon.set[j], " done.\n")
  }
}

print("Validity computation done.")
print("Saving objects.")

# Saving objects
save(kmeans.Bhatta.Lung.filtered.norm, file = "kmeans.Bhatta.Lung.filtered.norm.validity.PMO.objects")
print("Done with Bhatta.Lung.filtered.norm.")