############################################################################################################## # doLeukemia.small.R # September 2006 # # Script to evaluate the number of cluster in the classical Golub's ALL-AML Leukemia data set (using 100 genes # with the highest variance) through the r package mosclust. # PAM, k-means and hierarchical clustering algorithms are used using PMO random projections and subsampling # techniques to perturb the data. # The chi-square based, Bernstein and the hybrid two-steps statistical test are applied to assess the # significance of the clustering solutions ############################################################################################################## library(Biobase); library(mosclust); print(date()); # parameters nsubsamples <- 100; # number of pairs od clusterings to be evaluated max.num.clust <- 10; # maximum number of cluster to be evaluated fract.resampled <- 0.8; # fraction of samples to subsampled dim.projection <- 80; # Loading Golub's Leukemia data set load("Leukemia.filtered.norm.hv"); M <- Leukemia.filtered.norm.hv@exprs; ############################################## #### Kmeans clustering ######### ############################################## ## Resampling Sr.kmeans.Leukemia.small <- do.similarity.resampling(M, c=max.num.clust, nsub=nsubsamples, f=fract.resampled, s=sFM, alg.clust.sim=Kmeans.sim.resampling); pr.kmeans.Leukemia.small.Bernstein <- Bernstein.compute.pvalues(Sr.kmeans.Leukemia.small); pr.kmeans.Leukemia.small.ind.Bernstein <- Bernstein.ind.compute.pvalues(Sr.kmeans.Leukemia.small); pr07.kmeans.Leukemia.small.chi.sq <- Chi.square.compute.pvalues(Sr.kmeans.Leukemia.small,s0=0.7); pr09.kmeans.Leukemia.small.chi.sq <- Chi.square.compute.pvalues(Sr.kmeans.Leukemia.small,s0=0.9); lr07.kmeans.Leukemia.small <- Hybrid.testing(Sr.kmeans.Leukemia.small, alpha=0.01, s0=0.7); lr09.kmeans.Leukemia.small <- Hybrid.testing(Sr.kmeans.Leukemia.small, alpha=0.01, s0=0.9); ## PMO Random projections Sp.kmeans.Leukemia.small <- do.similarity.projection(M, c=max.num.clust, nprojections=nsubsamples, dim=dim.projection, pmethod="PMO", alg.clust.sim=Kmeans.sim.projection); pp.kmeans.Leukemia.small.Bernstein <- Bernstein.compute.pvalues(Sp.kmeans.Leukemia.small); pp.kmeans.Leukemia.small.ind.Bernstein <- Bernstein.ind.compute.pvalues(Sp.kmeans.Leukemia.small); pp07.kmeans.Leukemia.small.chi.sq <- Chi.square.compute.pvalues(Sp.kmeans.Leukemia.small,s0=0.7); pp09.kmeans.Leukemia.small.chi.sq <- Chi.square.compute.pvalues(Sp.kmeans.Leukemia.small,s0=0.9); lp07.kmeans.Leukemia.small <- Hybrid.testing(Sp.kmeans.Leukemia.small, alpha=0.01, s0=0.7); lp09.kmeans.Leukemia.small <- Hybrid.testing(Sp.kmeans.Leukemia.small, alpha=0.01, s0=0.9); # saving objects save(Sr.kmeans.Leukemia.small,pr.kmeans.Leukemia.small.Bernstein,pr.kmeans.Leukemia.small.ind.Bernstein,pr07.kmeans.Leukemia.small.chi.sq, pr09.kmeans.Leukemia.small.chi.sq,lr07.kmeans.Leukemia.small,lr09.kmeans.Leukemia.small, Sp.kmeans.Leukemia.small,pp.kmeans.Leukemia.small.Bernstein,pp.kmeans.Leukemia.small.ind.Bernstein,pp07.kmeans.Leukemia.small.chi.sq, pp09.kmeans.Leukemia.small.chi.sq,lp07.kmeans.Leukemia.small,lp09.kmeans.Leukemia.small, file="Leukemia.small.kmeans.objects"); ############################################## ## Hierarchical clustering ######## ############################################## ## Resampling Sr.HC.Leukemia.small <- do.similarity.resampling(M, c=max.num.clust, nsub=nsubsamples, f=fract.resampled, s=sFM, alg.clust.sim=Hierarchical.sim.resampling); pr.HC.Leukemia.small.Bernstein <- Bernstein.compute.pvalues(Sr.HC.Leukemia.small); pr.HC.Leukemia.small.ind.Bernstein <- Bernstein.ind.compute.pvalues(Sr.HC.Leukemia.small); pr07.HC.Leukemia.small.chi.sq <- Chi.square.compute.pvalues(Sr.HC.Leukemia.small,s0=0.7); pr09.HC.Leukemia.small.chi.sq <- Chi.square.compute.pvalues(Sr.HC.Leukemia.small,s0=0.9); lr07.HC.Leukemia.small <- Hybrid.testing(Sr.HC.Leukemia.small, alpha=0.01, s0=0.7); lr09.HC.Leukemia.small <- Hybrid.testing(Sr.HC.Leukemia.small, alpha=0.01, s0=0.9); ## PMO Random projections Sp.HC.Leukemia.small <- do.similarity.projection(M, c=max.num.clust, nprojections=nsubsamples, dim=dim.projection, pmethod="PMO", alg.clust.sim=Hierarchical.sim.projection); pp.HC.Leukemia.small.Bernstein <- Bernstein.compute.pvalues(Sp.HC.Leukemia.small); pp.HC.Leukemia.small.ind.Bernstein <- Bernstein.ind.compute.pvalues(Sp.HC.Leukemia.small); pp07.HC.Leukemia.small.chi.sq <- Chi.square.compute.pvalues(Sp.HC.Leukemia.small,s0=0.7); pp09.HC.Leukemia.small.chi.sq <- Chi.square.compute.pvalues(Sp.HC.Leukemia.small,s0=0.9); lp07.HC.Leukemia.small <- Hybrid.testing(Sp.HC.Leukemia.small, alpha=0.01, s0=0.7); lp09.HC.Leukemia.small <- Hybrid.testing(Sp.HC.Leukemia.small, alpha=0.01, s0=0.9); # saving objects save(Sr.HC.Leukemia.small,pr.HC.Leukemia.small.Bernstein,pr.HC.Leukemia.small.ind.Bernstein,pr07.HC.Leukemia.small.chi.sq, pr09.HC.Leukemia.small.chi.sq,lr07.HC.Leukemia.small,lr09.HC.Leukemia.small, Sp.HC.Leukemia.small,pp.HC.Leukemia.small.Bernstein,pp.HC.Leukemia.small.ind.Bernstein,pp07.HC.Leukemia.small.chi.sq, pp09.HC.Leukemia.small.chi.sq,lp07.HC.Leukemia.small,lp09.HC.Leukemia.small, file="Leukemia.small.HC.objects"); ############################################## ## PAM clustering ######## ############################################## ## Resampling Sr.PAM.Leukemia.small <- do.similarity.resampling(M, c=max.num.clust, nsub=nsubsamples, f=fract.resampled, s=sFM, alg.clust.sim=PAM.sim.resampling); pr.PAM.Leukemia.small.Bernstein <- Bernstein.compute.pvalues(Sr.PAM.Leukemia.small); pr.PAM.Leukemia.small.ind.Bernstein <- Bernstein.ind.compute.pvalues(Sr.PAM.Leukemia.small); pr07.PAM.Leukemia.small.chi.sq <- Chi.square.compute.pvalues(Sr.PAM.Leukemia.small,s0=0.7); pr09.PAM.Leukemia.small.chi.sq <- Chi.square.compute.pvalues(Sr.PAM.Leukemia.small,s0=0.9); lr07.PAM.Leukemia.small <- Hybrid.testing(Sr.PAM.Leukemia.small, alpha=0.01, s0=0.7); lr09.PAM.Leukemia.small <- Hybrid.testing(Sr.PAM.Leukemia.small, alpha=0.01, s0=0.9); ## PMO Random projections Sp.PAM.Leukemia.small <- do.similarity.projection(M, c=max.num.clust, nprojections=nsubsamples, dim=dim.projection, pmethod="PMO", alg.clust.sim=PAM.sim.projection); pp.PAM.Leukemia.small.Bernstein <- Bernstein.compute.pvalues(Sp.PAM.Leukemia.small); pp.PAM.Leukemia.small.ind.Bernstein <- Bernstein.ind.compute.pvalues(Sp.PAM.Leukemia.small); pp07.PAM.Leukemia.small.chi.sq <- Chi.square.compute.pvalues(Sp.PAM.Leukemia.small,s0=0.7); pp09.PAM.Leukemia.small.chi.sq <- Chi.square.compute.pvalues(Sp.PAM.Leukemia.small,s0=0.9); lp07.PAM.Leukemia.small <- Hybrid.testing(Sp.PAM.Leukemia.small, alpha=0.01, s0=0.7); lp09.PAM.Leukemia.small <- Hybrid.testing(Sp.PAM.Leukemia.small, alpha=0.01, s0=0.9); # saving objects save(Sr.PAM.Leukemia.small,pr.PAM.Leukemia.small.Bernstein,pr.PAM.Leukemia.small.ind.Bernstein,pr07.PAM.Leukemia.small.chi.sq, pr09.PAM.Leukemia.small.chi.sq,lr07.PAM.Leukemia.small,lr09.PAM.Leukemia.small, Sp.PAM.Leukemia.small,pp.PAM.Leukemia.small.Bernstein,pp.PAM.Leukemia.small.ind.Bernstein,pp07.PAM.Leukemia.small.chi.sq, pp09.PAM.Leukemia.small.chi.sq,lp07.PAM.Leukemia.small,lp09.PAM.Leukemia.small, file="Leukemia.small.PAM.objects"); ################################### ##### Writing results on text file ################################### sink("Leukemia.small.res"); cat("** Experiments with Leukemia data **\n\n"); ######## Kmeans cat("------------------------\n") cat("---- Kmeans clustering subsampling\n\n"); cat("Leukemia.small kmeans subsampling Bernstein p-values: \n"); print(pr.kmeans.Leukemia.small.Bernstein); cat("Leukemia.small kmeans subsampling Bernstein independent p-values: \n"); print(pr.kmeans.Leukemia.small.ind.Bernstein); cat("Leukemia.small kmeans subsampling Chi square p-values s=0.7: \n"); print(pr07.kmeans.Leukemia.small.chi.sq); cat("Leukemia.small kmeans subsampling Chi square p-values s=0.9: \n"); print(pr09.kmeans.Leukemia.small.chi.sq); cat("Leukemia.small kmeans subsampling clusterings selected by the Hybrid test s=0.7: \n"); print(lr07.kmeans.Leukemia.small$selected.res); cat("Leukemia.small kmeans subsampling clusterings selected by the Hybrid test s=0.9: \n"); print(lr09.kmeans.Leukemia.small$selected.res); cat("------------------------\n") cat("---- Kmeans clustering projections\n\n"); cat("Leukemia.small kmeans projections Bernstein p-values: \n"); print(pp.kmeans.Leukemia.small.Bernstein); cat("Leukemia.small kmeans projections Bernstein independent p-values: \n"); print(pp.kmeans.Leukemia.small.ind.Bernstein); cat("Leukemia.small kmeans projections Chi square p-values s=0.7: \n"); print(pp07.kmeans.Leukemia.small.chi.sq); cat("Leukemia.small kmeans projections Chi square p-values s=0.9: \n"); print(pp09.kmeans.Leukemia.small.chi.sq); cat("Leukemia.small kmeans projections clusterings selected by the Hybrid test s=0.7: \n"); print(lp07.kmeans.Leukemia.small$selected.res); cat("Leukemia.small kmeans projections clusterings selected by the Hybrid test s=0.9: \n"); print(lp09.kmeans.Leukemia.small$selected.res); ######## HC cat("------------------------\n") cat("---- HC clustering subsampling\n\n"); cat("Leukemia.small HC subsampling Bernstein p-values: \n"); print(pr.HC.Leukemia.small.Bernstein); cat("Leukemia.small HC subsampling Bernstein independent p-values: \n"); print(pr.HC.Leukemia.small.ind.Bernstein); cat("Leukemia.small HC subsampling Chi square p-values s=0.7: \n"); print(pr07.HC.Leukemia.small.chi.sq); cat("Leukemia.small HC subsampling Chi square p-values s=0.9: \n"); print(pr09.HC.Leukemia.small.chi.sq); cat("Leukemia.small HC subsampling clusterings selected by the Hybrid test s=0.7: \n"); print(lr07.HC.Leukemia.small$selected.res); cat("Leukemia.small HC subsampling clusterings selected by the Hybrid test s=0.9: \n"); print(lr09.HC.Leukemia.small$selected.res); cat("------------------------\n") cat("---- HC clustering projections\n\n"); cat("Leukemia.small HC projections Bernstein p-values: \n"); print(pp.HC.Leukemia.small.Bernstein); cat("Leukemia.small HC projections Bernstein independent p-values: \n"); print(pp.HC.Leukemia.small.ind.Bernstein); cat("Leukemia.small HC projections Chi square p-values s=0.7: \n"); print(pp07.HC.Leukemia.small.chi.sq); cat("Leukemia.small HC projections Chi square p-values s=0.9: \n"); print(pp09.HC.Leukemia.small.chi.sq); cat("Leukemia.small HC projections clusterings selected by the Hybrid test s=0.7: \n"); print(lp07.HC.Leukemia.small$selected.res); cat("Leukemia.small HC projections clusterings selected by the Hybrid test s=0.9: \n"); print(lp09.HC.Leukemia.small$selected.res); ######## PAM cat("------------------------\n") cat("---- PAM clustering subsampling\n\n"); cat("Leukemia.small PAM subsampling Bernstein p-values: \n"); print(pr.PAM.Leukemia.small.Bernstein); cat("Leukemia.small PAM subsampling Bernstein independent p-values: \n"); print(pr.PAM.Leukemia.small.ind.Bernstein); cat("Leukemia.small PAM subsampling Chi square p-values s=0.7: \n"); print(pr07.PAM.Leukemia.small.chi.sq); cat("Leukemia.small PAM subsampling Chi square p-values s=0.9: \n"); print(pr09.PAM.Leukemia.small.chi.sq); cat("Leukemia.small PAM subsampling clusterings selected by the Hybrid test s=0.7: \n"); print(lr07.PAM.Leukemia.small$selected.res); cat("Leukemia.small PAM subsampling clusterings selected by the Hybrid test s=0.9: \n"); print(lr09.PAM.Leukemia.small$selected.res); cat("------------------------\n") cat("---- PAM clustering projections\n\n"); cat("Leukemia.small PAM projections Bernstein p-values: \n"); print(pp.PAM.Leukemia.small.Bernstein); cat("Leukemia.small PAM projections Bernstein independent p-values: \n"); print(pp.PAM.Leukemia.small.ind.Bernstein); cat("Leukemia.small PAM projections Chi square p-values s=0.7: \n"); print(pp07.PAM.Leukemia.small.chi.sq); cat("Leukemia.small PAM projections Chi square p-values s=0.9: \n"); print(pp09.PAM.Leukemia.small.chi.sq); cat("Leukemia.small PAM projections clusterings selected by the Hybrid test s=0.7: \n"); print(lp07.PAM.Leukemia.small$selected.res); cat("Leukemia.small PAM projections clusterings selected by the Hybrid test s=0.9: \n"); print(lp09.PAM.Leukemia.small$selected.res); sink(); print(date());