############################################################################################################## # doAlizadehLange.R # November 2006 # # Script to evaluate the number of cluster in the ALizadehLange lymphoma data set (using 4026 genes) # with mosclust (k-means and PAM algorithms) using PMO random projections and subsampling # Data sets: Leukemia.filtered.norm # Clustering algorithms : PAM, hierarchical, c-mean # Perturbation: resampling and random projections # Test: Bernstein, Bernstein.ind, chi-square, hybrid ############################################################################################################## library(mosclust); # parameters nsubsamples <- 100; # number of pairs od clusterings to be evaluated max.num.clust <- 10; # maximum number of cluster to be evaluated fract.resampled <- 0.8; # fraction of samples to subsampled dim.projection <- JL.predict.dim(62,epsilon=0.2); # Data set preparation load("AlizadehLange.norm"); M <- AlizadehLange.norm@exprs; ############################################## #### Kmeans clustering ######### ############################################## ## Resampling Sr.kmeans.AlizadehLange <- do.similarity.resampling(M, c=max.num.clust, nsub=nsubsamples, f=fract.resampled, s=sFM, alg.clust.sim=Kmeans.sim.resampling); pr.kmeans.AlizadehLange.Bernstein <- Bernstein.compute.pvalues(Sr.kmeans.AlizadehLange); pr.kmeans.AlizadehLange.ind.Bernstein <- Bernstein.ind.compute.pvalues(Sr.kmeans.AlizadehLange); pr07.kmeans.AlizadehLange.chi.sq <- Chi.square.compute.pvalues(Sr.kmeans.AlizadehLange,s0=0.7); pr09.kmeans.AlizadehLange.chi.sq <- Chi.square.compute.pvalues(Sr.kmeans.AlizadehLange,s0=0.9); lr07.kmeans.AlizadehLange <- Hybrid.testing(Sr.kmeans.AlizadehLange, alpha=0.01, s0=0.7); lr09.kmeans.AlizadehLange <- Hybrid.testing(Sr.kmeans.AlizadehLange, alpha=0.01, s0=0.9); ## PMO Random projections Sp.kmeans.AlizadehLange <- do.similarity.projection(M, c=max.num.clust, nprojections=nsubsamples, dim=dim.projection, pmethod="PMO", alg.clust.sim=Kmeans.sim.projection); pp.kmeans.AlizadehLange.Bernstein <- Bernstein.compute.pvalues(Sp.kmeans.AlizadehLange); pp.kmeans.AlizadehLange.ind.Bernstein <- Bernstein.ind.compute.pvalues(Sp.kmeans.AlizadehLange); pp07.kmeans.AlizadehLange.chi.sq <- Chi.square.compute.pvalues(Sp.kmeans.AlizadehLange,s0=0.7); pp09.kmeans.AlizadehLange.chi.sq <- Chi.square.compute.pvalues(Sp.kmeans.AlizadehLange,s0=0.9); lp07.kmeans.AlizadehLange <- Hybrid.testing(Sp.kmeans.AlizadehLange, alpha=0.01, s0=0.7); lp09.kmeans.AlizadehLange <- Hybrid.testing(Sp.kmeans.AlizadehLange, alpha=0.01, s0=0.9); # saving objects save(Sr.kmeans.AlizadehLange,pr.kmeans.AlizadehLange.Bernstein,pr.kmeans.AlizadehLange.ind.Bernstein,pr07.kmeans.AlizadehLange.chi.sq, pr09.kmeans.AlizadehLange.chi.sq,lr07.kmeans.AlizadehLange,lr09.kmeans.AlizadehLange, Sp.kmeans.AlizadehLange,pp.kmeans.AlizadehLange.Bernstein,pp.kmeans.AlizadehLange.ind.Bernstein,pp07.kmeans.AlizadehLange.chi.sq, pp09.kmeans.AlizadehLange.chi.sq,lp07.kmeans.AlizadehLange,lp09.kmeans.AlizadehLange, file="AlizadehLange.kmeans.objects"); ############################################## ## Hierarchical clustering ######## ############################################## ## Resampling Sr.HC.AlizadehLange <- do.similarity.resampling(M, c=max.num.clust, nsub=nsubsamples, f=fract.resampled, s=sFM, alg.clust.sim=Hierarchical.sim.resampling); pr.HC.AlizadehLange.Bernstein <- Bernstein.compute.pvalues(Sr.HC.AlizadehLange); pr.HC.AlizadehLange.ind.Bernstein <- Bernstein.ind.compute.pvalues(Sr.HC.AlizadehLange); pr07.HC.AlizadehLange.chi.sq <- Chi.square.compute.pvalues(Sr.HC.AlizadehLange,s0=0.7); pr09.HC.AlizadehLange.chi.sq <- Chi.square.compute.pvalues(Sr.HC.AlizadehLange,s0=0.9); lr07.HC.AlizadehLange <- Hybrid.testing(Sr.HC.AlizadehLange, alpha=0.01, s0=0.7); lr09.HC.AlizadehLange <- Hybrid.testing(Sr.HC.AlizadehLange, alpha=0.01, s0=0.9); ## PMO Random projections Sp.HC.AlizadehLange <- do.similarity.projection(M, c=max.num.clust, nprojections=nsubsamples, dim=dim.projection, pmethod="PMO", alg.clust.sim=Hierarchical.sim.projection); pp.HC.AlizadehLange.Bernstein <- Bernstein.compute.pvalues(Sp.HC.AlizadehLange); pp.HC.AlizadehLange.ind.Bernstein <- Bernstein.ind.compute.pvalues(Sp.HC.AlizadehLange); pp07.HC.AlizadehLange.chi.sq <- Chi.square.compute.pvalues(Sp.HC.AlizadehLange,s0=0.7); pp09.HC.AlizadehLange.chi.sq <- Chi.square.compute.pvalues(Sp.HC.AlizadehLange,s0=0.9); lp07.HC.AlizadehLange <- Hybrid.testing(Sp.HC.AlizadehLange, alpha=0.01, s0=0.7); lp09.HC.AlizadehLange <- Hybrid.testing(Sp.HC.AlizadehLange, alpha=0.01, s0=0.9); # saving objects save(Sr.HC.AlizadehLange,pr.HC.AlizadehLange.Bernstein,pr.HC.AlizadehLange.ind.Bernstein,pr07.HC.AlizadehLange.chi.sq, pr09.HC.AlizadehLange.chi.sq,lr07.HC.AlizadehLange,lr09.HC.AlizadehLange, Sp.HC.AlizadehLange,pp.HC.AlizadehLange.Bernstein,pp.HC.AlizadehLange.ind.Bernstein,pp07.HC.AlizadehLange.chi.sq, pp09.HC.AlizadehLange.chi.sq,lp07.HC.AlizadehLange,lp09.HC.AlizadehLange, file="AlizadehLange.HC.objects"); ############################################## ## PAM clustering ######## ############################################## ## Resampling Sr.PAM.AlizadehLange <- do.similarity.resampling(M, c=max.num.clust, nsub=nsubsamples, f=fract.resampled, s=sFM, alg.clust.sim=PAM.sim.resampling); pr.PAM.AlizadehLange.Bernstein <- Bernstein.compute.pvalues(Sr.PAM.AlizadehLange); pr.PAM.AlizadehLange.ind.Bernstein <- Bernstein.ind.compute.pvalues(Sr.PAM.AlizadehLange); pr07.PAM.AlizadehLange.chi.sq <- Chi.square.compute.pvalues(Sr.PAM.AlizadehLange,s0=0.7); pr09.PAM.AlizadehLange.chi.sq <- Chi.square.compute.pvalues(Sr.PAM.AlizadehLange,s0=0.9); lr07.PAM.AlizadehLange <- Hybrid.testing(Sr.PAM.AlizadehLange, alpha=0.01, s0=0.7); lr09.PAM.AlizadehLange <- Hybrid.testing(Sr.PAM.AlizadehLange, alpha=0.01, s0=0.9); ## PMO Random projections Sp.PAM.AlizadehLange <- do.similarity.projection(M, c=max.num.clust, nprojections=nsubsamples, dim=dim.projection, pmethod="PMO", alg.clust.sim=PAM.sim.projection); pp.PAM.AlizadehLange.Bernstein <- Bernstein.compute.pvalues(Sp.PAM.AlizadehLange); pp.PAM.AlizadehLange.ind.Bernstein <- Bernstein.ind.compute.pvalues(Sp.PAM.AlizadehLange); pp07.PAM.AlizadehLange.chi.sq <- Chi.square.compute.pvalues(Sp.PAM.AlizadehLange,s0=0.7); pp09.PAM.AlizadehLange.chi.sq <- Chi.square.compute.pvalues(Sp.PAM.AlizadehLange,s0=0.9); lp07.PAM.AlizadehLange <- Hybrid.testing(Sp.PAM.AlizadehLange, alpha=0.01, s0=0.7); lp09.PAM.AlizadehLange <- Hybrid.testing(Sp.PAM.AlizadehLange, alpha=0.01, s0=0.9); # saving objects save(Sr.PAM.AlizadehLange,pr.PAM.AlizadehLange.Bernstein,pr.PAM.AlizadehLange.ind.Bernstein,pr07.PAM.AlizadehLange.chi.sq, pr09.PAM.AlizadehLange.chi.sq,lr07.PAM.AlizadehLange,lr09.PAM.AlizadehLange, Sp.PAM.AlizadehLange,pp.PAM.AlizadehLange.Bernstein,pp.PAM.AlizadehLange.ind.Bernstein,pp07.PAM.AlizadehLange.chi.sq, pp09.PAM.AlizadehLange.chi.sq,lp07.PAM.AlizadehLange,lp09.PAM.AlizadehLange, file="AlizadehLange.PAM.objects"); ################################### ##### Writing results on text file ################################### sink("AlizadehLange.res"); cat("** Experiments with AlizadehLange data **\n\n"); ######## Kmeans cat("------------------------\n") cat("---- Kmeans clustering subsampling\n\n"); cat("AlizadehLange kmeans subsampling Bernstein p-values: \n"); print(pr.kmeans.AlizadehLange.Bernstein); cat("AlizadehLange kmeans subsampling Bernstein independent p-values: \n"); print(pr.kmeans.AlizadehLange.ind.Bernstein); cat("AlizadehLange kmeans subsampling Chi square p-values s=0.7: \n"); print(pr07.kmeans.AlizadehLange.chi.sq); cat("AlizadehLange kmeans subsampling Chi square p-values s=0.9: \n"); print(pr09.kmeans.AlizadehLange.chi.sq); cat("AlizadehLange kmeans subsampling clusterings selected by the Hybrid test s=0.7: \n"); print(lr07.kmeans.AlizadehLange$selected.res); cat("AlizadehLange kmeans subsampling clusterings selected by the Hybrid test s=0.9: \n"); print(lr09.kmeans.AlizadehLange$selected.res); cat("------------------------\n") cat("---- Kmeans clustering projections\n\n"); cat("AlizadehLange kmeans projections Bernstein p-values: \n"); print(pp.kmeans.AlizadehLange.Bernstein); cat("AlizadehLange kmeans projections Bernstein independent p-values: \n"); print(pp.kmeans.AlizadehLange.ind.Bernstein); cat("AlizadehLange kmeans projections Chi square p-values s=0.7: \n"); print(pp07.kmeans.AlizadehLange.chi.sq); cat("AlizadehLange kmeans projections Chi square p-values s=0.9: \n"); print(pp09.kmeans.AlizadehLange.chi.sq); cat("AlizadehLange kmeans projections clusterings selected by the Hybrid test s=0.7: \n"); print(lp07.kmeans.AlizadehLange$selected.res); cat("AlizadehLange kmeans projections clusterings selected by the Hybrid test s=0.9: \n"); print(lp09.kmeans.AlizadehLange$selected.res); ######## HC cat("------------------------\n") cat("---- HC clustering subsampling\n\n"); cat("AlizadehLange HC subsampling Bernstein p-values: \n"); print(pr.HC.AlizadehLange.Bernstein); cat("AlizadehLange HC subsampling Bernstein independent p-values: \n"); print(pr.HC.AlizadehLange.ind.Bernstein); cat("AlizadehLange HC subsampling Chi square p-values s=0.7: \n"); print(pr07.HC.AlizadehLange.chi.sq); cat("AlizadehLange HC subsampling Chi square p-values s=0.9: \n"); print(pr09.HC.AlizadehLange.chi.sq); cat("AlizadehLange HC subsampling clusterings selected by the Hybrid test s=0.7: \n"); print(lr07.HC.AlizadehLange$selected.res); cat("AlizadehLange HC subsampling clusterings selected by the Hybrid test s=0.9: \n"); print(lr09.HC.AlizadehLange$selected.res); cat("------------------------\n") cat("---- HC clustering projections\n\n"); cat("AlizadehLange HC projections Bernstein p-values: \n"); print(pp.HC.AlizadehLange.Bernstein); cat("AlizadehLange HC projections Bernstein independent p-values: \n"); print(pp.HC.AlizadehLange.ind.Bernstein); cat("AlizadehLange HC projections Chi square p-values s=0.7: \n"); print(pp07.HC.AlizadehLange.chi.sq); cat("AlizadehLange HC projections Chi square p-values s=0.9: \n"); print(pp09.HC.AlizadehLange.chi.sq); cat("AlizadehLange HC projections clusterings selected by the Hybrid test s=0.7: \n"); print(lp07.HC.AlizadehLange$selected.res); cat("AlizadehLange HC projections clusterings selected by the Hybrid test s=0.9: \n"); print(lp09.HC.AlizadehLange$selected.res); ######## PAM cat("------------------------\n") cat("---- PAM clustering subsampling\n\n"); cat("AlizadehLange PAM subsampling Bernstein p-values: \n"); print(pr.PAM.AlizadehLange.Bernstein); cat("AlizadehLange PAM subsampling Bernstein independent p-values: \n"); print(pr.PAM.AlizadehLange.ind.Bernstein); cat("AlizadehLange PAM subsampling Chi square p-values s=0.7: \n"); print(pr07.PAM.AlizadehLange.chi.sq); cat("AlizadehLange PAM subsampling Chi square p-values s=0.9: \n"); print(pr09.PAM.AlizadehLange.chi.sq); cat("AlizadehLange PAM subsampling clusterings selected by the Hybrid test s=0.7: \n"); print(lr07.PAM.AlizadehLange$selected.res); cat("AlizadehLange PAM subsampling clusterings selected by the Hybrid test s=0.9: \n"); print(lr09.PAM.AlizadehLange$selected.res); cat("------------------------\n") cat("---- PAM clustering projections\n\n"); cat("AlizadehLange PAM projections Bernstein p-values: \n"); print(pp.PAM.AlizadehLange.Bernstein); cat("AlizadehLange PAM projections Bernstein independent p-values: \n"); print(pp.PAM.AlizadehLange.ind.Bernstein); cat("AlizadehLange PAM projections Chi square p-values s=0.7: \n"); print(pp07.PAM.AlizadehLange.chi.sq); cat("AlizadehLange PAM projections Chi square p-values s=0.9: \n"); print(pp09.PAM.AlizadehLange.chi.sq); cat("AlizadehLange PAM projections clusterings selected by the Hybrid test s=0.7: \n"); print(lp07.PAM.AlizadehLange$selected.res); cat("AlizadehLange PAM projections clusterings selected by the Hybrid test s=0.9: \n"); print(lp09.PAM.AlizadehLange$selected.res); sink();