##############################################################################
# clusterv.R
# January 2005
# modified June 2005, August 2005, February 2006 and August 2006
# Set of functions for computing cluster validity indices based on random projections
##############################################################################

library(stats);
source("rp.R");
source("rData.R");
source("clusterv2.R");

##############################################################################
# Function to compute and build up a pairwise similarity matrix when each clustering defines a partition of the data.
# Each clustering used to compute the pairwise similarity matrix may have a different number of clusters.
# Input:
# l : list of clusterings
# Output:
# Sim.M : the pairwise similarity matrix whose elements represent how often two examples fall in the same cluster
#         across multiple clusterings. Each element of Sim.M is normalized so that its value is between 0 and 1.
Do.similarity.matrix.partition <- function(l) {
  dim.Sim.M <- 0;   # dimension of the similarity matrix (to be computed)
  n <- length(l);   # number of projections (clusterings)
  cl <- l[[1]];     # first clustering selected
  c <- length(cl);  # number of clusters
  for (j in 1:c)
    dim.Sim.M <- dim.Sim.M + length(cl[[j]]);  # summing up the elements of the clusters
  Sim.M <- matrix(numeric(dim.Sim.M*dim.Sim.M), nrow=dim.Sim.M);
  singletons <- numeric(dim.Sim.M);
  for (i in 1:n) {
    cl <- l[[i]];
    c <- length(cl);  # number of clusters for the current clustering
    for (j in 1:c) {
      n.ex <- length(cl[[j]]);
      if (n.ex == 1)
        singletons[cl[[j]][1]] <- singletons[cl[[j]][1]] + 1
      else {
        for (x1 in 1:(n.ex-1)) {
          for (x2 in (x1+1):n.ex) {
            x <- cl[[j]][x1];
            y <- cl[[j]][x2];
            Sim.M[x,y] <- Sim.M[x,y] + 1;
          }
        }
      }
    }
  }
  for (x1 in 1:(dim.Sim.M-1))
    for (x2 in (x1+1):dim.Sim.M)
      Sim.M[x2,x1] <- Sim.M[x1,x2];
  for (x in 1:(dim.Sim.M))
    Sim.M[x,x] <- singletons[x];
  Sim.M <- Sim.M / n;
  return(Sim.M);
}

##############################################################################
# Function to compute and build up a pairwise similarity matrix.
# This function may also be used with clusterings that do not strictly define a partition of the data and with a
# variable number of clusters for each clustering.
# Input:
# l : list of clusterings
# dim.Sim.M : dimension of the similarity matrix (number of examples)
# Output:
# Sim.M : the pairwise similarity matrix whose elements represent how often two examples fall in the same cluster
#         across multiple clusterings. Each element of Sim.M is normalized so that its value is between 0 and 1.
Do.similarity.matrix <- function(l, dim.Sim.M) {
  Sim.M <- matrix(numeric(dim.Sim.M*dim.Sim.M), nrow=dim.Sim.M);
  singletons <- numeric(dim.Sim.M);
  n <- length(l);  # number of projections (clusterings)
  for (i in 1:n) {
    cl <- l[[i]];
    c <- length(cl);  # number of clusters for the current clustering
    for (j in 1:c) {
      n.ex <- length(cl[[j]]);
      if (n.ex == 1)
        singletons[cl[[j]][1]] <- singletons[cl[[j]][1]] + 1
      else {
        for (x1 in 1:(n.ex-1)) {
          for (x2 in (x1+1):n.ex) {
            x <- cl[[j]][x1];
            y <- cl[[j]][x2];
            Sim.M[x,y] <- Sim.M[x,y] + 1;
          }
        }
      }
    }
  }
  for (x1 in 1:(dim.Sim.M-1))
    for (x2 in (x1+1):dim.Sim.M)
      Sim.M[x2,x1] <- Sim.M[x1,x2];
  for (x in 1:(dim.Sim.M))
    Sim.M[x,x] <- singletons[x];
  Sim.M <- Sim.M / n;
  return(Sim.M);
}
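##############################################################################
# Usage sketch for Do.similarity.matrix (illustrative only; the two toy
# clusterings below are hypothetical). Examples must be labelled with the
# integers 1..dim.Sim.M.
#
# cl1 <- list(c(1,2,3), c(4,5,6));       # first clustering (two clusters)
# cl2 <- list(c(1,2), c(3,4), c(5,6));   # second clustering (three clusters)
# Sim <- Do.similarity.matrix(list(cl1, cl2), dim.Sim.M=6);
# Sim[1,2];   # 1.0 : examples 1 and 2 co-cluster in both clusterings
# Sim[2,3];   # 0.5 : examples 2 and 3 co-cluster in one of the two clusterings
##############################################################################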
##########################
# Validity indices computation.
# It assumes that the labels of the examples are integers.
# It computes the stability index of each individual cluster, the overall validity index of the clustering and
# (optionally) the Assignment Confidence (AC) index of each example.
# To compute the indices a set of clusterings is used.
# Input:
# cluster : the original clustering (a list of clusters) whose validity indices will be computed
# M.clusters : the list of the n clusterings (a list of lists) used for validity index computation
# AC : boolean; if TRUE the Assignment Confidence index of each example is computed
# Output:
# a list with components "validity", "overall.validity", "similarity.matrix" and (optionally) "AC":
# "validity" is a vector with the validity of each of the c clusters;
# "overall.validity" is the validity index of the overall clustering;
# "similarity.matrix" is the pairwise similarity matrix between examples;
# "AC" is a matrix with the Assignment Confidence index of each example. Each row corresponds to an example,
#      each column to a cluster.
Cluster.validity <- function(cluster, M.clusters, AC=FALSE) {
  dim.Sim.M <- 0;
  c <- length(cluster);
  for (i in 1:c)
    dim.Sim.M <- dim.Sim.M + length(cluster[[i]]);
  Sim.M <- Do.similarity.matrix(M.clusters, dim.Sim.M);
  vi <- Validity.indices(cluster, c, Sim.M);
  ov.vi <- sum(vi)/c;
  if (AC == TRUE) {
    ac <- AC.index(cluster, c, Sim.M);
    res <- list(validity=vi, overall.validity=ov.vi, similarity.matrix=Sim.M, AC=ac);
  } else
    res <- list(validity=vi, overall.validity=ov.vi, similarity.matrix=Sim.M);
  return(res);
}

##########################
# Validity indices computation using a clustering and a similarity matrix.
# It assumes that the labels of the examples are integers. It computes the stability index of each individual cluster,
# the overall validity index of the clustering and (optionally) the Assignment Confidence (AC) index of each example.
# To compute the indices a similarity matrix is used.
# Input:
# cluster : the original clustering (a list of clusters) whose validity indices will be computed
# Sim.M : similarity matrix
# AC : boolean; if TRUE the Assignment Confidence index of each example is computed
# Output:
# a list with components "validity", "overall.validity" and (optionally) "AC":
# "validity" is a vector with the validity of each of the c clusters;
# "overall.validity" is the validity index of the overall clustering;
# "AC" is a matrix with the Assignment Confidence index of each example. Each row corresponds to an example,
#      each column to a cluster.
Cluster.validity.from.similarity <- function(cluster, Sim.M, AC=TRUE) {
  c <- length(cluster);  # number of clusters
  # Computing the validity indices vi
  vi <- Validity.indices(cluster, c, Sim.M);
  # Computing the overall (average) validity of the clustering
  ov.vi <- sum(vi)/c;
  if (AC == TRUE) {
    ac <- AC.index(cluster, c, Sim.M);
    res <- list(validity=vi, overall.validity=ov.vi, AC=ac);
  } else
    res <- list(validity=vi, overall.validity=ov.vi);
  return(res);
}
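##############################################################################
# Usage sketch for Cluster.validity (illustrative only; cl1 and cl2 are the
# hypothetical toy clusterings of the sketch above).
#
# cluster <- list(c(1,2,3), c(4,5,6));                  # clustering to be validated
# res <- Cluster.validity(cluster, list(cl1, cl2), AC=TRUE);
# res$validity            # stability index of each cluster
# res$overall.validity    # average stability over the clusters
# res$AC                  # assignment confidence of each example w.r.t. each cluster
##############################################################################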
##########################
# Function to compute the validity index (e.g. the stability index) of each cluster.
# It computes the validity index of each individual cluster.
# This function is called by Cluster.validity and Cluster.validity.from.similarity.
# Input:
# cluster : list of clusters representing a clustering in the original space. Each element of the list is a
#           vector whose elements are the examples belonging to the cluster.
# c : number of clusters
# Sim.M : the pairwise similarity matrix
# Output:
# vi : vector of the validity indices. Each element is the validity index of the corresponding cluster.
Validity.indices <- function(cluster, c, Sim.M) {
  vi <- rep(0,c);
  for (i in 1:c) {
    n.ex <- length(cluster[[i]]);
    if (n.ex == 1) {
      x <- cluster[[i]][1];
      vi[i] <- vi[i] + Sim.M[x,x];
    } else {
      for (x1 in 1:(n.ex-1)) {
        for (x2 in (x1+1):n.ex) {
          x <- cluster[[i]][x1];
          y <- cluster[[i]][x2];
          vi[i] <- vi[i] + Sim.M[x,y];
        }
      }
    }
    if (n.ex != 1)
      vi[i] <- vi[i] / (n.ex*(n.ex-1)/2);
  }
  return(vi);
}

##########################
# Assignment Confidence index computation.
# For a given clustering and similarity matrix, the set of AC indices is computed (for each cluster and each example).
# It assumes that the labels of the examples are integers.
# Input:
# cluster : the original clustering (a list of clusters) whose validity indices will be computed
# c : number of clusters
# Sim.M : similarity matrix
# Output:
# ac : a matrix with the Assignment Confidence index of each example. Each row corresponds to an example,
#      each column to a cluster.
AC.index <- function(cluster, c, Sim.M) {
  ac <- matrix(numeric(nrow(Sim.M)*c), nrow=nrow(Sim.M));
  for (i in 1:c) {
    n.ex <- length(cluster[[i]]);
    if (n.ex == 1) {
      x <- cluster[[i]][1];  # the singleton element
      ac[x,i] <- Sim.M[x,x];
    } else {
      for (x1 in 1:(n.ex)) {
        x <- cluster[[i]][x1];  # computing ac[x,i]
        for (x2 in 1:(n.ex)) {
          y <- cluster[[i]][x2];
          if (y != x)
            ac[x,i] <- ac[x,i] + Sim.M[x,y];
        }
        ac[x,i] <- ac[x,i] / (n.ex - 1);
      }
    }
  }
  return(ac);
}
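##############################################################################
# Usage sketch for Validity.indices and AC.index (illustrative only; the 3x3
# similarity matrix below is hypothetical). The validity of a cluster is the
# average pairwise similarity among its members; AC[x,i] is the average
# similarity of example x to the other members of cluster i.
#
# Sim <- matrix(c(1.0, 0.9, 0.1,
#                 0.9, 1.0, 0.2,
#                 0.1, 0.2, 1.0), nrow=3, byrow=TRUE);
# cl <- list(c(1,2), c(3));
# Validity.indices(cl, c=2, Sim);  # 0.9 for cluster {1,2}; 1.0 for the singleton {3}
# AC.index(cl, c=2, Sim);          # e.g. the AC of example 1 w.r.t. cluster 1 is Sim[1,2] = 0.9
##############################################################################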
###################################################################
# Functions specific for each clustering algorithm.
# They perform multiple clusterings on randomly projected data
# using a specific clustering algorithm.
###################################################################

######### HIERARCHICAL CLUSTERING ###############################

##########################
# Multiple Random hierarchical clustering.
# Multiple random hierarchical clusterings are computed using random projections of the data.
# It assumes that the labels of the examples are integers from 1 to ncol(M).
# Several randomized maps may be used: RS, PMO, Normal and Achlioptas random projections.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# c : number of clusters
# pmethod : projection method. It must be one of the following:
#           "RS" (random subspace projection)
#           "PMO" (Plus Minus One random projection)
#           "Norm" (normal random projection)
#           "Achlioptas" (Achlioptas random projection)
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of random projections
# scale : if TRUE randomized projections are scaled (default)
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list of the n clusterings obtained by randomized hierarchical clustering
Multiple.Random.hclustering <- function(M, dim, pmethod="RS", c=3, hmethod="average", n=50, scale=TRUE, seed=100,
                                        distance="euclidean") {
  dim.Sim.M <- ncol(M);
  # A. Perform multiple clusterings using randomized embeddings
  lRS <- switch(pmethod,
                RS = RS.hclustering(M, dim, c, hmethod, n, scale, seed, distance),
                PMO = PMO.hclustering(M, dim, c, hmethod, n, scale, seed, distance),
                Norm = Norm.hclustering(M, dim, c, hmethod, n, scale, seed, distance),
                Achlioptas = Achlioptas.hclustering(M, dim, c, hmethod, n, scale, seed, distance));
  return(lRS$cluster);
}

##########################
# Multiple Hierarchical Random Subspace clustering.
# Multiple hierarchical clusterings using multiple random subspace (RS) projections of the data.
# The function outputs both the corresponding trees and clusterings.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# c : number of clusters
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of RS projections
# scale : if TRUE RS projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list l with components "cluster" and "tree". The "cluster" component is the list of the n clusterings obtained.
# Each clustering is a set of vectors whose elements are the labels of the examples (columns of the data matrix M).
# The component "tree" is a list of trees as returned by the hclust algorithm: n such trees are generated (one for
# each RS projection).
RS.hclustering <- function(M, dim, c=3, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  cl <- list();
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- random.subspace(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
    plot(tr[[i]], main="");
    cl[i] <- list(rect.hclust(tr[[i]], k = c));
  }
  l <- list(cluster=cl, tree=tr);
  l
}
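##############################################################################
# Usage sketch for Multiple.Random.hclustering (illustrative only; the data
# matrix below is synthetic and the parameter values are arbitrary).
# rect.hclust draws on the current device, so a graphical device must be open.
#
# M <- matrix(rnorm(1000*30), nrow=1000);   # 1000 variables, 30 examples
# cl.list <- Multiple.Random.hclustering(M, dim=100, pmethod="RS", c=3,
#                                        hmethod="average", n=20, seed=100);
# length(cl.list);                           # 20 clusterings, one per projection
# Sim <- Do.similarity.matrix(cl.list, dim.Sim.M=ncol(M));
##############################################################################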
##########################
# Multiple Hierarchical Plus Minus One (PMO) clustering.
# Multiple hierarchical clusterings using multiple Plus Minus One (PMO) random projections of the data.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# c : number of clusters
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of PMO projections
# scale : if TRUE PMO projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list l with components "cluster" and "tree". The "cluster" component is the list of the n clusterings obtained.
# Each clustering is a set of vectors whose elements are the labels of the examples (columns of the data matrix M).
# The component "tree" is a list of trees as returned by the hclust algorithm: n such trees are generated (one for
# each PMO projection).
PMO.hclustering <- function(M, dim, c=3, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  cl <- list();
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- Plus.Minus.One.random.projection(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
    plot(tr[[i]], main="");
    cl[i] <- list(rect.hclust(tr[[i]], k = c));
  }
  l <- list(cluster=cl, tree=tr);
  l
}

##########################
# Multiple Hierarchical Normal random projections clustering.
# Multiple hierarchical clusterings using multiple normal random projections of the data.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# c : number of clusters
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of normal random projections
# scale : if TRUE normal random projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list l with components "cluster" and "tree". The "cluster" component is the list of the n clusterings obtained.
# Each clustering is a set of vectors whose elements are the labels of the examples (columns of the data matrix M).
# The component "tree" is a list of trees as returned by the hclust algorithm: n such trees are generated (one for
# each normal random projection).
Norm.hclustering <- function(M, dim, c=3, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  cl <- list();
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- norm.random.projection(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
    plot(tr[[i]], main="");
    cl[i] <- list(rect.hclust(tr[[i]], k = c));
  }
  l <- list(cluster=cl, tree=tr);
  l
}
##########################
# Multiple Hierarchical Achlioptas random projections clustering.
# Multiple hierarchical clusterings using Achlioptas random projections of the data.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# c : number of clusters
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of Achlioptas random projections
# scale : if TRUE Achlioptas random projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list l with components "cluster" and "tree". The "cluster" component is the list of the n clusterings obtained.
# Each clustering is a set of vectors whose elements are the labels of the examples (columns of the data matrix M).
# The component "tree" is a list of trees as returned by the hclust algorithm: n such trees are generated (one for
# each Achlioptas random projection).
Achlioptas.hclustering <- function(M, dim, c=3, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  cl <- list();
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- Achlioptas.random.projection(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
    plot(tr[[i]], main="");
    cl[i] <- list(rect.hclust(tr[[i]], k = c));
  }
  l <- list(cluster=cl, tree=tr);
  l
}

##########################
# Multiple hierarchical clusterings using random subspace (RS) projections of the data.
# The function outputs only the corresponding trees.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of RS projections
# scale : if TRUE RS projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list of trees as returned by the hclust algorithm: n such trees are generated (one for each RS projection).
RS.hclustering.tree <- function(M, dim, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- random.subspace(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
  }
  tr
}

##########################
# Multiple hierarchical clusterings using Achlioptas projections of the data.
# The function outputs only the corresponding trees.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of Achlioptas projections
# scale : if TRUE Achlioptas projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list of trees as returned by the hclust algorithm: n such trees are generated (one for each Achlioptas projection).
Achlioptas.hclustering.tree <- function(M, dim, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- Achlioptas.random.projection(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
  }
  tr
}
##########################
# Multiple hierarchical clusterings using "Normal" projections of the data.
# The function outputs only the corresponding trees.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of normal random projections
# scale : if TRUE normal random projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list of trees as returned by the hclust algorithm: n such trees are generated (one for each "Normal" projection).
Norm.hclustering.tree <- function(M, dim, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- norm.random.projection(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
  }
  tr
}

##########################
# Multiple hierarchical clusterings using Plus Minus One (PMO) projections of the data.
# The function outputs only the corresponding trees.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of PMO projections
# scale : if TRUE PMO projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list of trees as returned by the hclust algorithm: n such trees are generated (one for each PMO projection).
PMO.hclustering.tree <- function(M, dim, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- Plus.Minus.One.random.projection(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
  }
  tr
}

##########################
# Multiple clusterings generation from the corresponding trees for a given cut (number of clusters).
# Input:
# tr : a list of trees as returned by the hclust algorithm
# c : number of clusters
# Output:
# a list of clusterings: each clustering is a list of vectors whose elements are the labels of the examples,
# and each vector represents a different cluster.
Generate.clusters <- function(tr, c=3) {
  cl <- list();
  n <- length(tr);
  for (i in 1:n) {
    plot(tr[[i]], main="");
    cl[i] <- list(rect.hclust(tr[[i]], k = c));
  }
  cl
}
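##############################################################################
# Usage sketch for the tree-only functions together with Generate.clusters
# (illustrative only; M is the synthetic matrix of the sketches above).
# The same projected trees can be cut at different numbers of clusters
# without recomputing the projections; a graphical device must be open.
#
# trees <- RS.hclustering.tree(M, dim=100, hmethod="average", n=20, seed=100);
# cl3 <- Generate.clusters(trees, c=3);   # 20 clusterings with 3 clusters each
# cl5 <- Generate.clusters(trees, c=5);   # 20 clusterings with 5 clusters from the same trees
##############################################################################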
##########################
# Random hierarchical clustering and validity index computation using random projections of the data.
# This function applies a hierarchical clustering algorithm to the data and then computes the stability indices of the
# obtained clusters using multiple random projections.
# Different hierarchical clusterings may be used (e.g. average, complete and single linkage or Ward's method) as
# well as different randomized maps (e.g. PMO, Achlioptas, Normal, Random Subspace projections).
# It assumes that the labels of the examples are integers from 1 to ncol(M).
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# c : number of clusters
# pmethod : projection method. It must be one of the following:
#           "RS" (random subspace projection)
#           "PMO" (Plus Minus One random projection)
#           "Norm" (normal random projection)
#           "Achlioptas" (Achlioptas random projection)
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of random projections
# scale : if TRUE randomized projections are scaled
# seed : numerical seed for the random generator
# AC : if TRUE (default) the AC indices are computed
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list with components "validity", "overall.validity", "similarity.matrix", "dimension",
# "cluster", "tree", "orig.tree", "orig.cluster" and (optionally) "AC":
# "validity" is a vector with the validity of each of the c clusters;
# "overall.validity" is the validity index of the overall clustering;
# "similarity.matrix" is the pairwise similarity matrix between examples;
# "dimension" is the dimension of the random projections;
# "cluster" is the list of the n clusterings obtained by randomized hierarchical clustering;
# "tree" is the list of the n trees obtained by randomized hierarchical clustering;
# "orig.tree" is the tree built by hclust in the original space;
# "orig.cluster" is the list of the clusters in the original space;
# "AC" is a matrix with the Assignment Confidence index of each example (one row per example, one column per cluster).
Random.hclustering.validity <- function(M, dim, pmethod="RS", c=3, hmethod="average", n=50, scale=TRUE, seed=100,
                                        AC=TRUE, distance="euclidean") {
  dim.Sim.M <- ncol(M);
  # A. Perform multiple clusterings using randomized embeddings
  lRS <- switch(pmethod,
                RS = RS.hclustering(M, dim, c, hmethod, n, scale, seed, distance),
                PMO = PMO.hclustering(M, dim, c, hmethod, n, scale, seed, distance),
                Norm = Norm.hclustering(M, dim, c, hmethod, n, scale, seed, distance),
                Achlioptas = Achlioptas.hclustering(M, dim, c, hmethod, n, scale, seed, distance));
  # B. Build the similarity matrix from the list of clusterings
  Sim.M <- Do.similarity.matrix(lRS$cluster, dim.Sim.M);
  # C. Computing the clusters in the original space
  if (distance == "euclidean")
    d <- dist(t(M))
  else if (distance == "pearson")
    d <- as.dist(1 - cor(M))
  else
    stop("Random.hclustering.validity: distance measure not implemented");
  tree <- hclust(d, method = hmethod);
  plot(tree, main="");
  cl.orig <- rect.hclust(tree, k = c);
  # D. Computing the validity indices vi
  vi <- Validity.indices(cl.orig, c, Sim.M);
  # E. Computing the overall (average) validity of the clustering
  ov.vi <- sum(vi)/c;
  if (AC == TRUE) {
    ac <- AC.index(cl.orig, c, Sim.M);
    res <- list(validity=vi, overall.validity=ov.vi, similarity.matrix=Sim.M, dimension=dim, cluster=lRS$cluster,
                tree=lRS$tree, orig.tree=tree, orig.cluster=cl.orig, AC=ac);
  } else
    res <- list(validity=vi, overall.validity=ov.vi, similarity.matrix=Sim.M, dimension=dim, cluster=lRS$cluster,
                tree=lRS$tree, orig.tree=tree, orig.cluster=cl.orig);
  return(res);
}

##############################################################################
##############################################################################
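##############################################################################
# End-to-end usage sketch for Random.hclustering.validity (illustrative only;
# the matrix below is synthetic, and in practice M is e.g. a gene expression
# matrix with genes on the rows and samples on the columns).
#
# M <- matrix(rnorm(1000*30), nrow=1000);
# out <- Random.hclustering.validity(M, dim=100, pmethod="RS", c=3,
#                                    hmethod="average", n=20, seed=100, AC=TRUE);
# out$validity            # stability index of each of the 3 clusters
# out$overall.validity    # average stability of the clustering
# out$AC                  # assignment confidence of each example w.r.t. each cluster
##############################################################################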