##############################################################################
# clusterv.R
# January 2005
# modified June 2005, August 2005, February 2006 and August 2006
# Set of functions for computing cluster validity indices based on random projections
##############################################################################

library(stats);
source("rp.R");
source("rData.R");
source("clusterv2.R");

##############################################################################
# Function to compute and build up a pairwise similarity matrix when each clustering defines a partition of the data.
# Each clustering used to compute the pairwise similarity matrix may have a different number of clusters.
# Input:
# l : list of clusterings
# Output:
# Sim.M : the pairwise similarity matrix whose elements represent how often two examples fall in the same cluster
#         across multiple clusterings. Each element of Sim.M is normalized so that its value is between 0 and 1.
Do.similarity.matrix.partition <- function(l) {
  dim.Sim.M <- 0;   # dimension of the similarity matrix (to be computed)
  n <- length(l);   # number of projections (clusterings)
  cl <- l[[1]];     # first clustering selected
  c <- length(cl);  # number of clusters
  for (j in 1:c)
    dim.Sim.M <- dim.Sim.M + length(cl[[j]]);  # summing up the elements of the clusters
  Sim.M <- matrix(numeric(dim.Sim.M*dim.Sim.M), nrow=dim.Sim.M);
  singletons <- numeric(dim.Sim.M);
  for (i in 1:n) {
    cl <- l[[i]];
    c <- length(cl);  # number of clusters for the current clustering
    for (j in 1:c) {
      n.ex <- length(cl[[j]]);
      if (n.ex == 1)
        singletons[cl[[j]][1]] <- singletons[cl[[j]][1]] + 1
      else {
        for (x1 in 1:(n.ex-1)) {
          for (x2 in (x1+1):n.ex) {
            x <- cl[[j]][x1];
            y <- cl[[j]][x2];
            Sim.M[x,y] <- Sim.M[x,y] + 1;
          }
        }
      }
    }
  }
  for (x1 in 1:(dim.Sim.M-1))
    for (x2 in (x1+1):dim.Sim.M)
      Sim.M[x2,x1] <- Sim.M[x1,x2];
  for (x in 1:(dim.Sim.M))
    Sim.M[x,x] <- singletons[x];
  Sim.M <- Sim.M / n;
  return(Sim.M);
}

##############################################################################
# Function to compute and build up a pairwise similarity matrix.
# This function may also be used with clusterings that do not strictly define a partition of the data and with a
# variable number of clusters for each clustering.
# Input:
# l : list of clusterings
# dim.Sim.M : dimension of the similarity matrix (number of examples)
# Output:
# Sim.M : the pairwise similarity matrix whose elements represent how often two examples fall in the same cluster
#         across multiple clusterings. Each element of Sim.M is normalized so that its value is between 0 and 1.
Do.similarity.matrix <- function(l, dim.Sim.M) {
  Sim.M <- matrix(numeric(dim.Sim.M*dim.Sim.M), nrow=dim.Sim.M);
  singletons <- numeric(dim.Sim.M);
  n <- length(l);  # number of projections (clusterings)
  for (i in 1:n) {
    cl <- l[[i]];
    c <- length(cl);  # number of clusters for the current clustering
    for (j in 1:c) {
      n.ex <- length(cl[[j]]);
      if (n.ex == 1)
        singletons[cl[[j]][1]] <- singletons[cl[[j]][1]] + 1
      else {
        for (x1 in 1:(n.ex-1)) {
          for (x2 in (x1+1):n.ex) {
            x <- cl[[j]][x1];
            y <- cl[[j]][x2];
            Sim.M[x,y] <- Sim.M[x,y] + 1;
          }
        }
      }
    }
  }
  for (x1 in 1:(dim.Sim.M-1))
    for (x2 in (x1+1):dim.Sim.M)
      Sim.M[x2,x1] <- Sim.M[x1,x2];
  for (x in 1:(dim.Sim.M))
    Sim.M[x,x] <- singletons[x];
  Sim.M <- Sim.M / n;
  return(Sim.M);
}
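##############################################################################
# Usage sketch for Do.similarity.matrix (illustrative only; the two toy
# clusterings below are hypothetical). Examples must be labelled with the
# integers 1..dim.Sim.M.
#
# cl1 <- list(c(1,2,3), c(4,5,6));       # first clustering (two clusters)
# cl2 <- list(c(1,2), c(3,4), c(5,6));   # second clustering (three clusters)
# Sim <- Do.similarity.matrix(list(cl1, cl2), dim.Sim.M=6);
# Sim[1,2];   # 1.0 : examples 1 and 2 co-cluster in both clusterings
# Sim[2,3];   # 0.5 : examples 2 and 3 co-cluster in one of the two clusterings
##############################################################################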
##########################
# Validity indices computation.
# It assumes that the labels of the examples are integers.
# It computes the stability index of each individual cluster, the overall validity index of the clustering and
# (optionally) the Assignment Confidence (AC) index of each example.
# To compute the indices a set of clusterings is used.
# Input:
# cluster : the original clustering (a list of clusters) whose validity indices will be computed
# M.clusters : the list of the n clusterings (a list of lists) used for validity index computation
# AC : boolean; if TRUE the Assignment Confidence index of each example is computed
# Output:
# a list with components "validity", "overall.validity", "similarity.matrix" and (optionally) "AC":
# "validity" is a vector with the validity of each of the c clusters;
# "overall.validity" is the validity index of the overall clustering;
# "similarity.matrix" is the pairwise similarity matrix between examples;
# "AC" is a matrix with the Assignment Confidence index of each example. Each row corresponds to an example,
#      each column to a cluster.
Cluster.validity <- function(cluster, M.clusters, AC=FALSE) {
  dim.Sim.M <- 0;
  c <- length(cluster);
  for (i in 1:c)
    dim.Sim.M <- dim.Sim.M + length(cluster[[i]]);
  Sim.M <- Do.similarity.matrix(M.clusters, dim.Sim.M);
  vi <- Validity.indices(cluster, c, Sim.M);
  ov.vi <- sum(vi)/c;
  if (AC == TRUE) {
    ac <- AC.index(cluster, c, Sim.M);
    res <- list(validity=vi, overall.validity=ov.vi, similarity.matrix=Sim.M, AC=ac);
  } else
    res <- list(validity=vi, overall.validity=ov.vi, similarity.matrix=Sim.M);
  return(res);
}

##########################
# Validity indices computation using a clustering and a similarity matrix.
# It assumes that the labels of the examples are integers. It computes the stability index of each individual cluster,
# the overall validity index of the clustering and (optionally) the Assignment Confidence (AC) index of each example.
# To compute the indices a similarity matrix is used.
# Input:
# cluster : the original clustering (a list of clusters) whose validity indices will be computed
# Sim.M : similarity matrix
# AC : boolean; if TRUE the Assignment Confidence index of each example is computed
# Output:
# a list with components "validity", "overall.validity" and (optionally) "AC":
# "validity" is a vector with the validity of each of the c clusters;
# "overall.validity" is the validity index of the overall clustering;
# "AC" is a matrix with the Assignment Confidence index of each example. Each row corresponds to an example,
#      each column to a cluster.
Cluster.validity.from.similarity <- function(cluster, Sim.M, AC=TRUE) {
  c <- length(cluster);  # number of clusters
  # Computing the validity indices vi
  vi <- Validity.indices(cluster, c, Sim.M);
  # Computing the overall (average) validity of the clustering
  ov.vi <- sum(vi)/c;
  if (AC == TRUE) {
    ac <- AC.index(cluster, c, Sim.M);
    res <- list(validity=vi, overall.validity=ov.vi, AC=ac);
  } else
    res <- list(validity=vi, overall.validity=ov.vi);
  return(res);
}
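##############################################################################
# Usage sketch for Cluster.validity (illustrative only; cl1 and cl2 are the
# hypothetical toy clusterings of the sketch above).
#
# cluster <- list(c(1,2,3), c(4,5,6));                  # clustering to be validated
# res <- Cluster.validity(cluster, list(cl1, cl2), AC=TRUE);
# res$validity            # stability index of each cluster
# res$overall.validity    # average stability over the clusters
# res$AC                  # assignment confidence of each example w.r.t. each cluster
##############################################################################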
##########################
# Function to compute the validity index (e.g. the stability index) of each cluster.
# It computes the validity index of each individual cluster.
# This function is called by Cluster.validity and Cluster.validity.from.similarity.
# Input:
# cluster : list of clusters representing a clustering in the original space. Each element of the list is a
#           vector whose elements are the examples belonging to the cluster.
# c : number of clusters
# Sim.M : the pairwise similarity matrix
# Output:
# vi : vector of the validity indices. Each element is the validity index of the corresponding cluster.
Validity.indices <- function(cluster, c, Sim.M) {
  vi <- rep(0,c);
  for (i in 1:c) {
    n.ex <- length(cluster[[i]]);
    if (n.ex == 1) {
      x <- cluster[[i]][1];
      vi[i] <- vi[i] + Sim.M[x,x];
    } else {
      for (x1 in 1:(n.ex-1)) {
        for (x2 in (x1+1):n.ex) {
          x <- cluster[[i]][x1];
          y <- cluster[[i]][x2];
          vi[i] <- vi[i] + Sim.M[x,y];
        }
      }
    }
    if (n.ex != 1)
      vi[i] <- vi[i] / (n.ex*(n.ex-1)/2);
  }
  return(vi);
}

##########################
# Assignment Confidence index computation.
# For a given clustering and similarity matrix, the set of AC indices is computed (for each cluster and each example).
# It assumes that the labels of the examples are integers.
# Input:
# cluster : the original clustering (a list of clusters) whose validity indices will be computed
# c : number of clusters
# Sim.M : similarity matrix
# Output:
# ac : a matrix with the Assignment Confidence index of each example. Each row corresponds to an example,
#      each column to a cluster.
AC.index <- function(cluster, c, Sim.M) {
  ac <- matrix(numeric(nrow(Sim.M)*c), nrow=nrow(Sim.M));
  for (i in 1:c) {
    n.ex <- length(cluster[[i]]);
    if (n.ex == 1) {
      x <- cluster[[i]][1];  # the singleton element
      ac[x,i] <- Sim.M[x,x];
    } else {
      for (x1 in 1:(n.ex)) {
        x <- cluster[[i]][x1];  # computing ac[x,i]
        for (x2 in 1:(n.ex)) {
          y <- cluster[[i]][x2];
          if (y != x)
            ac[x,i] <- ac[x,i] + Sim.M[x,y];
        }
        ac[x,i] <- ac[x,i] / (n.ex - 1);
      }
    }
  }
  return(ac);
}
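##############################################################################
# Usage sketch for Validity.indices and AC.index (illustrative only; the 3x3
# similarity matrix below is hypothetical). The validity of a cluster is the
# average pairwise similarity among its members; AC[x,i] is the average
# similarity of example x to the other members of cluster i.
#
# Sim <- matrix(c(1.0, 0.9, 0.1,
#                 0.9, 1.0, 0.2,
#                 0.1, 0.2, 1.0), nrow=3, byrow=TRUE);
# cl <- list(c(1,2), c(3));
# Validity.indices(cl, c=2, Sim);  # 0.9 for cluster {1,2}; 1.0 for the singleton {3}
# AC.index(cl, c=2, Sim);          # e.g. the AC of example 1 w.r.t. cluster 1 is Sim[1,2] = 0.9
##############################################################################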
###################################################################
# Functions specific for each clustering algorithm.
# They perform multiple clusterings on randomly projected data
# using a specific clustering algorithm.
###################################################################

######### HIERARCHICAL CLUSTERING ###############################

##########################
# Multiple Random hierarchical clustering.
# Multiple random hierarchical clusterings are computed using random projections of the data.
# It assumes that the labels of the examples are integers from 1 to ncol(M).
# Several randomized maps may be used: RS, PMO, Normal and Achlioptas random projections.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# c : number of clusters
# pmethod : projection method. It must be one of the following:
#           "RS" (random subspace projection)
#           "PMO" (Plus Minus One random projection)
#           "Norm" (normal random projection)
#           "Achlioptas" (Achlioptas random projection)
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of random projections
# scale : if TRUE randomized projections are scaled (default)
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list of the n clusterings obtained by randomized hierarchical clustering
Multiple.Random.hclustering <- function(M, dim, pmethod="RS", c=3, hmethod="average", n=50, scale=TRUE, seed=100,
                                        distance="euclidean") {
  dim.Sim.M <- ncol(M);
  # A. Perform multiple clusterings using randomized embeddings
  lRS <- switch(pmethod,
                RS = RS.hclustering(M, dim, c, hmethod, n, scale, seed, distance),
                PMO = PMO.hclustering(M, dim, c, hmethod, n, scale, seed, distance),
                Norm = Norm.hclustering(M, dim, c, hmethod, n, scale, seed, distance),
                Achlioptas = Achlioptas.hclustering(M, dim, c, hmethod, n, scale, seed, distance));
  return(lRS$cluster);
}

##########################
# Multiple Hierarchical Random Subspace clustering.
# Multiple hierarchical clusterings using multiple random subspace (RS) projections of the data.
# The function outputs both the corresponding trees and clusterings.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# c : number of clusters
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of RS projections
# scale : if TRUE RS projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list l with components "cluster" and "tree". The "cluster" component is the list of the n clusterings obtained.
# Each clustering is a set of vectors whose elements are the labels of the examples (columns of the data matrix M).
# The component "tree" is a list of trees as returned by the hclust algorithm: n such trees are generated (one for
# each RS projection).
RS.hclustering <- function(M, dim, c=3, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  cl <- list();
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- random.subspace(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
    plot(tr[[i]], main="");
    cl[i] <- list(rect.hclust(tr[[i]], k = c));
  }
  l <- list(cluster=cl, tree=tr);
  l
}
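##############################################################################
# Usage sketch for Multiple.Random.hclustering (illustrative only; the data
# matrix below is synthetic and the parameter values are arbitrary).
# rect.hclust draws on the current device, so a graphical device must be open.
#
# M <- matrix(rnorm(1000*30), nrow=1000);   # 1000 variables, 30 examples
# cl.list <- Multiple.Random.hclustering(M, dim=100, pmethod="RS", c=3,
#                                        hmethod="average", n=20, seed=100);
# length(cl.list);                           # 20 clusterings, one per projection
# Sim <- Do.similarity.matrix(cl.list, dim.Sim.M=ncol(M));
##############################################################################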
##########################
# Multiple Hierarchical Plus Minus One (PMO) clustering.
# Multiple hierarchical clusterings using multiple Plus Minus One (PMO) random projections of the data.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# c : number of clusters
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of PMO projections
# scale : if TRUE PMO projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list l with components "cluster" and "tree". The "cluster" component is the list of the n clusterings obtained.
# Each clustering is a set of vectors whose elements are the labels of the examples (columns of the data matrix M).
# The component "tree" is a list of trees as returned by the hclust algorithm: n such trees are generated (one for
# each PMO projection).
PMO.hclustering <- function(M, dim, c=3, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  cl <- list();
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- Plus.Minus.One.random.projection(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
    plot(tr[[i]], main="");
    cl[i] <- list(rect.hclust(tr[[i]], k = c));
  }
  l <- list(cluster=cl, tree=tr);
  l
}

##########################
# Multiple Hierarchical Normal random projections clustering.
# Multiple hierarchical clusterings using multiple normal random projections of the data.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# c : number of clusters
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of normal random projections
# scale : if TRUE normal random projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list l with components "cluster" and "tree". The "cluster" component is the list of the n clusterings obtained.
# Each clustering is a set of vectors whose elements are the labels of the examples (columns of the data matrix M).
# The component "tree" is a list of trees as returned by the hclust algorithm: n such trees are generated (one for
# each normal random projection).
Norm.hclustering <- function(M, dim, c=3, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  cl <- list();
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- norm.random.projection(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
    plot(tr[[i]], main="");
    cl[i] <- list(rect.hclust(tr[[i]], k = c));
  }
  l <- list(cluster=cl, tree=tr);
  l
}
##########################
# Multiple Hierarchical Achlioptas random projections clustering.
# Multiple hierarchical clusterings using Achlioptas random projections of the data.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# c : number of clusters
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of Achlioptas random projections
# scale : if TRUE Achlioptas random projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list l with components "cluster" and "tree". The "cluster" component is the list of the n clusterings obtained.
# Each clustering is a set of vectors whose elements are the labels of the examples (columns of the data matrix M).
# The component "tree" is a list of trees as returned by the hclust algorithm: n such trees are generated (one for
# each Achlioptas random projection).
Achlioptas.hclustering <- function(M, dim, c=3, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  cl <- list();
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- Achlioptas.random.projection(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
    plot(tr[[i]], main="");
    cl[i] <- list(rect.hclust(tr[[i]], k = c));
  }
  l <- list(cluster=cl, tree=tr);
  l
}

##########################
# Multiple hierarchical clusterings using random subspace (RS) projections of the data.
# The function outputs only the corresponding trees.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of RS projections
# scale : if TRUE RS projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list of trees as returned by the hclust algorithm: n such trees are generated (one for each RS projection).
RS.hclustering.tree <- function(M, dim, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- random.subspace(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
  }
  tr
}

##########################
# Multiple hierarchical clusterings using Achlioptas projections of the data.
# The function outputs only the corresponding trees.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of Achlioptas projections
# scale : if TRUE Achlioptas projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list of trees as returned by the hclust algorithm: n such trees are generated (one for each Achlioptas projection).
Achlioptas.hclustering.tree <- function(M, dim, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- Achlioptas.random.projection(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
  }
  tr
}
##########################
# Multiple hierarchical clusterings using "Normal" projections of the data.
# The function outputs only the corresponding trees.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of normal random projections
# scale : if TRUE normal random projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list of trees as returned by the hclust algorithm: n such trees are generated (one for each "Normal" projection).
Norm.hclustering.tree <- function(M, dim, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- norm.random.projection(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
  }
  tr
}

##########################
# Multiple hierarchical clusterings using Plus Minus One (PMO) projections of the data.
# The function outputs only the corresponding trees.
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of PMO projections
# scale : if TRUE PMO projections are scaled
# seed : numerical seed for the random generator
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list of trees as returned by the hclust algorithm: n such trees are generated (one for each PMO projection).
PMO.hclustering.tree <- function(M, dim, hmethod="average", n=50, scale=TRUE, seed=100, distance="euclidean") {
  tr <- list();
  set.seed(seed);
  for (i in 1:n) {
    P.M <- Plus.Minus.One.random.projection(d=dim, M, scaling=scale);
    if (distance == "euclidean")
      d <- dist(t(P.M))
    else if (distance == "pearson")
      d <- as.dist(1 - cor(P.M))
    else
      stop("distance measure not implemented");
    tr[i] <- list(hclust(d, method = hmethod));
  }
  tr
}

##########################
# Multiple clusterings generation from the corresponding trees for a given cut (number of clusters).
# Input:
# tr : a list of trees as returned by the hclust algorithm
# c : number of clusters
# Output:
# a list of clusterings: each clustering is a list of vectors whose elements are the labels of the examples,
# and each vector represents a different cluster.
Generate.clusters <- function(tr, c=3) {
  cl <- list();
  n <- length(tr);
  for (i in 1:n) {
    plot(tr[[i]], main="");
    cl[i] <- list(rect.hclust(tr[[i]], k = c));
  }
  cl
}
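##############################################################################
# Usage sketch for the tree-only functions together with Generate.clusters
# (illustrative only; M is the synthetic matrix of the sketches above).
# The same projected trees can be cut at different numbers of clusters
# without recomputing the projections; a graphical device must be open.
#
# trees <- RS.hclustering.tree(M, dim=100, hmethod="average", n=20, seed=100);
# cl3 <- Generate.clusters(trees, c=3);   # 20 clusterings with 3 clusters each
# cl5 <- Generate.clusters(trees, c=5);   # 20 clusterings with 5 clusters from the same trees
##############################################################################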
##########################
# Random hierarchical clustering and validity index computation using random projections of the data.
# This function applies a hierarchical clustering algorithm to the data and then computes the stability indices of the
# obtained clusters using multiple random projections.
# Different hierarchical clusterings may be used (e.g. average, complete and single linkage or Ward's method) as
# well as different randomized maps (e.g. PMO, Achlioptas, Normal, Random Subspace projections).
# It assumes that the labels of the examples are integers from 1 to ncol(M).
# Input:
# M : matrix of data: rows are variables and columns are examples
# dim : subspace dimension
# c : number of clusters
# pmethod : projection method. It must be one of the following:
#           "RS" (random subspace projection)
#           "PMO" (Plus Minus One random projection)
#           "Norm" (normal random projection)
#           "Achlioptas" (Achlioptas random projection)
# hmethod : the agglomeration method to be used. This should be one of
#           "ward", "single", "complete", "average", "mcquitty", "median" or "centroid", according to the hclust
#           method of the package stats.
# n : number of random projections
# scale : if TRUE randomized projections are scaled
# seed : numerical seed for the random generator
# AC : if TRUE (default) the AC indices are computed
# distance : it must be one of the two: "euclidean" (default) or "pearson" (that is 1 - Pearson correlation)
# Output:
# a list with components "validity", "overall.validity", "similarity.matrix", "dimension",
# "cluster", "tree", "orig.tree", "orig.cluster" and (optionally) "AC":
# "validity" is a vector with the validity of each of the c clusters;
# "overall.validity" is the validity index of the overall clustering;
# "similarity.matrix" is the pairwise similarity matrix between examples;
# "dimension" is the dimension of the random projections;
# "cluster" is the list of the n clusterings obtained by randomized hierarchical clustering;
# "tree" is the list of the n trees obtained by randomized hierarchical clustering;
# "orig.tree" is the tree built by hclust in the original space;
# "orig.cluster" is the list of the clusters in the original space;
# "AC" is a matrix with the Assignment Confidence index of each example (one row per example, one column per cluster).
Random.hclustering.validity <- function(M, dim, pmethod="RS", c=3, hmethod="average", n=50, scale=TRUE, seed=100,
                                        AC=TRUE, distance="euclidean") {
  dim.Sim.M <- ncol(M);
  # A. Perform multiple clusterings using randomized embeddings
  lRS <- switch(pmethod,
                RS = RS.hclustering(M, dim, c, hmethod, n, scale, seed, distance),
                PMO = PMO.hclustering(M, dim, c, hmethod, n, scale, seed, distance),
                Norm = Norm.hclustering(M, dim, c, hmethod, n, scale, seed, distance),
                Achlioptas = Achlioptas.hclustering(M, dim, c, hmethod, n, scale, seed, distance));
  # B. Build the similarity matrix from the list of clusterings
  Sim.M <- Do.similarity.matrix(lRS$cluster, dim.Sim.M);
  # C. Computing the clusters in the original space
  if (distance == "euclidean")
    d <- dist(t(M))
  else if (distance == "pearson")
    d <- as.dist(1 - cor(M))
  else
    stop("Random.hclustering.validity: distance measure not implemented");
  tree <- hclust(d, method = hmethod);
  plot(tree, main="");
  cl.orig <- rect.hclust(tree, k = c);
  # D. Computing the validity indices vi
  vi <- Validity.indices(cl.orig, c, Sim.M);
  # E. Computing the overall (average) validity of the clustering
  ov.vi <- sum(vi)/c;
  if (AC == TRUE) {
    ac <- AC.index(cl.orig, c, Sim.M);
    res <- list(validity=vi, overall.validity=ov.vi, similarity.matrix=Sim.M, dimension=dim, cluster=lRS$cluster,
                tree=lRS$tree, orig.tree=tree, orig.cluster=cl.orig, AC=ac);
  } else
    res <- list(validity=vi, overall.validity=ov.vi, similarity.matrix=Sim.M, dimension=dim, cluster=lRS$cluster,
                tree=lRS$tree, orig.tree=tree, orig.cluster=cl.orig);
  return(res);
}

##############################################################################
##############################################################################
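##############################################################################
# End-to-end usage sketch for Random.hclustering.validity (illustrative only;
# the matrix below is synthetic, and in practice M is e.g. a gene expression
# matrix with genes on the rows and samples on the columns).
#
# M <- matrix(rnorm(1000*30), nrow=1000);
# out <- Random.hclustering.validity(M, dim=100, pmethod="RS", c=3,
#                                    hmethod="average", n=20, seed=100, AC=TRUE);
# out$validity            # stability index of each of the 3 clusters
# out$overall.validity    # average stability of the clustering
# out$AC                  # assignment confidence of each example w.r.t. each cluster
##############################################################################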