##############################################################################################################
# example-synthetic.R
# September 2006
# 
# Example script to estimate the number of clusters in the data set produced by
# the synthetic data generator generate.sample6, using mosclust with 2 clustering
# algorithms (PAM and k-means) and perturbation by resampling, noise injection and random projections
##############################################################################################################
library(mosclust);

# ---- Parameters --------------------------------------------------------------
# Number of pairs of clusterings evaluated for each number of clusters
nsubsamples <- 10
# Largest number of clusters to be evaluated
max.num.clust <- 10
# Fraction of the samples drawn at each subsampling step
fract.resampled <- 0.8
# Dimension for the random projections, as predicted by JL.predict.dim
# with epsilon = 0.2.
# NOTE(review): the first argument (120) is presumably the number of examples;
# confirm that it matches the size of the data set generated below.
dim.projection <- JL.predict.dim(120, epsilon = 0.2)

# ---- Data set generation -----------------------------------------------------
# generate.sample6 is provided by the clusterv package
M <- generate.sample6(n = 20, m = 10, dim = 500, d = 3, s = 0.2)


##################################################
# Analysis with the PAM algorithm
#
# The same three-step pipeline is run for each perturbation scheme:
#   S*.PAM.sample6 : similarity matrix between pairs of clusterings obtained
#                    from perturbed versions of M
#   d*.PAM.sample6 : stability indices and p-values (chi square-based test)
#   h*.PAM.sample6 : outcome of the hypothesis test at 0.01 significance level
# where * is r (resampling), n (noise) or p (random projections).

# 1. Perturbation by resampling and computation of the similarity matrix
Sr.PAM.sample6 <- do.similarity.resampling(
  M,
  c = max.num.clust,
  nsub = nsubsamples,
  f = fract.resampled,
  s = sFM,
  alg.clust.sim = PAM.sim.resampling
)

# Computation of the stability indices and the p-values according to the
# chi square-based test
dr.PAM.sample6 <- Chi.square.compute.pvalues(Sr.PAM.sample6)

# Test of hypothesis to identify the clustering solutions at 0.01
# significance level
hr.PAM.sample6 <- Hypothesis.testing(dr.PAM.sample6, alpha = 0.01)

# 2. Perturbation by noise and computation of the similarity matrix
Sn.PAM.sample6 <- do.similarity.noise(
  M,
  c = max.num.clust,
  nnoisy = nsubsamples,
  perc = 0.5,
  s = sFM,
  alg.clust.sim = PAM.sim.noise
)

# Computation of the stability indices and the p-values according to the
# chi square-based test
dn.PAM.sample6 <- Chi.square.compute.pvalues(Sn.PAM.sample6)

# Test of hypothesis to identify the clustering solutions at 0.01
# significance level
hn.PAM.sample6 <- Hypothesis.testing(dn.PAM.sample6, alpha = 0.01)

# 3. Perturbation by random projections and computation of the similarity matrix
# dim.projection (set in the parameters section) is passed explicitly here:
# it was computed but never used before, so the projection silently ran with
# the default dimension of do.similarity.projection().
Sp.PAM.sample6 <- do.similarity.projection(
  M,
  c = max.num.clust,
  nprojections = nsubsamples,
  dim = dim.projection,
  s = sFM,
  alg.clust.sim = PAM.sim.projection
)

# Computation of the stability indices and the p-values according to the
# chi square-based test
dp.PAM.sample6 <- Chi.square.compute.pvalues(Sp.PAM.sample6)

# Test of hypothesis to identify the clustering solutions at 0.01
# significance level
hp.PAM.sample6 <- Hypothesis.testing(dp.PAM.sample6, alpha = 0.01)

# Saving all the PAM objects into a single file in the working directory
save(
  Sr.PAM.sample6, dr.PAM.sample6, hr.PAM.sample6,
  Sn.PAM.sample6, dn.PAM.sample6, hn.PAM.sample6,
  Sp.PAM.sample6, dp.PAM.sample6, hp.PAM.sample6,
  file = "sample6.PAM.objects"
)


##################################################
# Analysis with the k-means algorithm
#
# The same three-step pipeline is run for each perturbation scheme:
#   S*.Kmeans.sample6 : similarity matrix between pairs of clusterings obtained
#                       from perturbed versions of M
#   d*.Kmeans.sample6 : stability indices and p-values (chi square-based test)
#   h*.Kmeans.sample6 : outcome of the hypothesis test at 0.01 significance level
# where * is r (resampling), n (noise) or p (random projections).

# 1. Perturbation by resampling and computation of the similarity matrix
Sr.Kmeans.sample6 <- do.similarity.resampling(
  M,
  c = max.num.clust,
  nsub = nsubsamples,
  f = fract.resampled,
  s = sFM,
  alg.clust.sim = Kmeans.sim.resampling
)

# Computation of the stability indices and the p-values according to the
# chi square-based test
dr.Kmeans.sample6 <- Chi.square.compute.pvalues(Sr.Kmeans.sample6)

# Test of hypothesis to identify the clustering solutions at 0.01
# significance level
hr.Kmeans.sample6 <- Hypothesis.testing(dr.Kmeans.sample6, alpha = 0.01)

# 2. Perturbation by noise and computation of the similarity matrix
Sn.Kmeans.sample6 <- do.similarity.noise(
  M,
  c = max.num.clust,
  nnoisy = nsubsamples,
  perc = 0.5,
  s = sFM,
  alg.clust.sim = Kmeans.sim.noise
)

# Computation of the stability indices and the p-values according to the
# chi square-based test
dn.Kmeans.sample6 <- Chi.square.compute.pvalues(Sn.Kmeans.sample6)

# Test of hypothesis to identify the clustering solutions at 0.01
# significance level
hn.Kmeans.sample6 <- Hypothesis.testing(dn.Kmeans.sample6, alpha = 0.01)

# 3. Perturbation by random projections and computation of the similarity matrix
# dim.projection (set in the parameters section) is passed explicitly here:
# it was computed but never used before, so the projection silently ran with
# the default dimension of do.similarity.projection().
Sp.Kmeans.sample6 <- do.similarity.projection(
  M,
  c = max.num.clust,
  nprojections = nsubsamples,
  dim = dim.projection,
  s = sFM,
  alg.clust.sim = Kmeans.sim.projection
)

# Computation of the stability indices and the p-values according to the
# chi square-based test
dp.Kmeans.sample6 <- Chi.square.compute.pvalues(Sp.Kmeans.sample6)

# Test of hypothesis to identify the clustering solutions at 0.01
# significance level
hp.Kmeans.sample6 <- Hypothesis.testing(dp.Kmeans.sample6, alpha = 0.01)

# Saving all the k-means objects into a single file in the working directory
save(
  Sr.Kmeans.sample6, dr.Kmeans.sample6, hr.Kmeans.sample6,
  Sn.Kmeans.sample6, dn.Kmeans.sample6, hn.Kmeans.sample6,
  Sp.Kmeans.sample6, dp.Kmeans.sample6, hp.Kmeans.sample6,
  file = "sample6.Kmeans.objects"
)