################################################################
# R source code for random projections
# January 2005
################################################################

library(stats)
library(MASS)
#source("rData.R");

##############################################################################
# Random projections
##############################################################################

# Generation of the vector containing the randomly selected indices.
# It is used by the function random.subspace
# Input:
# d : subspace dimension
# d.original : dimension of the space from which components are randomly selected
# Output:
# vector of the selected features: it contains the indices of the randomly selected components
random.component.selection <- function(d=2, d.original=10) {
  selected.features <- numeric(d);
  n.feat <- d.original + 1;
  feat <- floor(runif(1, 1, n.feat));
  selected.features[1] <- feat;
  if (d > 1) {   # guard: the rejection loop below assumes at least two components to select
    for (i in 2:d) {
      present <- TRUE;
      while (present) {
        feat <- floor(runif(1, 1, n.feat));
        for (j in 1:(i-1)) {
          if (selected.features[j] == feat)
            break;
        }
        if ((j == i-1) && (selected.features[j] != feat)) {
          present <- FALSE;
          selected.features[i] <- feat;
        }
      }
    }
  }
  selected.features
}

# Random projection to a lower-dimensional subspace (random subspace method).
# The projection is performed by randomly selecting a subset of the variables (components)
# and then projecting the data onto the selected components.
# It is the projection used by Ho in the random subspace ensemble algorithm.
# Input:
# d : subspace dimension
# m : data matrix (rows are features and columns are examples)
# scaling : if TRUE (default) scaling is performed
# Output:
# data matrix (dimension d X ncol(m)) of the examples projected onto a d-dimensional random subspace
random.subspace <- function(d=2, m, scaling=TRUE) {
  d.original <- nrow(m);
  if (d >= d.original)
    stop("random.subspace: subspace dimension must be lower than space dimension", call.=FALSE);
  # generation of the vector selected.features containing the randomly selected indices
  selected.features <- random.component.selection(d, d.original);
  # random data projection
  if (scaling == TRUE)
    reduced.m <- sqrt(d.original/d) * m[selected.features,]
  else
    reduced.m <- m[selected.features,];
  reduced.m
}

# Random projection to a lower-dimensional subspace with a normally distributed projection matrix.
# The projection is performed using a projection matrix P with each entry P[i,j] ~ N(0,1).
# Input:
# d : subspace dimension
# m : data matrix (rows are features and columns are examples)
# scaling : if TRUE (default) scaling is performed
# Output:
# data matrix (dimension d X ncol(m)) of the examples projected onto a d-dimensional random subspace
norm.random.projection <- function(d=2, m, scaling=TRUE) {
  d.original <- nrow(m);
  if (d >= d.original)
    stop("norm.random.projection: subspace dimension must be lower than space dimension", call.=FALSE);
  # Projection matrix
  P <- rnorm(d*d.original);
  P <- matrix(P, nrow=d);
  # random data projection
  if (scaling == TRUE)
    reduced.m <- sqrt(1/d) * (P %*% m)
  else
    reduced.m <- P %*% m;
  reduced.m
}
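# Example (not part of the original code): a minimal usage sketch of the projection
# functions defined above. The data matrix and its dimensions are illustrative
# assumptions (100 features as rows, 30 examples as columns, drawn from N(0,1)).
# It is kept commented out so that sourcing this file does not execute it.
#
# example.data <- matrix(rnorm(100*30), nrow=100);       # hypothetical 100 x 30 data matrix
# rs.data <- random.subspace(10, example.data);          # feature subsampling: 10 x 30 matrix
# rp.data <- norm.random.projection(10, example.data);   # Gaussian projection: 10 x 30 matrix
# dim(rs.data);                                          # 10 30
# dim(rp.data);                                          # 10 30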
# Random projection to a lower-dimensional subspace with a random +1/-1 projection matrix.
# The projection is performed using a projection matrix P s.t. Prob(P[i,j]=1) = Prob(P[i,j]=-1) = 1/2
# Input:
# d : subspace dimension
# m : data matrix (rows are features and columns are examples)
# scaling : if TRUE (default) scaling is performed
# Output:
# data matrix (dimension d X ncol(m)) of the examples projected onto a d-dimensional random subspace
Plus.Minus.One.random.projection <- function(d=2, m, scaling=TRUE) {
  d.original <- nrow(m);
  if (d >= d.original)
    stop("Plus.Minus.One.random.projection: subspace dimension must be lower than space dimension", call.=FALSE);
  # Projection matrix
  P <- floor(runif(d*d.original, 1, 3));   # generate a vector with values in {1, 2}
  P[P==2] <- -1;
  P <- matrix(P, nrow=d);
  # random data projection
  if (scaling == TRUE)
    reduced.m <- sqrt(1/d) * (P %*% m)
  else
    reduced.m <- P %*% m;
  reduced.m
}

# Random projection to a lower-dimensional subspace with Achlioptas' projection matrix.
# The projection is performed using a projection matrix P s.t.
# Prob(P[i,j]=sqrt(3)) = Prob(P[i,j]=-sqrt(3)) = 1/6 and Prob(P[i,j]=0) = 2/3
# Input:
# d : subspace dimension
# m : data matrix (rows are features and columns are examples)
# scaling : if TRUE (default) scaling is performed
# Output:
# data matrix (dimension d X ncol(m)) of the examples projected onto a d-dimensional random subspace
Achlioptas.random.projection <- function(d=2, m, scaling=TRUE) {
  d.original <- nrow(m);
  if (d >= d.original)
    stop("Achlioptas.random.projection: subspace dimension must be lower than space dimension", call.=FALSE);
  # Projection matrix
  P <- floor(runif(d*d.original, 1, 7));   # generate a vector with values in {1, ..., 6}
  sqr3 <- sqrt(3);
  P[P==1] <- sqr3;
  P[P==6] <- -sqr3;
  P[P==2 | P==3 | P==4 | P==5] <- 0;
  P <- matrix(P, nrow=d);
  # random data projection
  if (scaling == TRUE)
    reduced.m <- sqrt(1/d) * (P %*% m)
  else
    reduced.m <- P %*% m;
  reduced.m
}

###########################################################
# Distortion measures
###########################################################

# It computes the maximum expansion achieved by projecting from the original to the reduced space.
# max.expansion = max_{u,v \in S} \frac{|| f(u) - f(v) ||}{|| u - v ||}
# Input:
# m : data matrix in the original space (rows are examples, columns are components)
# m.rid : data matrix in the reduced space (rows are examples, columns are components)
# Output:
# maximum expansion
Max.Expansion <- function(m, m.rid) {
  d <- dist(m);       # it computes the euclidean distance between all pairs of rows of m
  d.rid <- dist(m.rid);
  exps <- max(d.rid/d);
  exps
}

# It computes the minimum expansion achieved by projecting from the original to the reduced space.
# min.expansion = min_{u,v \in S} \frac{|| f(u) - f(v) ||}{|| u - v ||}
# Input:
# m : data matrix in the original space (rows are examples, columns are components)
# m.rid : data matrix in the reduced space (rows are examples, columns are components)
# Output:
# minimum expansion
Min.Expansion <- function(m, m.rid) {
  d <- dist(m);       # it computes the euclidean distance between all pairs of rows of m
  d.rid <- dist(m.rid);
  min.exps <- min(d.rid/d);
  min.exps
}
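# Example (not part of the original code): a sketch of how the projections above can be
# combined with the first two distortion measures just defined. Note the two layout
# conventions: the projection functions take examples as columns, while the distortion
# measures (which rely on dist()) take examples as rows, hence the t() calls below.
# The data matrix and dimensions are illustrative assumptions; kept commented out.
#
# example.data <- matrix(rnorm(100*30), nrow=100);            # hypothetical 100 x 30 data matrix
# proj.data <- Achlioptas.random.projection(20, example.data); # sparse projection: 20 x 30 matrix
# Max.Expansion(t(example.data), t(proj.data));                # largest pairwise distance ratio
# Min.Expansion(t(example.data), t(proj.data));                # smallest pairwise distance ratio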
# It computes the maximum and minimum expansion achieved by projecting from the original to the reduced space.
# max.expansion = max_{u,v \in S} \frac{|| f(u) - f(v) ||}{|| u - v ||}
# min.expansion = min_{u,v \in S} \frac{|| f(u) - f(v) ||}{|| u - v ||}
# Input:
# m : data matrix in the original space (rows are examples, columns are components)
# m.rid : data matrix in the reduced space (rows are examples, columns are components)
# Output:
# maximum and minimum expansion (a vector with 2 components)
Max.Min.Expansion <- function(m, m.rid) {
  d <- dist(m);       # it computes the euclidean distance between all pairs of rows of m
  d.rid <- dist(m.rid);
  max.exps <- max(d.rid/d);
  min.exps <- min(d.rid/d);
  exps <- c(max.exps, min.exps);
  exps
}

# It computes the average expansion achieved by projecting from the original to the reduced space.
# average.expansion = \frac{1}{|S|(|S|-1)/2} \sum_{u,v \in S} \frac{|| f(u) - f(v) ||}{|| u - v ||}
# Input:
# m : data matrix in the original space (rows are examples, columns are components)
# m.rid : data matrix in the reduced space (rows are examples, columns are components)
# Output:
# average expansion
Average.Expansion <- function(m, m.rid) {
  d <- dist(m);       # it computes the euclidean distance between all pairs of rows of m
  d.rid <- dist(m.rid);
  s <- length(d);     # number of pairs: |S|*(|S|-1)/2
  average.expansion <- (1/s) * sum(d.rid/d);
  average.expansion
}

# It computes the maximum contraction achieved by projecting from the original to the reduced space.
# max.contraction = max_{u,v \in S} \frac{|| u - v ||}{|| f(u) - f(v) ||}
# Input:
# m : data matrix in the original space (rows are examples, columns are components)
# m.rid : data matrix in the reduced space (rows are examples, columns are components)
# Output:
# maximum contraction
Max.Contraction <- function(m, m.rid) {
  d <- dist(m);       # it computes the euclidean distance between all pairs of rows of m
  d.rid <- dist(m.rid);
  contr <- max(d/d.rid);
  contr
}

# It computes the maximum and minimum contraction achieved by projecting from the original to the reduced space.
# max.contraction = max_{u,v \in S} \frac{|| u - v ||}{|| f(u) - f(v) ||}
# min.contraction = min_{u,v \in S} \frac{|| u - v ||}{|| f(u) - f(v) ||}
# Input:
# m : data matrix in the original space (rows are examples, columns are components)
# m.rid : data matrix in the reduced space (rows are examples, columns are components)
# Output:
# maximum and minimum contraction (a vector with 2 components)
Max.Min.Contraction <- function(m, m.rid) {
  d <- dist(m);       # it computes the euclidean distance between all pairs of rows of m
  d.rid <- dist(m.rid);
  max.contr <- max(d/d.rid);
  min.contr <- min(d/d.rid);
  contr <- c(max.contr, min.contr);
  contr
}
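# Example (not part of the original code): a sketch of the combined expansion/contraction
# measures. Max.Min.Expansion and Max.Min.Contraction return both extremes from a single
# pair of dist() computations, which avoids calling the Max.* and Min.* functions separately.
# The matrices are assumed to be in the "rows are examples" layout expected by these
# functions; the dimensions are illustrative assumptions. Kept commented out.
#
# orig.examples <- matrix(rnorm(30*100), nrow=30);                   # hypothetical 30 examples x 100 components
# red.examples  <- t(norm.random.projection(20, t(orig.examples)));  # 30 examples x 20 components
# Max.Min.Expansion(orig.examples, red.examples);                    # c(max expansion, min expansion)
# Max.Min.Contraction(orig.examples, red.examples);                  # c(max contraction, min contraction)
# Average.Expansion(orig.examples, red.examples);                    # mean pairwise distance ratio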
# It computes the average contraction achieved by projecting from the original to the reduced space.
# average.contraction = \frac{1}{|S|(|S|-1)/2} \sum_{u,v \in S} \frac{|| u - v ||}{|| f(u) - f(v) ||}
# Input:
# m : data matrix in the original space (rows are examples, columns are components)
# m.rid : data matrix in the reduced space (rows are examples, columns are components)
# Output:
# average contraction
Average.Contraction <- function(m, m.rid) {
  d <- dist(m);       # it computes the euclidean distance between all pairs of rows of m
  d.rid <- dist(m.rid);
  s <- length(d);     # number of pairs: |S|*(|S|-1)/2
  average.contraction <- (1/s) * sum(d/d.rid);
  average.contraction
}

# Prediction of the dimension of the random projection needed to obtain a given distortion,
# according to the Johnson-Lindenstrauss (JL) lemma.
# Input:
# n : cardinality of the data
# epsilon : distortion (0 < epsilon <= 0.5)
# Output:
# minimum dimension
JL.predict.dim <- function(n, epsilon=0.5) {
  d <- 4 * (log(n) / epsilon^2);
  ceiling(d)
}

# Prediction of the dimension of the random projection needed to obtain a given distortion,
# according to the JL lemma, when t multiple projections are performed.
# Input:
# n : cardinality of the data
# epsilon : distortion (0 < epsilon <= 0.5)
# t : number of multiple projections
# Output:
# minimum dimension
JL.predict.dim.multiple <- function(n, epsilon=0.5, t=10) {
  d <- 4 * ((log(n) + log(t)) / epsilon^2);
  ceiling(d)
}

# Prediction of the minimum distortion of a random projection for a given subspace dimension,
# according to the JL lemma.
# Input:
# n : cardinality of the data
# dim : dimension of the projected subspace
# Output:
# minimum distortion
JL.predict.distortion <- function(n, dim=10) {
  epsilon <- sqrt(4 * log(n) / dim);
  epsilon
}
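# Example (not part of the original code): a sketch of the JL-based predictions above.
# The values of n, epsilon and t are illustrative assumptions. Kept commented out.
#
# JL.predict.dim(n=1000, epsilon=0.5);                 # ceiling(4*log(1000)/0.25), i.e. 111
# JL.predict.dim.multiple(n=1000, epsilon=0.5, t=20);  # larger, to account for 20 projections
# JL.predict.distortion(n=1000, dim=111);              # about 0.5, inverting the bound above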