# April 2016
# High level function to select the best F-score by choosing an appropriate
# threshold on the flat scores matrix.
#
# INPUT:
# flat.file: name of the flat scores matrix (without rda extension).
# ann.file: name of the target labels (without rda extension). It must be an
#   .rda file containing the label matrix of the examples.
# dag.file: name of the graph that represents the hierarchy of the classes
#   (without rda extension).
# n.round: number of rounding digits to be applied to the scores matrix
#   (def. 3). It's used for choosing the best threshold on the basis of the
#   best F.measure (see f.criterion parameter).
# f.criterion: character. Type of F-measure to be used to select the best
#   F.measure. There are 2 possibilities:
#   1. "F" (default) corresponds to the harmonic mean between the average
#      precision and recall;
#   2. "avF" corresponds to the per-example F-score averaged across all the
#      examples.
# verbose: boolean. If TRUE the number of iterations are printed on stdout,
#   FALSE (def) otherwise. Forwarded to find.best.f.
# b.per.example: boolean (def. TRUE). Forwarded unchanged to find.best.f.
# flat.dir: relative path to folder where flat normalized scores matrix is
#   stored.
# ann.dir: relative path to folder where annotation matrix is stored.
# dag.dir: relative path to folder where graph is stored.
# Fmeas.dir: relative path to folder where example-centric measures (i.e.
#   Precision, Recall, Specificity, F-measure, Accuracy across example) are
#   stored (def. "Fmeas.dir/").
#
# NOTE: directory arguments are concatenated with the file names through
# paste0, so they must end with a path separator. The defaults of flat.file,
# ann.file, dag.file, flat.dir, ann.dir and dag.dir refer to themselves, hence
# these arguments must always be supplied by the caller.
#
# OUTPUT:
# an rda file named "PCM.<flat.file>.hierScores.flat.rda", stored in the
# Fmeas.dir folder, containing the example-centric measures computed through
# find.best.f from F-hier.R file.
Do.FLAT.best.F.score <- function(flat.file=flat.file, ann.file=ann.file, dag.file=dag.file, n.round=3,
	f.criterion="F", verbose=FALSE, b.per.example=TRUE, flat.dir=flat.dir, ann.dir=ann.dir,
	dag.dir=dag.dir, Fmeas.dir="Fmeas.dir/"){

	## Loads the single object stored in an .rda file. The file is loaded in a
	## private environment, so that the stored object can never overwrite a
	## local variable or an argument of this function.
	load.single.object <- function(path){
		env <- new.env(parent=emptyenv())
		obj.names <- load(path, envir=env)
		if(length(obj.names) != 1L){
			stop("expected exactly one object in '", path, "', found ", length(obj.names), call.=FALSE)
		}
		get(obj.names, envir=env, inherits=FALSE)
	}

	## Removes the column named as the root node, if it exists. drop=FALSE keeps
	## the result a matrix even when just one column is left.
	remove.root.column <- function(m, root){
		if(root %in% colnames(m)){
			m <- m[, colnames(m) != root, drop=FALSE]
		}
		m
	}

	## Loading Data ############
	## loading hpo dag
	hpo <- load.single.object(paste0(dag.dir, dag.file, ".rda"))

	## root node
	root <- root.node(hpo)

	## loading flat scores matrix relative to a specific subontology
	S.flat <- load.single.object(paste0(flat.dir, flat.file, ".rda"))
	gc()	## in order to save ram memory..

	## removing root node from flat matrix if it exists
	pred <- remove.root.column(S.flat, root)

	## loading annotation matrix
	hpo.ann <- load.single.object(paste0(ann.dir, ann.file, ".rda"))
	gc()

	## removing root node from annotation table if it exists (without this check
	## a missing root column would drop every column of the table)
	target <- remove.root.column(hpo.ann, root)

	## Computing Hierarchical Examples-Measures
	F.meas <- find.best.f(target, pred, n.round=n.round, f.criterion=f.criterion,
		verbose=verbose, b.per.example=b.per.example)

	## Storing Results #########
	save(F.meas, file=paste0(Fmeas.dir, "PCM.", flat.file, ".hierScores.flat.rda"), compress=TRUE)
}