# example of a function definition
# Prints x (and, as print() does, returns it invisibly).
p <- function(x) {
  print(x)
}


# ---------------------------------------------------------------------------
# simple implementation of IC
#
# Conventions used by every function below:
#   S  two-dimensional table of samples, one row per sample, one column per
#      attribute (indexed as S[i, ] and S[, cols, drop = FALSE]).
#   L  vector of class labels, L[i] being the label of row i of S.
#   A  one attribute pattern, comparable element-wise with a row of S.
# ---------------------------------------------------------------------------

# Number of rows of S that are element-wise equal to the pattern A.
count_equal <- function(A, S) {
  n_equal <- 0
  for (i in seq_len(NROW(S))) {
    if (all(A == S[i, ])) {
      n_equal <- n_equal + 1
    }
  }
  n_equal
}

# Number of rows of S that are element-wise equal to A and carry label k.
count_equal_class <- function(A, S, k, L) {
  n_equal <- 0
  for (i in seq_len(NROW(S))) {
    if (L[i] == k && all(A == S[i, ])) {
      n_equal <- n_equal + 1
    }
  }
  n_equal
}

# IC of pattern A: the number of rows equal to A minus the size of the largest
# single-class group among those rows. It is 0 when all rows matching A share
# one label.
IC <- function(A, S, L) {
  n_equal <- count_equal(A, S)
  L_values <- unique(L)
  largest_class <- 0
  for (k in seq_along(L_values)) {
    v <- count_equal_class(A, S, L_values[k], L)
    if (v > largest_class) {
      largest_class <- v
    }
  }
  n_equal - largest_class
}


# integrated implementation of IC
# Same quantity as IC(), computed in a single pass over S.
# The per-class counters are keyed by the *character form* of the label, so
# numeric or factor labels are never mistaken for positions in the counter
# vector (e.g. a label 0 would otherwise index nothing at all).
IC_quick <- function(A, S, L) {
  keys <- as.character(L)
  class_names <- unique(keys)
  per_class <- numeric(length(class_names))
  names(per_class) <- class_names

  n_equal <- 0
  for (i in seq_len(NROW(S))) {
    if (all(A == S[i, ])) {
      n_equal <- n_equal + 1
      per_class[keys[i]] <- per_class[keys[i]] + 1
    }
  }
  # max(0, ...) keeps the result defined when there are no classes at all.
  n_equal - max(0, per_class)
}


# IR and evaluation measure definition

# IR: sum of IC over every row of S (each row used as the pattern), divided by
# the number of rows.
IR <- function(S, L) {
  total <- 0
  # Reference (slower) variant: total <- total + IC(S[i, ], S, L)
  for (i in seq_len(NROW(S))) {
    total <- total + IC_quick(S[i, ], S, L)
  }
  total / NROW(S)
}

# Evaluation measure of the attribute subset Xprime (column indices of S):
# 1 / (IR + 1) on S restricted to those columns, so 1 means IR is 0.
# Notice the drop = FALSE argument: it inhibits the automatic downcast from
# matrix to vector when a single column is selected.
J <- function(S, L, Xprime) {
  1 / (IR(S[, Xprime, drop = FALSE], L) + 1)
}


# simple Las Vegas Filter (fix a quality threshold, minimize number of features)
#   E      evaluation function called as E(S, L, columns)
#   maxit  number of random trials
#   E0     quality threshold a candidate subset must reach to be accepted
# Each trial drops one randomly chosen column from the current best subset and
# keeps the smaller subset when its score is at least E0.
# Returns the column indices of the best subset found.
LVF <- function(S, L, E, maxit, E0) {
  best <- seq_len(ncol(S))
  for (t in seq_len(maxit)) {
    # Nothing left to remove; sampling from an empty set would be an error.
    if (length(best) == 0) {
      break
    }
    # Draw positions rather than values: sample(x, ...) treats a single
    # number x as the range 1:x.
    keep <- sample.int(length(best), length(best) - 1)
    Xprime <- best[keep]
    Ecurrent <- E(S, L, Xprime)
    if (Ecurrent >= E0) {
      best <- Xprime
    }
  }
  best
}


# Private helper for SFG / SFG_k: among the columns of S not yet in `best`,
# find the one whose addition gives the highest score E(S, L, c(best, i)).
# Only scores strictly greater than 0 qualify; ties keep the lowest index.
# Returns list(index, score); index is -1 and score 0 when nothing qualifies.
.best_candidate <- function(S, L, E, best) {
  Emax <- 0
  imax <- -1
  for (i in seq_len(ncol(S))) {
    if (!(i %in% best)) {
      Ecurrent <- E(S, L, c(best, i))
      if (Ecurrent > Emax) {
        Emax <- Ecurrent
        imax <- i
      }
    }
  }
  list(index = imax, score = Emax)
}

# simple Sequential Forward Generation
# (fix a quality threshold, minimize number of features)
# Greedily adds the most promising column until the score reaches E0 or no
# column can be added. Prints each selected column index.
# Returns the selected column indices in selection order.
SFG <- function(S, L, E, E0) {
  best <- c()
  for (t in seq_len(ncol(S))) {
    # --- select most promising new attribute (argmax)
    step <- .best_candidate(S, L, E, best)
    # No usable candidate: stop instead of recording the -1 sentinel.
    if (step$index < 0) {
      break
    }
    print(step$index)
    best <- c(best, step$index)
    # ---
    if (step$score >= E0) {
      return(best)
    }
  }
  best
}

# simple Sequential Forward Generation
# (fix a number of features, maximize quality measure)
# Greedily adds the most promising column k times, or until no column can be
# added. Prints the score and the subset after each step.
# Returns the selected column indices in selection order.
SFG_k <- function(S, L, E, k) {
  best <- c()
  for (t in seq_len(k)) {
    step <- .best_candidate(S, L, E, best)
    # No usable candidate (e.g. k exceeds the number of columns): stop
    # instead of recording the -1 sentinel.
    if (step$index < 0) {
      break
    }
    print(step$score)
    best <- c(best, step$index)
    print(best)
  }
  best
}

# FOCUS
# left as homework