#! perl # # Windows version (all in) # # Perform a Wilcoxon Matched-pair Signed-Ranks Test on the pairs in ARGV[0] # Either entered as two columns of pairs ("\n" separated) or as # two lines (ARGV[0] and ARGV[1]). If ARGV[0] is a filename (or '-') # the corresponding file is read (or STDIN). # # E.g., # > echo -e '1 2\n2 3\n3 4\n4 3\n'| SignedRankTest.pl - # => W+ = 2.50, W- = 7.50, N = 4, p <= 0.375 # # Copyright (C) 1996, 2001 Rob van Son # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # Where are the Normal-Z and WX-MP-SR tests (NOT needed in windows version) $Script = '.'; # # The absolute function sub abs{ ($_[0] >= 0) ? $_[0] : -$_[0]; } # # Significance levels @level5 = (0.05, 0, 0, 0, 0, 0, 21, 26, 33, 40, 47, 56, 65, 74, 84, 95); @level1 = (0.01, 0, 0, 0, 0, 0, 0, 0, 36, 44, 52, 61, 71, 81, 92, 104); @level05 = (0.005, 0, 0, 0, 0, 0, 0, 0, 0, 45, 54, 63, 73, 84, 96, 108); # # Get STDIN if needed if($ARGV[0] eq '-') { $ARGV[0] = join("\n", ); } elsif(-e $ARGV[0]) { open(INPUT, "<$ARGV[0]") || die "<$ARGV[0]: $!"; $ARGV[0] = join("\n", ); }; # # Recalculate input values to differences $ARGV[0] =~ s/[ ]+/ /g; $ARGV[0] =~ s/[\n]+/\n/g; @InputList = split('\n', $ARGV[0]); if($#InputList != 1) { foreach (@InputList) { @pair = split(' ', $_); $diff = ($#pair >= 1) ? ($pair[0]-$pair[1]) : $pair[0]; if($diff != 0){ push(@difference_list, $diff);}; }; } else { @ListA = split(' ', $InputList[0]); @ListB = split(' ', $InputList[1]); if($#ListA == $#ListB) { for($i=0; $i<=$#ListA; ++$i) { $diff = $ListA[$i]-$ListB[$i]; if($diff != 0){ push(@difference_list, $diff);}; }; } else { die "Unequal number of entries in input lists"}; }; @difference_list = sort {&abs($a) <=> &abs($b)} @difference_list; # # Replace differences by ranks $previous = undef; $start = 0; for($i=0; $i <= $#difference_list; ++$i) { if(&abs($difference_list[$i]) == $previous) { $mean_rank = ($start+$i+2)/2.0; for($j=$start; $j<=$i; ++$j) { $difference_rank[$j] = ($difference_list[$j]>0) ? $mean_rank : -$mean_rank; }; } else { $difference_rank[$i] = ($difference_list[$i]>0) ? $i+1 : -($i+1); $previous = &abs($difference_list[$i]); $start = $i; }; }; # # Determine test parameters $N = $#difference_rank+1; $Wplus = 0; $Wminus = 0; foreach $diff (@difference_rank) { if($diff > 0) { $Wplus += $diff;} else { $Wminus += &abs($diff);}; }; $Wmaximal = ($Wplus > $Wminus) ? $Wplus : $Wminus; # Determine level of significance $p = 0; if($N <= 16) # Exact { # $p = LevelOfSignificanceWXMPSR($Wmaximal, $N) # The Perl routine if(-e "$Script\\WX-MP-SR.exe") { $p = `$Script\\WX-MP-SR.exe $Wmaximal $N`; chomp $p; # The faster C routine } else { $p = LevelOfSignificanceWXMPSR($Wmaximal, $N); # The slower Perl routine # $p = `$Script\\WX-MP-SR.pl $Wmaximal $N`; chomp $p; # The slower Perl routine }; $Z = undef; } elsif($N <= 16) # Table lookup, used only for slow systems { $p = ($level05[$N] && $level05[$N]<= $Wmaximal) ? $level05[0] : ($level1[$N] && $level1[$N] <= $Wmaximal) ? $level1[0] : ($level5[$N] && $level5[$N] <= $Wmaximal) ? $level5[0] : 1.0; $Z = undef; } else # Normal approximation { # Normal approximation $Z = ($Wmaximal - 0.5 - $N*($N+1)/4)/sqrt($N*($N+1)*(2*$N+1)/24); $p = NormalZ($Z); }; # Print output if($Wplus == int($Wplus)) { printf("W+ = %d, W- = %d, N = %d, p <= %5.4g ", $Wplus, $Wminus, $N, $p);} else { printf("W+ = %5.2f, W- = %5.2f, N = %d, p <= %5.4g ", $Wplus, $Wminus, $N, $p); }; ######################################################################## # This is the actual routine that calculates the exact (two-tailed) # level of significance for the Wilcoxon Matched-Pairs Signed-Ranks # test in PERL. The inputs are the Sum of Ranks of either the positive of # negative samples (W) and the sample size (N). # The Level of significance is calculated by checking for each # possible outcome (2**N possibilities) whether the sum of ranks # is larger than or equal to the observed Sum of Ranks (W). # # NOTE: The execution-time ~ N*2**N, i.e., more than exponential. # Adding a single pair to the sample (i.e., increase N by 1) will # more than double the time needed to complete the calculations # (apart from an additive constant). # The execution-time of this program can easily outrun your # patience. # sub LevelOfSignificanceWXMPSR # ($W, $N) { my $W = shift; my $N = shift; # # Determine Wmax, i.e., work with the largest Rank Sum my $MaximalW = $N*($N+1)/2; $W = $MaximalW - $W if($W < $MaximalW/2); # # The total number of possible outcomes is 2**N my $NumberOfPossibilities = 2**$N; # # Initialize and loop. The loop-interior will be run 2**N times. my $CountLarger = 0; # Generate all distributions of sign over ranks as bit-strings. for(my $i=0; $i < $NumberOfPossibilities; ++$i) { my $RankSum = 0; # Shift "sign" bits out of $i to determine the Sum of Ranks for(my $j=0; $j<$N; ++$j) { $RankSum += $j + 1 if(($i >> $j) & 1); }; # Count the number of "samples" that have a Sum of Ranks larger or # equal to the one found (i.e., >= W). ++$CountLarger if($RankSum >= $W); }; # # The level of significance is the number of outcomes with a # sum of ranks equal to or larger than the one found (W) # divided by the total number of possible outcomes. # The level is doubled to get the two-tailed result. my $p = 2*$CountLarger / $NumberOfPossibilities; return $p; }; # # Calculate two-tailed significance level associated with Z(x) with x = ARGV[0] # # i.e., P(|Z|>=x|Normal Distribution), error(x) < 7.5*10(-8) # # Probabilities are calculated according to: # Abramowitz and Stegun, # Handbook of mathematical functions # Ninth printing, Dover publications, Inc., 1970 # p932, 26.2.17 # # The absolute function sub abs{ ($_[0] >= 0) ? $_[0] : -$_[0]; } # sub NormalZ # ($Z) -> $p { $x = shift; # # P(x) = 1 - Z(x)(b1*t+b2*t**2+b3*t**3+b4*t**4+b5*t**5) # Z(x) = exp(-$x*$x/2.0)/(sqrt(2*3.14159265358979323846)) # t = 1/(1+p*x) # # Parameters @b = (0.319381530, -0.356563782, 1.781477937, -1.821255978, 1.330274429); $p = 0.2316419; $t = 1/(1+$p*$x); # Initialize variables $fact = $t; $Sum = 0; # Sum polynomial for($i=0; $i <= $#b; ++$i) { $Sum += $b[$i]*$fact; $fact *= $t; }; # Calculate probability $p = 2*$Sum*exp(-$x*$x/2.0)/(sqrt(2*3.14159265358979323846)); # return $p; };