# Word-frequency report for TWiki topic files.
#
# Reads every non-hidden "*.txt" file in the "TWiki" directory (relative to
# the current working directory), strips markup and punctuation from each
# line, splits WikiWords into their component words, and prints:
#   * the total number of words counted,
#   * the number of distinct (case-insensitive) words,
#   * one line per distinct word, most frequent first.
#
# Dies if the directory or any selected file cannot be opened.
use strict;
use warnings;

my $topic_dir = 'TWiki';

# WikiWord pieces containing any of these names are not counted.
# NOTE(review): these look like collapsed TWiki %META:...% markers -- confirm.
my $meta_name_re = qr/METAFILEATTACHMENTname|METATOPICINFOauthor|METATOPICPARENTname|METAFIELDname|METAFORMname/;

my $words = 0;    # total words counted; starts at 0 so an empty run prints "0"
my %count_of;     # lower-cased word => number of occurrences

opendir(my $dir, $topic_dir)
    or die "Can't open directory '$topic_dir': $!\n";

# Test definedness explicitly so an entry whose name is false (e.g. "0")
# cannot end the scan early.
while (defined(my $file = readdir($dir))) {
    next if $file =~ /^\./;        # skip ".", ".." and hidden files
    next if $file !~ /\.txt$/;     # only topic text files

    my $path = "$topic_dir/$file";

    # Three-argument open: the file name is never interpreted as a mode.
    open(my $fh, '<', $path) or die "Can't open '$path': $!\n";
    while (my $line = <$fh>) {
        for my $word (words_in_line($line)) {
            $words++;
            $count_of{$word}++;
        }
    }
    close($fh);
}
closedir($dir);

print "counted $words words\n";
my $uniq_words = keys %count_of;
printf "%8d Unique words\n", $uniq_words;

# Highest count first; ties are broken alphabetically so that the report is
# identical from run to run (hash key order is not).
for my $word (sort { $count_of{$b} <=> $count_of{$a} or $a cmp $b }
              keys %count_of) {
    printf "%8d %s\n", $count_of{$word}, $word;
}

# words_in_line($line)
#
# Returns the list of lower-cased words found in one line of topic text.
# Tags, punctuation, single-character words and pure numbers are discarded;
# WikiWords are split at each lower-case -> upper-case/digit boundary and the
# pieces are returned individually.
sub words_in_line {
    my ($line) = @_;

    # Drop tags that open and close on this line.
    $line =~ s!<[^>]*>! !g;
    # Replace - and . with spaces, so we catch hyphenated words.
    $line =~ s![-.]! !g;
    # Everything that is not a letter, digit or space (including the
    # trailing newline) becomes a space.
    $line =~ s![^a-z0-9 ]! !ig;
    # Remove individual character words (e.g. 'a', 'I').
    $line =~ s!\b.\b! !g;
    # Remove things that are just numbers.
    $line =~ s!\b[0-9]+\b!!g;

    my @found;
    for my $word (split ' ', $line) {
        if ($word =~ m!^[A-Z]+[a-z]+[A-Z]+[a-zA-Z0-9]*\b!) {
            # WikiWord: insert a space at every lower -> upper/digit boundary.
            (my $spaced = $word) =~ s!([a-z])([A-Z0-9])!$1 $2!g;
            for my $piece (split / /, $spaced) {
                next if $piece =~ $meta_name_re;
                # Splitting can yield single characters or bare numbers
                # (e.g. "WebHome2" -> "2"); the line-level filters above
                # exclude those, so exclude them here as well.
                next if length($piece) < 2 or $piece =~ /\A[0-9]+\z/;
                push @found, lc $piece;
            }
        }
        else {
            push @found, lc $word;
        }
    }
    return @found;
}

# Nothing after this marker is parsed by perl, which keeps the wiki page
# footer that follows the script from being read as code.
__END__

-- MattWalsh - 16 Apr 2003
Topic revision: r1 - 16 Apr 2003 - MattWalsh
 
This site is powered by the TWiki collaboration platform. Copyright © 2008-2012 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki? Send feedback