Приглашаем посетить
Metaphone
#!/usr/local/bin/perl
#while (1) {
%matches = ();
# print "File to check? ";
# $file = <STDIN>;
$file = "../survey.log\n";
chop($file);
# print "Word to check? ";
# $word = <STDIN>;
$word = "matt\n";
chop($word);
# Convert search word to metaphone
($check = &metaphone($word)) || die "$word is not a valid word";
$check_first = substr($check, 0, 1);
$first_match = " " . $check_first;
# Read through file
open(FILE, "<$file") || die "Could not open $file";
while (<FILE>) {
next if /^$/;
# Convert to uppercase, remove non-alpha & dupes, prefix
$_ = ' ' . $_;
tr/a-zA-Z/ /cs;
$old_line = $_;
tr/a-z/A-Z/;
tr/G/-/;
tr/A-Z/A-Z/s;
tr/-/G/;
s/ ([GKP](N)|A(E)|W(R)|W(HO)|([GW])H)/ $2$3$4$5$6/;
s/ X/ S-/;
# Process each word with matching first letter
$start_word = index($_, $first_match) + 1;
while ($start_word > $[) {
$end_word = index($_, ' ', $start_word + 1);
$word_length = $end_word - $start_word;
$fileword = substr($_, $start_word, $word_length);
# Process if word is long enough
$startt = (times)[0];
if ($word_length > 2) {
# Convert to metaphone if the word is new
if (!$converted{$fileword}) {
$converted{$fileword} = &metaphone($fileword);
}
# Check for match
if ($check eq $converted{$fileword}) {
# Find original word
$save_line = $_;
$_ = substr($_, 0, $start_word);
$word_num = tr/ / /;
$_ = $save_line;
for ($pos = -1; $word_num; --$word_num) {
$pos = index($old_line, ' ', $pos + 1);
}
++$pos;
$endpos = index($old_line, ' ', $pos);
$fileword = substr($old_line, $pos, $endpos - $pos);
# Increment match count
$matches{$fileword}++;
}
}
$start_word = index($_, $first_match, $end_word);
$total += (times)[0] - $startt;
}
}
print "\n";
print "Total time: $total\n";
# print "The following words in $file match $word:\n";
# foreach $match (sort keys %matches) {
# print " $match ($matches{$match})\n"
# }
#}
### Subroutine: &metaphone()
###
### Function: To convert a word to its phonetic equivalent using
### a slightly modified version of the Metaphone
### algorithm, originally developed by Lawrence Philips
###
### Usage: &metaphone($word);
###
### Variables: $word -- Word to be converted.
### Example "cookbook"
###
### Returns: String containing converted result if successful
### Null string if unsuccessful ($word not a word)
###
### Files Created: None
sub metaphone {
# Initialize variables
local($word) = @_[0];
# Convert to uppercase, remove non-alpha
$word =~ tr/a-z/A-Z/;
$word =~ tr/A-Z//cd;
# Remove duplicate letters except GG
$word =~ tr/G/-/;
$word =~ tr/A-Z/A-Z/s;
$word =~ tr/-/G/;
# Convert to Metaphone phonetic
$word =~ s/^([GKP](N)|A(E)|W(R)|W(HO)|([GW])H)/$2$3$4$5$6/; # Prefix
$word =~ s/^X/S-/;
$word =~ s/MB$/M/; # B
$word =~ s/X/KS-/g; # X
$word =~ s/TH/0/g; # TH
$word =~ s/SC([EIY])/S-$1/g; # C
$word =~ s/C(IA)|SCH$|TCH|([^S])CH/$2X$1/g;
$word =~ s/C([EIY])/-S-$1/g;
$word =~ s/G((N)|(NED))$/-$2$3/; # G
$word =~ s/(D|([^G]))G([EIY])/$2J$3/g;
$word =~ s/((H...)|([BDH]..))GH$/$2$3/;
$word =~ s/(PH|V|GH$)/F/g; # PH/V/GH
$word =~ s/GHT/T/g;
$word =~ s/(.)G(H[^AEIOU])/$1-$2/g;
$word =~ s/([AEIOU])H([^AEIOU])/$1-$2/g; # H
$word =~ s/SH/X/g;
$word =~ s/[WY]($|[^AEIOU])/-$1/g; # W/Y
$word =~ s/(C[HK]|C|Q|G+)/K/g; # CK/Q/G
$word =~ s/(.)[ST](I[OA].)/$1X$2/g; # S/T
$word =~ tr/Z/S/; # Z
$word =~ tr/D/T/; # D
# Remove vowels and placeholders
$word =~ s/(.)[AEIOU-]+/$1/g;
# Cut to four characters, pad with spaces if necessary
if ($word) { $word = substr($word . " ", 0, 4) }
# Return the result (null if original $word was not a word)
$word;
}