Code for converting Windows extended-ASCII (Windows-1252) text, e.g. a file saved as .txt from MS Word:
# Read a Windows-1252 text file line by line and replace MS Word "smart"
# punctuation with plain-ASCII equivalents.
#
# The :encoding(Windows-1252) layer decodes the input bytes into Perl
# characters (e.g. byte \x92 becomes \x{2019}, MS Word's apostrophe), so the
# substitutions below match Unicode code points rather than raw bytes.
#
# $textFile must hold the path of the input file; it is set outside this
# snippet. Dies with the OS error message if the file cannot be opened.
open(my $text_fh, '<:encoding(Windows-1252)', $textFile)
    or die("Cannot read ".$textFile.": $!");
while (my $inp = <$text_fh>) {
    # Convert MSWord apostrophes / single quotes to an ASCII apostrophe
    # (hexdump shows bytes X92/X91, which a blind conversion turns into the
    # UTF-8 sequence E28099 instead of a plain ').
    $inp =~ s/[\x{2019}\x{2018}]/'/g;

    # Convert the MSWord ellipsis character to three ASCII periods
    # (hexdump shows byte X85, blindly converted to E280A6).
    $inp =~ s/\x{2026}/.../g;

    # Convert MSWord double quotes to an ASCII double quote
    # (hexdump shows bytes X93/X94, blindly converted to E2809C/9D).
    $inp =~ s/[\x{201C}\x{201D}]/"/g;

    # rest of code
}
close $text_fh;