Code:
#!/usr/bin/perl -w
#Attempting to extract Anna's letters from Clarissa Text.
# Driver: scan the input text line by line and copy Anna Howe's letters
# to the output file.
#
# A letter is collected into @curr_letter_lines:
#   * a line for which begin_letter() says "yes" starts a fresh buffer
#     (whatever was collected before it is dropped);
#   * a line for which end_letter() says "yes" causes the buffer to be
#     written out and emptied -- the matching line itself is not added
#     to the buffer;
#   * every other line is appended to the buffer.
my $input  = "Clarissa_CleanText_Vol1";
my $output = "Anna";

open(my $in_fh, '<', $input) or die("couldn't open $input: $!");

# OUTPUT stays a bareword (global) filehandle on purpose:
# print_curr_letter_lines() writes to the handle by that name.
open(OUTPUT, '>', $output) or die("couldn't open $output: $!");

my @curr_letter_lines = ();    # lines of the letter currently being read

while (defined(my $inline = <$in_fh>)) {
    if (begin_letter($inline) eq "yes") {
        @curr_letter_lines = ($inline);
    }
    elsif (end_letter($inline) eq "yes") {
        print_curr_letter_lines(\@curr_letter_lines, $output);
        @curr_letter_lines = ();
    }
    else {
        push(@curr_letter_lines, $inline);
    }
}

close($in_fh);

# Buffered write errors only surface at close, so check it.
close(OUTPUT) or die("couldn't close $output: $!");
# Write each line of a collected letter to the global OUTPUT filehandle.
#
# Arguments:
#   1. reference to an array holding the lines to print, in order
#   2. the output name -- accepted for the caller's sake but not used
#      here; printing always goes to the OUTPUT handle
sub print_curr_letter_lines {
    my ($lines_ref, $output) = @_;
    print(OUTPUT $_) for @{$lines_ref};
}
# Decide whether a line opens a letter, i.e. whether it starts with one
# of the date markers "Jan.", "Febr." or "March NN." (NN = two digits).
#
# Argument: one line of input text.
# Returns:  the string "yes" if the line starts with a date marker,
#           otherwise the string "no".
sub begin_letter {
    my ($all_letters) = @_;

    # The alternatives are grouped so that the ^ anchor applies to all of
    # them. Without the group, ^ binds only to "Jan\." and a "Febr." or
    # "March NN." in the middle of a sentence would be taken for the
    # start of a new letter.
    # NOTE(review): assumes date markers sit at the very start of a line
    # in the input file -- confirm against the actual text.
    if ($all_letters =~ /^(?:Jan\.|Febr\.|March \d{2}\.)/) {
        return "yes";
    }
    return "no";
}
# Decide whether a line closes one of Anna's letters: it must contain
# <p class="left"> followed, later on the same line, by "ANNA HOWE.".
#
# Argument: one line of input text.
# Returns:  the string "yes" on a match, otherwise the string "no".
sub end_letter {
    my ($text) = @_;
    return ($text =~ /<p class="left">.*?ANNA HOWE\./) ? "yes" : "no";
}
