#!/usr/bin/perl
# Copyright 2023 Ted Clark
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License, version 2, as published by the Free
# Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
use strict;
use warnings;
use experimental 'switch';
# Converter state shared by the subroutines in this file.

# Name of the construct currently being processed. close_tags() inspects it
# and get_state() resets it to 'NO_OP'.
my $state = 'NO_OP';

# Set to 1 by the '---body' handler in get_state(); assert_head() dies while
# it is still false.
my $document_head = 0;

# Cleared by the '---body' handler in get_state().
my $in_css = 0;

# Cleared by the '---br' handler in get_state() when it closes an open table.
my $in_table = 0;

# NOTE(review): the uses of the variables below are outside the part of the
# file reviewed here. The names suggest a title-seen flag, a paragraph
# counter, per-column header classes and the current header column -- confirm.
my $document_title = 0;
my $paragraph_num  = 0;
my @th_class;
my $th_column = 0;
# Guard used by directives that require $document_head to be set.
#
# Takes no arguments. Dies with "Missing document head\n" when
# $document_head is false; the trailing newline keeps Perl from appending
# the script name and line number to the message.
sub assert_head
{
    die "Missing document head\n" unless $document_head;
}
# Print the closing output for the construct named by $state.
#
# Takes no arguments and reads the file-level $state. Each pair of states
# (QUOTE/QUOTE2, TD/TD2, TH/TH2, ULIST/ULIST2) has its own branch so that
# its closing text can be maintained independently. Any other state prints
# nothing. $state itself is not modified here.
#
# Written as a plain if/elsif chain rather than given/when: the switch
# feature is deprecated in recent Perl releases and scheduled for removal.
sub close_tags
{
    if ($state eq 'QUOTE' || $state eq 'QUOTE2') {
        print "\n";
    }
    elsif ($state eq 'TD' || $state eq 'TD2') {
        print "\n";
    }
    elsif ($state eq 'TH' || $state eq 'TH2') {
        print "\n";
    }
    elsif ($state eq 'ULIST' || $state eq 'ULIST2') {
        print "\n";
    }
}
sub decode_string
{
my $string = shift;
my $decoded_str = '';
my $skip = 0;
for (my $i = 0; $i < length($string); $i++) {
if ($skip) {
# Skip over HTML codes and tags
$skip--;
next;
}
if (substr($string, $i, 1) eq '&') {
# Escape a bare '&' as an HTML entity, but leave the entity codes listed in
# @codes as they are rather than escaping their leading '&'.
my @codes = ('-', '✓', '&', 'ε', '>', '<',
'—', ' ', '–');
my $found = 0;
foreach my $code (@codes) {
if (substr($string, $i, length($code)) eq $code) {
$decoded_str .= $code;
$skip = length($code) - 1;
$found = 1;
last;
}
}
if ($found) {
next;
}
$decoded_str .= '&';
}
elsif (substr($string, $i, 1) eq '<') {
# Escape a bare '<' as an HTML entity, but leave the HTML tags listed in
# @tags (such as <br>) as they are rather than escaping their leading '<'.
my @tags = ('
',
'', '
',
'', '',
'', '',
'', '',
'', '',
'
', '
', '', '', '', '', '', '', '', '', ); my $found = 0; foreach my $tag (@tags) { if (substr($string, $i, length($tag)) eq $tag) { $decoded_str .= $tag; $skip = length($tag) - 1; $found = 1; last; } } if ($found) { next; } $decoded_str .= '<'; } elsif (substr($string, $i, 1) eq '>') { $decoded_str .= '>'; } else { $decoded_str .= substr($string, $i, 1); } } return $decoded_str; } sub get_state { my $line = shift; given ($line) { when ('---') { close_tags; $state = 'NO_OP'; return 1; } when ('---body') { if ($in_css) { print "\n"; $in_css = 0; } if (!$document_head) { print "\n\n"; $document_head = 1; } $state = 'NO_OP'; return 1; } when ('---br') { assert_head; # ---br does not close tags or change state, so table rows and tables # have to be explicitly closed. if ($in_table) { print "\n\n"; $in_table = 0; $state = 'NO_OP'; } print "