#!/usr/bin/perl

# Copyright 2023 Ted Clark
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License, version 2, as published by the Free
# Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.

use strict;
use warnings;
use experimental 'switch';

# Converter state shared by the subroutines below.
my $document_head = 0;      # Set once the '---body' directive has emitted the head.
my $document_title = 0;
my $in_css = 0;             # Cleared by '---body'; tested by print_css.
my $in_table = 0;           # Cleared by '---br' after it closes an open table.
my $paragraph_num = 0;      # Set by the '---pnum' directive.
my $state = 'NO_OP';        # Name of the block currently being processed.
my @th_class = ();
my $th_column = 0;

# Dies with "Missing document head" unless the document head has already been
# emitted.
sub assert_head {
    $document_head or die "Missing document head\n";
}

# Emits the closing output for the block named by $state, if that block is a
# quote, table cell, table header or unordered list. Other states print
# nothing.
#
# NOTE(review): all four branches print the same bare newline; presumably each
# was meant to print its own closing markup -- confirm against the intended
# output.
sub close_tags {
    if ($state eq 'QUOTE' || $state eq 'QUOTE2') {
        print "\n";
    }
    elsif ($state eq 'TD' || $state eq 'TD2') {
        print "\n";
    }
    elsif ($state eq 'TH' || $state eq 'TH2') {
        print "\n";
    }
    elsif ($state eq 'ULIST' || $state eq 'ULIST2') {
        print "\n";
    }
}

# Returns the first non-empty entry of @$literals that occurs in $string at
# offset $pos, or undef when none does.
#
# Empty entries are ignored: an empty string "occurs" at every offset, and
# accepting it would consume nothing while still being reported as a match.
sub _literal_at {
    my ($string, $pos, $literals) = @_;

    foreach my $literal (@$literals) {
        next if $literal eq '';
        if (substr($string, $pos, length($literal)) eq $literal) {
            return $literal;
        }
    }
    return;
}

# Returns a copy of $string in which '&', '<' and '>' are replaced, except
# that a recognized code starting at an '&' or a recognized tag starting at a
# '<' is copied through unchanged as a whole.
#
# Previously an empty entry in either list matched at every position and set
# the skip counter to -1, which never counted back to zero, so the remainder
# of the input was silently dropped. Empty entries are now ignored.
#
# NOTE(review): the entries of @codes and @tags and the three replacement
# literals look as if they have lost their HTML markup (many entries are empty
# or newline-only, and each replacement equals the character it replaces).
# The values are kept exactly as found -- confirm them against the intended
# entity and tag text.
sub decode_string {
    my $string = shift;

    # Codes that may follow an '&' and must be passed through untouched.
    my @codes = ('-', '✓', '&', 'ε', '>', '<', '—', ' ', '–');

    # Tags that may follow a '<' and must be passed through untouched.
    my @tags = (
        "\n", '', '', '', '', '', '', '', '', '', '',
        "\n\n", "\n\n", "\n", "\n",
        '', '', '', '', '', '',
    );

    my $decoded_str = '';
    my $length = length($string);
    my $pos = 0;

    while ($pos < $length) {
        my $char = substr($string, $pos, 1);

        if ($char eq '&') {
            my $code = _literal_at($string, $pos, \@codes);
            if (defined $code) {
                # Copy the whole code and continue after it.
                $decoded_str .= $code;
                $pos += length($code);
                next;
            }
            $decoded_str .= '&';
        }
        elsif ($char eq '<') {
            my $tag = _literal_at($string, $pos, \@tags);
            if (defined $tag) {
                # Copy the whole tag and continue after it.
                $decoded_str .= $tag;
                $pos += length($tag);
                next;
            }
            $decoded_str .= '<';
        }
        elsif ($char eq '>') {
            $decoded_str .= '>';
        }
        else {
            $decoded_str .= $char;
        }
        $pos++;
    }

    return $decoded_str;
}

# Interprets $line as a '---' directive. When it is one, performs the
# directive's side effects (printing, updating $state and the other flags) and
# returns 1; otherwise changes nothing and returns 0.
#
# Directives that start a block of body content die via assert_head when the
# document head has not been emitted yet.
sub get_state {
    my $line = shift;

    # Directives that all behave alike: require the head, close the previous
    # block, then switch to the mapped state.
    my %block_state_for = (
        '---h1'  => 'H1',
        '---h2'  => 'H2',
        '---l'   => 'LINK',
        '---q'   => 'QUOTE',
        '---raw' => 'RAW',
        '---t'   => 'TEXT',
        '---td'  => 'TD',
        '---th'  => 'TH',
        '---ul'  => 'ULIST',
    );

    if ($line eq '---') {
        close_tags;
        $state = 'NO_OP';
        return 1;
    }

    if ($line eq '---body') {
        if ($in_css) {
            print "\n";
            $in_css = 0;
        }
        if (!$document_head) {
            print "\n\n";
            $document_head = 1;
        }
        $state = 'NO_OP';
        return 1;
    }

    if ($line eq '---br') {
        assert_head;
        # ---br does not close tags or change state, so table rows and tables
        # have to be explicitly closed.
        if ($in_table) {
            print "\n\n";
            $in_table = 0;
            $state = 'NO_OP';
        }
        print "\n\n";
        return 1;
    }

    if ($line eq '---c') {
        close_tags;
        $state = 'COMMENT';
        return 1;
    }

    if ($line eq '---css') {
        $state = 'CSS';
        return 1;
    }

    if ($line eq '---head') {
        $state = 'HEAD';
        return 1;
    }

    if ($line eq '---pnum') {
        $paragraph_num = 1;
        return 1;
    }

    if (exists $block_state_for{$line}) {
        assert_head;
        close_tags;
        $state = $block_state_for{$line};
        return 1;
    }

    return 0;
}

# Returns 1 when $string contains no character other than the digits '0'
# through '9', and 0 otherwise. The empty string contains no such character,
# so it also yields 1.
sub is_numeric {
    my $string = shift;

    return $string =~ /[^0-9]/ ? 0 : 1;
}

# Returns 1 when $state is one of 'TD', 'TD2', 'TH' or 'TH2', and 0 otherwise.
sub is_table_state {
    return $state =~ /\A(?:TD|TH)2?\z/ ? 1 : 0;
}

sub print_css {
    my $line = shift;
    if (!$in_css) {
        print "