#!/usr/bin/awk -f
# Copyright 2023 Ted Clark
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License, version 2, as published by the Free
# Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.

# Overview: input lines of the form "---xxx" are directives that select a
# state; every other line is rendered according to the current state.
#
# NOTE(review): this file uses "switch", which is a gawk extension, while the
# shebang names plain "awk" -- confirm the interpreter on the target system.
#
# NOTE(review): in this copy many string literals are empty or contain only
# newlines, and rtrim(), print_document_head(), print_hyperlink(),
# print_quote(), print_td(), print_text(), print_th() and print_ulist() are
# called but not defined here. It looks as if markup inside the literals (and
# some code between print_css() and the ---br rule) was lost -- compare with
# the upstream copy before relying on the output. The literal values below are
# kept exactly as they appear; raw line breaks inside literals are written as
# \n escapes because awk does not allow an unescaped newline in a string.

# Abort with exit status 1 when no document head has been emitted yet.
# do_end is cleared first so that the END rule prints nothing further.
function assert_head() {
	if (!document_head) {
		print "Missing document head" > "/dev/stderr"
		do_end = 0
		exit(1)
	}
}

# Emit the closing output for the construct selected by the current state.
# States other than the quote, table-cell and list states print nothing.
function close_tags() {
	switch (state) {
	case "QUOTE":
	case "QUOTE2":
		print ""
		break
	case "TD":
	case "TD2":
	case "TH":
	case "TH2":
		print ""
		break
	case "ULIST":
	case "ULIST2":
		print ""
		break
	}
}

# Fill the global lookup tables used by decode_string(). The tables are
# constant, so they are built once instead of on every "&" or "<" character.
function init_decode_tables() {
	if (decode_tables_ready) {
		return
	}

	# Character codes that decode_string() copies through unchanged.
	# NOTE(review): lookups only happen at a "&", so entries that do not
	# start with "&" can never match; these values were presumably entity
	# names originally -- confirm.
	codes[1] = "-"
	codes[2] = "✓"
	codes[3] = "&"
	codes[4] = "ε"
	codes[5] = ">"
	codes[6] = "<"
	codes[7] = "—"
	codes[8] = " "
	codes[9] = "–"

	# Tags that decode_string() copies through unchanged.
	# NOTE(review): lookups only happen at a "<"; none of the entries below
	# starts with "<", so presumably the tag text is missing -- confirm.
	tags[1] = "\n"
	tags[2] = ""
	tags[3] = ""
	tags[4] = ""
	tags[5] = ""
	tags[6] = ""
	tags[7] = ""
	tags[8] = ""
	tags[9] = ""
	tags[10] = ""
	tags[11] = ""
	tags[12] = "\n\n"
	tags[13] = "\n\n"
	tags[14] = "\n"
	tags[15] = "\n"
	tags[16] = ""
	tags[17] = ""
	tags[18] = ""
	tags[19] = ""
	tags[20] = ""
	tags[21] = ""

	decode_tables_ready = 1
}

# Return the index of the first entry of table (indexed 1..N) that occurs in
# string starting at position pos, or 0 if there is none. Zero-length entries
# are ignored: an empty entry would match everywhere and consume nothing.
# k and len are locals.
function match_prefix_at(string, pos, table,    k, len) {
	for (k = 1; k in table; k++) {
		len = length(table[k])
		if (len > 0 && substr(string, pos, len) == table[k]) {
			return k
		}
	}
	return 0
}

# Return a copy of string in which every "&" that starts a known code and
# every "<" that starts a known tag is copied through together with that
# code/tag, and every other "&", "<" and ">" is replaced by the corresponding
# fallback literal below. All other characters are copied unchanged.
#
# The names after "string" are locals (awk's extra-parameter idiom), so the
# function no longer overwrites globals such as j or skip in its callers.
#
# NOTE(review): the three fallback literals are identical to the characters
# they replace in this copy, which makes the replacement a no-op; they were
# presumably escape sequences originally -- confirm.
function decode_string(string,    out, pos, n, ch, hit) {
	init_decode_tables()

	out = ""
	n = length(string)
	pos = 1
	while (pos <= n) {
		ch = substr(string, pos, 1)
		if (ch == "&") {
			hit = match_prefix_at(string, pos, codes)
			if (hit) {
				# Copy the whole code and step past it.
				out = out codes[hit]
				pos += length(codes[hit])
				continue
			}
			out = out "&"
		} else if (ch == "<") {
			hit = match_prefix_at(string, pos, tags)
			if (hit) {
				# Copy the whole tag and step past it.
				out = out tags[hit]
				pos += length(tags[hit])
				continue
			}
			out = out "<"
		} else if (ch == ">") {
			out = out ">"
		} else {
			out = out ch
		}
		pos++
	}
	return out
}

# Return 1 if every character of string lies between "0" and "9" (string
# comparison), otherwise 0. An empty string yields 1 because the loop body
# never runs. i and c are locals, so callers' variables are not touched.
function is_numeric(string,    i, c) {
	for (i = 1; i <= length(string); i++) {
		c = substr(string, i, 1)
		if (c < "0" || c > "9") {
			return 0
		}
	}
	return 1
}

# Return 1 if the current state is one of the table-cell states, else 0.
function is_table_state() {
	if (state == "TD" || state == "TD2" || state == "TH" || state == "TH2") {
		return 1
	}
	return 0
}

# Called for each input line while the state is "CSS".
# NOTE(review): as it stands this body never uses "line", assigns 0 to in_css
# inside a branch where in_css is already false, and sets state/matched the
# way the directive rules do. It looks like the tail of this function and the
# start of a following rule were fused -- confirm against the upstream copy.
function print_css(line) {
	if (!in_css) {
		print ""
		in_css = 0
	}
	if (!document_head) {
		print "\n"
		document_head = 1
	}
	state = "NO_OP"
	matched = 1
}

# Directive rules. Each one sets matched so that the generic "!matched" rule
# further down skips the directive line itself.

/^---br$/ {
	assert_head()
	# ---br does not close tags or change state, so table rows and tables
	# have to be explicitly closed.
	if (in_table) {
		print "\n"
		in_table = 0
		state = "NO_OP"
	}
	print "\n"
	matched = 1
}

/^---c$/ {
	close_tags()
	state = "COMMENT"
	matched = 1
}

# ---css and ---head neither require a document head nor close open tags.
/^---css$/ {
	state = "CSS"
	matched = 1
}

/^---h1$/ {
	assert_head()
	close_tags()
	state = "H1"
	matched = 1
}

/^---h2$/ {
	assert_head()
	close_tags()
	state = "H2"
	matched = 1
}

/^---head$/ {
	state = "HEAD"
	matched = 1
}

/^---l$/ {
	assert_head()
	close_tags()
	state = "LINK"
	matched = 1
}

# Sets a flag only; the state is left unchanged.
/^---pnum$/ {
	paragraph_num = 1
	matched = 1
}

/^---q$/ {
	assert_head()
	close_tags()
	state = "QUOTE"
	matched = 1
}

/^---raw$/ {
	assert_head()
	close_tags()
	state = "RAW"
	matched = 1
}

/^---t$/ {
	assert_head()
	close_tags()
	state = "TEXT"
	matched = 1
}

/^---td$/ {
	assert_head()
	close_tags()
	state = "TD"
	matched = 1
}

/^---th$/ {
	assert_head()
	close_tags()
	state = "TH"
	matched = 1
}

/^---ul$/ {
	assert_head()
	close_tags()
	state = "ULIST"
	matched = 1
}

# Content lines: render the line according to the current state.
!matched {
	# Don't trim the left side -- leading tabs are metadata
	line = rtrim($0)

	# Blank lines are dropped except in the CSS, RAW and table-cell states.
	if (length(line) == 0 && state != "CSS" && state != "RAW" && !is_table_state()) {
		next
	}

	# Leaving the table-cell states ends the open table.
	if (in_table && !is_table_state()) {
		print ""
		in_table = 0
	}

	switch (state) {
	case "COMMENT":
		# NOTE(review): the format has no conversion for "line".
		printf "\n", line
		break
	case "CSS":
		print_css(line)
		break
	case "H1":
		printf "\n\n%s\n\n\n", line
		break
	case "H2":
		printf "\n\n%s\n\n\n", line
		break
	case "HEAD":
		print_document_head(line)
		break
	case "LINK":
		print_hyperlink(line)
		break
	case "NO_OP":
		# no-op
		break
	case "QUOTE":
	case "QUOTE2":
		print_quote(line)
		break
	case "RAW":
		print line
		break
	case "TD":
	case "TD2":
		print_td(line)
		break
	case "TEXT":
		print_text(line)
		break
	case "TH":
	case "TH2":
		print_th(line)
		break
	case "ULIST":
	case "ULIST2":
		print_ulist(line)
		break
	default:
		printf "Unknown state: %s\n", state > "/dev/stderr"
		exit(1)
	}
}

# Reset the per-record flag. This rule must stay after all other rules.
{
	matched = 0
}

# Emit the closing output for an open table and for the document, unless
# do_end is false.
# NOTE(review): nothing in this copy sets do_end to a true value -- confirm
# where it is initialised.
END {
	if (do_end) {
		if (in_table) {
			print ""
		}
		if (document_head) {
			print "\n"
		}
	}
}