From 0632e91565c1086f3f92f84897106016ce8a2ac2 Mon Sep 17 00:00:00 2001
From: Matteo Cypriani
Date: Sun, 17 Jan 2010 17:56:13 +0100
Subject: [PATCH] Add make_html2text

---
 make_html2text/README            |  9 ++++++++
 make_html2text/make_html2text.sh | 39 ++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 make_html2text/README
 create mode 100755 make_html2text/make_html2text.sh

diff --git a/make_html2text/README b/make_html2text/README
new file mode 100644
index 0000000..d45b202
--- /dev/null
+++ b/make_html2text/README
@@ -0,0 +1,9 @@
+make_html2text.sh converts all HTML (.html and .htm) files in the
+current directory into text files (.txt). For each HTML file, it
+verifies that the text file does not exist, or is older than the
+corresponding HTML file; if the text file is more recent, nothing is
+done.
+
+To use this script, you will have to install the program html2text.
+The option -nobs is used to avoid output of formatting sequences (bold,
+italic, etc.).
diff --git a/make_html2text/make_html2text.sh b/make_html2text/make_html2text.sh
new file mode 100755
index 0000000..4c8e493
--- /dev/null
+++ b/make_html2text/make_html2text.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+#
+# make_html2text.sh, Copyright © 2010 Matteo Cypriani
+#
+# This program is free software. It comes without any warranty, to
+# the extent permitted by applicable law. You can redistribute it
+# and/or modify it under the terms of the Do What The Fuck You Want
+# To Public License, Version 2, as published by Sam Hocevar. See
+# http://sam.zoy.org/wtfpl/COPYING for more details.
+#
+# This script converts all HTML (.html and .htm) files in the current
+# directory into text files (.txt). For each HTML file, it verifies
+# that the text file, if it exists, is older; if the text file is more
+# recent, nothing is done.
+
+# Look for html2text ("command -v" is the POSIX replacement for "which")
+if ! command -v html2text >/dev/null ; then
+    echo "Error! html2text not found within the PATH." >&2
+    exit 1
+fi
+
+STATUS=0 # Becomes 1 as soon as one conversion fails
+for HTML in *.html *.htm ; do
+    # An unmatched pattern is left as is by the shell: skip it, to
+    # avoid creating a "*.txt" file when there is no HTML file
+    [ -f "$HTML" ] || continue
+
+    # Strip the extension, whichever it is (.html or .htm)
+    TXT=${HTML%.*}.txt
+
+    # Nothing to do if the .txt file is more recent than the HTML file
+    [ -f "$TXT" ] && [ "$HTML" -ot "$TXT" ] && continue
+
+    # Convert the file, remembering any failure for the exit status
+    echo "$HTML --> $TXT"
+    html2text -nobs -o "$TXT" "$HTML" || STATUS=1
+done
+
+exit "$STATUS"