Add make_html2text

This commit is contained in:
Matteo Cypriani 2010-01-17 17:56:13 +01:00
parent f2c2e7da8d
commit 0632e91565
2 changed files with 48 additions and 0 deletions

9
make_html2text/README Normal file
View File

@ -0,0 +1,9 @@
make_html2text.sh converts all HTML (.html and .htm) files in the
current directory into text files (.txt). For each HTML file, it
checks whether the text file does not exist or is older than the
corresponding HTML file; if the text file is more recent, nothing is
done.
To use this script, you will have to install the program html2text.
The option -nobs is used to avoid output of formatting sequences (bold,
italic, etc.).

View File

@ -0,0 +1,39 @@
#!/bin/sh
#
# make_html2text.sh, Copyright © 2010 Matteo Cypriani
#
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING for more details.
#
# This script converts all HTML (.html and .htm) files in the current
# directory into text files (.txt). For each HTML file, it verifies
# that the text file, if it exists, is older; if the text file is more
# recent, nothing is done.
# Look for html2text. "command -v" is the POSIX way to find out whether
# a command is available; "which" is an external program whose output
# and exit status are not standardised. Both output streams are
# discarded: only the exit status matters here.
if ! command -v html2text >/dev/null 2>&1 ; then
    echo "Error! html2text not found within the PATH." >&2
    exit 1
fi
# Print the name of the text file corresponding to the HTML file $1:
# the ".html" or ".htm" extension is replaced by ".txt" (any other
# name simply gets ".txt" appended).
txt_name()
{
    case "$1" in
        *.html) printf '%s\n' "${1%.html}.txt" ;;
        *.htm)  printf '%s\n' "${1%.htm}.txt" ;;
        *)      printf '%s\n' "$1.txt" ;;
    esac
}

for HTML in *.html *.htm ; do
    # A pattern that matches nothing is left unchanged by the shell:
    # skip it to avoid creating "*.html.txt" or "*.htm.txt" if there
    # is no .html or no .htm file
    if [ ! -f "$HTML" ] ; then
        continue
    fi

    TXT=$(txt_name "$HTML")

    # Is the .txt file more recent than the HTML file? Then it is up
    # to date and there is nothing to do.
    if [ -f "$TXT" ] && [ "$HTML" -ot "$TXT" ] ; then
        continue
    fi

    # Convert the file. The "./" prefix prevents a file name starting
    # with "-" from being taken for an option by html2text.
    echo "$HTML --> $TXT"
    html2text -nobs -o "./$TXT" "./$HTML"
done