scripts/textproc/make_html2text.sh

40 lines
1.1 KiB
Bash
Executable File

#!/bin/sh
#
# make_html2text.sh, Copyright © 2010 Matteo Cypriani
#
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING for more details.
#
# This script converts all HTML (.html and .htm) files in the current
# directory into text files (.txt). For each HTML file, it verifies
# that the text file, if it exists, is older; if the text file is more
# recent, nothing is done.
# Look for html2text. "command -v" is the POSIX way to test whether a
# command is available ("which" is not standardised).
if ! command -v html2text >/dev/null 2>&1 ; then
  echo "Error! html2text not found within the PATH." >&2
  exit 1
fi

# Exit status of the script: set to 1 if at least one conversion fails.
STATUS=0

for HTML in *.html *.htm ; do
  # When a pattern matches nothing, the shell leaves it unexpanded
  # (literally "*.html" or "*.htm"); skip it so that no bogus "*.txt"
  # file is created.
  if [ ! -f "$HTML" ] ; then
    continue
  fi

  # Name of the text file: drop the last extension, whether it is
  # ".html" or ".htm", and append ".txt". The names come from a glob on
  # the current directory, so there is no directory part to remove.
  # Note: "foo.html" and "foo.htm" both map to "foo.txt".
  TXT=${HTML%.*}.txt

  # Nothing to do if the text file exists and is more recent than the
  # HTML file.
  if [ -f "$TXT" ] && [ "$HTML" -ot "$TXT" ] ; then
    continue
  fi

  # Convert the file. printf is used rather than echo so that unusual
  # file names (backslashes, leading dash) are printed verbatim. The
  # "./" prefix keeps a file name starting with "-" from being taken
  # for an option by html2text.
  printf '%s --> %s\n' "$HTML" "$TXT"
  if ! html2text -nobs -o "./$TXT" "./$HTML" ; then
    # Report the failure but keep going with the remaining files.
    printf 'Error! Conversion of "%s" failed.\n' "$HTML" >&2
    STATUS=1
  fi
done

exit "$STATUS"