Add make_html2text
This commit is contained in:
parent
f2c2e7da8d
commit
0632e91565
|
@ -0,0 +1,9 @@
|
||||||
|
make_html2text.sh converts all HTML (.html and .htm) files in the
|
||||||
|
current directory into text files (.txt). For each HTML file, it
|
||||||
|
verifies that the text file does not exist, or is older than the
|
||||||
|
corresponding HTML file; if the text file is more recent, nothing is
|
||||||
|
done.
|
||||||
|
|
||||||
|
To use this script, you will have to install the program html2text.
|
||||||
|
The option -nobs is used to avoid output of formatting sequences (bold,
|
||||||
|
italic, etc.).
|
|
@ -0,0 +1,39 @@
|
||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# make_html2text.sh, Copyright © 2010 Matteo Cypriani
|
||||||
|
#
|
||||||
|
# This program is free software. It comes without any warranty, to
|
||||||
|
# the extent permitted by applicable law. You can redistribute it
|
||||||
|
# and/or modify it under the terms of the Do What The Fuck You Want
|
||||||
|
# To Public License, Version 2, as published by Sam Hocevar. See
|
||||||
|
# http://sam.zoy.org/wtfpl/COPYING for more details.
|
||||||
|
#
|
||||||
|
# This script converts all HTML (.html and .htm) files in the current
|
||||||
|
# directory into text files (.txt). For each HTML file, it verifies
|
||||||
|
# that the text file, if it exists, is older; if the text file is more
|
||||||
|
# recent, nothing is done.
|
||||||
|
|
||||||
|
# Make sure html2text is available before doing anything.
# `command -v` is the POSIX way to look a command up; `which` is not
# standardised, may be missing, and some implementations report "not found"
# on stdout or with a zero exit status.
if ! command -v html2text >/dev/null 2>&1 ; then
    echo "Error! html2text not found within the PATH." >&2
    exit 1
fi
|
||||||
|
|
||||||
|
# txt_name_for FILE
# Print the name of the text file corresponding to FILE: a trailing
# ".html" or ".htm" is replaced by ".txt"; any other name simply gets
# ".txt" appended.
txt_name_for() {
    case "$1" in
        *.html) printf '%s\n' "${1%.html}.txt" ;;
        *.htm)  printf '%s\n' "${1%.htm}.txt" ;;
        *)      printf '%s\n' "$1.txt" ;;
    esac
}

# Convert every HTML file of the current directory whose text counterpart
# is missing or out of date.
# NOTE: "foo.html" and "foo.htm" both map to "foo.txt"; if both exist, the
# usual timestamp comparison decides whether the second one overwrites the
# output of the first.
for HTML in *.html *.htm ; do
    # When a pattern matches nothing, the shell leaves it unexpanded
    # ("*.html" or "*.htm"); skip it so that no bogus "*.txt" file is
    # created.
    if [ ! -f "$HTML" ] ; then
        continue
    fi

    TXT=$(txt_name_for "$HTML")

    # Skip the file when an existing .txt is more recent than the HTML
    # source. Two separate tests are used because the -a operator of
    # test(1) is obsolescent and ambiguous with unusual operands.
    if [ -f "$TXT" ] && [ "$HTML" -ot "$TXT" ] ; then
        continue
    fi

    # Convert the file (-nobs: no backspace-based bold/underline sequences)
    printf '%s --> %s\n' "$HTML" "$TXT"
    html2text -nobs -o "$TXT" "$HTML"
done
|
Loading…
Reference in New Issue