From 61e115c4b9a4f1a5432fdd93e00704ef11e7cc19 Mon Sep 17 00:00:00 2001
From: Matteo Cypriani
Date: Sat, 31 May 2014 15:42:24 -0400
Subject: [PATCH] [file_utils] Add dirpacker.py

This program helps to group a bunch of files into fixed-size volumes
(e.g. to burn them on CD-ROMs).
---
 bin/dirpacker           |   1 +
 file_utils/README       |  19 ++++++
 file_utils/dirpacker.py | 137 ++++++++++++++++++++++++++++++++++++++++
 multimedia/README       |   4 ++
 4 files changed, 161 insertions(+)
 create mode 120000 bin/dirpacker
 create mode 100755 file_utils/dirpacker.py

diff --git a/bin/dirpacker b/bin/dirpacker
new file mode 120000
index 0000000..2a63b42
--- /dev/null
+++ b/bin/dirpacker
@@ -0,0 +1 @@
+../file_utils/dirpacker.py
\ No newline at end of file
diff --git a/file_utils/README b/file_utils/README
index bb37a18..58289ee 100644
--- a/file_utils/README
+++ b/file_utils/README
@@ -1,3 +1,22 @@
+# dirpacker.py #
+
+This script groups (packs) a bunch of files or directories into
+fixed-size volumes, optimizing the occupied size of the volumes. The
+original use case was to burn MP3 albums to CD-ROMs to play them in the
+car while minimizing the wasted space on each disc. Of course, it can be
+used to back up any kind of file to any kind of medium.
+
+The particularity of this program, compared for example to datapacker
+(from which it is loosely inspired), is that it works with directories
+instead of regular files only. The files inside a directory won't be
+split across several volumes; they will stay on the same volume.
+
+Note: I kept datapacker's terminology, so a volume is also called a
+bin.
+
+To see the usage, call the program with -h.
#!/usr/bin/python3
#
# dirpacker.py, Copyright © 2014 Matteo Cypriani
#
########################################################################
# This program is licensed under the terms of the Expat license.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
########################################################################
#
# This script groups input files by volumes of a given size. It is loosely
# inspired by datapacker but is able to work on directories rather than regular
# files only.


import argparse
import math
import os
from collections import defaultdict


def du(basepath):
    """Return the size of the file or directory `basepath`, in MiB.

    The size reported by os.path.getsize() for `basepath` itself is added
    to the sizes of all the files and sub-directories that os.walk() finds
    below it.  Entries whose size cannot be read (for example dangling
    symbolic links, or files deleted while the tree is being walked) are
    skipped.

    Raises OSError if `basepath` itself cannot be accessed.
    """
    size = os.path.getsize(basepath)
    for root, dirs, files in os.walk(basepath):
        for name in files + dirs:
            # Ask for the size directly instead of testing for existence
            # first: the entry could vanish between the two calls.
            try:
                size += os.path.getsize(os.path.join(root, name))
            except OSError:
                continue
    return size / 1024 / 1024


def pack_bin(sizes, maxbinsize):
    """Fill one bin from `sizes` and return `(contents, binsize)`.

    `sizes` maps a size (in MiB) to the list of file names having exactly
    that size.  The sizes are tried from the largest to the smallest, and
    every file that still fits in the remaining space is moved to the bin.
    The files selected are removed from `sizes` (the mapping is modified
    in place), and a size is deleted from it once its list is empty.

    `contents` is the list of `(filename, size)` pairs put in the bin, in
    the order they were selected; `binsize` is the sum of their sizes.
    """
    contents = []
    binsize = 0
    # sorted() returns a new list, so deleting keys while looping is safe.
    for size in sorted(sizes, reverse=True):
        names = sizes[size]
        # Several files can share the same size: keep taking them as long
        # as they fit, rather than only one per bin.
        while names and binsize + size <= maxbinsize:
            contents.append((names.pop(), size))
            binsize += size
        if not names:
            del sizes[size]
    return contents, binsize


def create_bin(binnumber, sizes, maxbinsize):
    """Create the bin numbered `binnumber` and print its description.

    The bin is filled by pack_bin() from the `sizes` mapping, which is
    modified in place, with at most `maxbinsize` MiB of files.  The list of
    files, the size of the bin and its free space are printed on the
    standard output.
    """
    print("\n### Bin #{} ###\n# List of files:".format(binnumber))
    contents, binsize = pack_bin(sizes, maxbinsize)
    for filename, size in contents:
        print("{} # {:.2f} MiB".format(filename, size))

    print("# This bin's size: {:.2f} MiB".format(binsize))
    sizefree = maxbinsize - binsize
    print("# Free space: {:.2f} MiB".format(sizefree))


def build_parser():
    """Return the command-line argument parser of the program."""
    arg_parser = argparse.ArgumentParser(
        description="Packs files and directories into fixed-size volumes",
        epilog="For more information about this program, see the README "
               "file provided with the distribution.")
    arg_parser.add_argument(
        "-s", "--size", action="store", dest="maxbinsize",
        type=float, default=703,
        help="maximal size of each volume (bin), in MiB; the "
             "default is 703 MiB, i.e. the size of a 80-minute "
             "CD-ROM")
    arg_parser.add_argument("filenames", metavar="file", nargs="+",
                            help="files or directories to pack")
    return arg_parser


def main(argv=None):
    """Run the program with the arguments `argv` (default: sys.argv[1:]).

    Exits through argparse's error mechanism (status 2) when the volume
    size is not a positive, finite number or when an input file cannot be
    accessed.
    """
    ### Parse command-line arguments ###

    arg_parser = build_parser()
    options = arg_parser.parse_args(argv)
    maxbinsize = options.maxbinsize
    # A null size would cause a division by zero below, and NaN would make
    # every comparison false, so that no file could ever be packed.
    if not 0 < maxbinsize < math.inf:
        arg_parser.error("the volume size must be a positive, finite number")

    ### Preliminary statistics ###

    print("# Maximum size of a bin: {:.2f} MiB".format(maxbinsize))

    # Compute the size of all the files
    sizes = defaultdict(list)
    totalsize = 0
    ignored_files = False
    for filename in options.filenames:
        try:
            size = du(filename)
        except OSError as err:
            arg_parser.error('cannot access "{}": {}'.format(filename, err))
        if size > maxbinsize:
            print('# WARNING! "{}" is {:.2f} MiB, which exceeds the maximum '
                  'size of a bin: ignoring.'
                  .format(filename, size))
            ignored_files = True
            continue
        sizes[size].append(filename)
        totalsize += size

    print("# Total size of the input files: {:.2f} MiB".format(totalsize))
    minbins = math.ceil(totalsize / maxbinsize)
    print("# Minimal (optimal) number of bins required: ", minbins)
    sizefree = minbins * maxbinsize - totalsize
    print("# Total unused space with {} bins: {:.2f} MiB"
          .format(minbins, sizefree))

    ### Assemble the bins ###

    # Every remaining file fits in an empty bin (bigger ones were ignored
    # above), so each pass packs at least one file and the loop terminates.
    binscreated = 0
    while sizes:
        binscreated += 1
        create_bin(binscreated, sizes, maxbinsize)

    ### Final statistics ###

    sizefree = binscreated * maxbinsize - totalsize
    print("\n# Total unused space over the {} bins created: {:.2f} MiB"
          .format(binscreated, sizefree))

    if ignored_files:
        print("\n# WARNING! There were ignored files, see at the top of this"
              " log.")


if __name__ == "__main__":
    main()