[file_utils] Add dirpacker.py

This program helps to group a bunch of files into fixed-size volumes
(e.g. to burn them on CD-ROMs).
This commit is contained in:
Matteo Cypriani 2014-05-31 15:42:24 -04:00
parent 6940427a62
commit 61e115c4b9
4 changed files with 161 additions and 0 deletions

1
bin/dirpacker Symbolic link
View File

@ -0,0 +1 @@
../file_utils/dirpacker.py

View File

@ -1,3 +1,22 @@
# dirpacker.py #
This script groups (packs) a bunch of files or directories into
fixed-size volumes, optimizing the occupied size of the volumes. The
original use case was to burn MP3 albums to CD-ROMs to play them in the
car while minimizing the wasted space on each disc. Of course, it can be
used to back up any kind of files to any kind of medium.
The particularity of this program, compared for example to datapacker
(from which it is loosely inspired), is that it works with directories
instead of regular files only. The files inside a directory won't be
split across several volumes; they will all be on the same volume.
Note: I kept datapacker's terminology: a volume is also called a
bin.
To see the usage, call the program with -h.
# mvparent.sh #
mvparent.sh was originally written to be integrated in the ROX-Filer

137
file_utils/dirpacker.py Executable file
View File

@ -0,0 +1,137 @@
#!/usr/bin/python3
#
# dirpacker.py, Copyright © 2014 Matteo Cypriani <mcy@lm7.fr>
#
########################################################################
# This program is licensed under the terms of the Expat license.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
########################################################################
#
# This script groups input files by volumes of a given size. It is loosely
# inspired by datapacker but is able to work on directories rather than regular
# files only.
import argparse
import os
from collections import defaultdict
import math
def du(basepath):
    """Return the size of the file or directory `basepath`, in MiB.

    The result is the sum of os.path.getsize() for `basepath` itself and
    for every file and directory entry found by walking `basepath` with
    os.walk().  For a regular file the walk yields nothing, so only the
    file's own size is counted.

    Entries whose size cannot be read (e.g. dangling symbolic links, or
    entries removed while the tree is being scanned) are skipped.

    Raises OSError if the size of `basepath` itself cannot be read.
    """
    size = os.path.getsize(basepath)
    for root, dirs, files in os.walk(basepath):
        for name in files + dirs:
            # Ask for the size directly rather than testing for existence
            # first: the entry could vanish between the two calls.
            try:
                size += os.path.getsize(os.path.join(root, name))
            except OSError:
                continue
    # Bytes -> KiB -> MiB
    return size / 1024 / 1024
def create_bin(binnumber):
    """Create the bin numbered `binnumber` from the global list of files and
    sizes, and print its contents.

    Reads the module-level `sizes` dictionary (size in MiB -> list of file
    names having that size) and `options.maxbinsize`.  Files are picked
    greedily, largest first; every file that still fits in the remaining
    space is added to the bin, printed, and removed from `sizes`.  A size
    is deleted from `sizes` once its last file name has been taken.

    Returns nothing; the result is the printed listing and the updated
    `sizes` dictionary.
    """
    print("\n### Bin #{} ###\n# List of files:".format(binnumber))
    binsize = 0
    # sorted() builds a new list, so deleting keys from `sizes` inside the
    # loop is safe.
    for size in sorted(sizes, reverse=True):
        filenames = sizes[size]
        # Several files can share the same size: keep taking them for as
        # long as they fit, instead of only one per bin.  When this size no
        # longer fits, fall through to the next, smaller one.
        while filenames and binsize + size <= options.maxbinsize:
            filename = filenames.pop()
            print("{} # {:.2f} MiB".format(filename, size))
            binsize += size
        # Delete the size from the dictionary if its last file name was
        # just popped:
        if not filenames:
            del sizes[size]
    print("# This bin's size: {:.2f} MiB".format(binsize))
    sizefree = options.maxbinsize - binsize
    print("# Free space: {:.2f} MiB".format(sizefree))
### Parse command-line arguments ###
arg_parser = argparse.ArgumentParser(
    description="Packs files and directories into fixed-size volumes",
    epilog="For more information about this program, see the README file "
           "provided with the distribution.")
arg_parser.add_argument("-s", "--size", action="store", dest="maxbinsize",
                        type=float, default=703,
                        help="maximal size of each volume (bin), in MiB; the "
                             "default is 703 MiB, i.e. the size of a 80-minute "
                             "CD-ROM")
arg_parser.add_argument("filenames", metavar="file", nargs="+",
                        help="files or directories to pack")
# `options` is read by create_bin(), so it must remain a module-level name.
options = arg_parser.parse_args()

# A zero volume size would make the bin-count computation below divide by
# zero, NaN would make math.ceil() fail, and a negative or infinite size is
# meaningless: reject them all with a proper usage error.
if not (math.isfinite(options.maxbinsize) and options.maxbinsize > 0):
    arg_parser.error("the volume size must be a positive, finite number")

### Preliminary statistics ###
print("# Maximum size of a bin: {:.2f} MiB".format(options.maxbinsize))

# Compute the size of all the files.  `sizes` maps a size in MiB to the list
# of input names having exactly that size; it is consumed by create_bin().
sizes = defaultdict(list)
totalsize = 0
ignored_files = False
for filename in options.filenames:
    size = du(filename)
    # An input larger than a whole bin can never be packed: skip it, but
    # remember to remind the user at the end of the log.
    if size > options.maxbinsize:
        print('# WARNING! "{}" is {:.2f} MiB, which exceeds the maximum size of'
              ' a bin: ignoring.'
              .format(filename, size))
        ignored_files = True
        continue
    sizes[size].append(filename)
    totalsize += size
print("# Total size of the input files: {:.2f} MiB".format(totalsize))

# Lower bound on the number of bins: the total size split perfectly.
minbins = math.ceil(totalsize / options.maxbinsize)
print("# Minimal (optimal) number of bins required: ", minbins)
sizefree = minbins * options.maxbinsize - totalsize
print("# Total unused space with {} bins: {:.2f} MiB".format(minbins, sizefree))

### Assemble the bins ###
# Each call to create_bin() removes the files it packs from `sizes`, so the
# loop ends once every accepted input has been assigned to a bin.
binnumber = 1
while sizes:
    create_bin(binnumber)
    binnumber += 1
binscreated = binnumber - 1

### Final statistics ###
sizefree = binscreated * options.maxbinsize - totalsize
print("\n# Total unused space over the {} bins created: {:.2f} MiB"
      .format(binscreated, sizefree))
if ignored_files:
    print("\n# WARNING! There were ignored files, see at the top of this log.")

View File

@ -117,3 +117,7 @@ they will be converted recursively and put in the target directory
(~/tmp/mp3car/ by default, edit the script to change that).
You must have installed pacpl and lame for this script to work.
Note: if you want to burn all these converted audio files to CD-ROMs,
you might be interested in the dirpacker.py script provided in this
repository.