scripts/file_utils/dirpacker.py

245 lines
8.4 KiB
Python
Executable File

#!/usr/bin/env python3
#
# dirpacker.py, Copyright © 2014 Matteo Cypriani <mcy@lm7.fr>
#
########################################################################
# This program is licensed under the terms of the Expat license.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
########################################################################
#
# This script groups input files by volumes of a given size. It is loosely
# inspired by datapacker but is able to work on directories rather than regular
# files only.
import argparse
import os
import shutil
import sys
from collections import defaultdict
import math
class Bin:
    """Represents a bin (a volume) containing files and directories.

    The naming and display methods read the module-level `options` object
    (its `prefix`, `machine_readable` and `maxbinsize` attributes).
    """

    def __init__(self, bin_id, total_size, files):
        """bin_id is the bin's identifier, total_size the sum of the sizes of
        all the files it contains, and files is a dictionary with file names
        as the keys and sizes as values.
        """
        self.id = bin_id
        self.size = total_size
        self.files = files

    def print(self):
        """Displays the contents of the bin on the standard output.

        In machine-readable mode, one line is printed per file: the bin's
        name, a tabulation and the file name.  Otherwise a header is printed,
        followed by one line per file (size displayed as MiB, tabulation,
        file name), then the bin's total size and its unused space.  In both
        modes the files are listed sorted by name.
        """
        machine_readable = options.machine_readable
        if not machine_readable:
            print("\n### {} ###\nList of files:".format(self.name()))
        for filename, size in sorted(self.files.items()):
            # First column: the bin's name (machine-readable mode) or the
            # size of the file (human-readable mode).
            if machine_readable:
                print(self.name(), end="")
            else:
                print("{:12.2f} MiB".format(size), end="")
            print("\t{}".format(filename))
        if not machine_readable:
            sizefree = options.maxbinsize - self.size
            print("This bin's size: {:.2f} MiB".format(self.size))
            print("Unused: {:.2f} MiB".format(sizefree))

    def name(self):
        """Returns the bin's name: the configured prefix followed by the
        bin's identifier, zero-padded to at least two digits.
        """
        # Use this instance's own identifier rather than whatever object a
        # global variable happens to reference at call time.
        return "{}{:02}".format(options.prefix, self.id)

    def list_files(self):
        """Returns a list of the names of the files contained in the bin.
        """
        return list(self.files)
def warn(*message, prefix="Warning!", **args):
    """Writes a message to the error output, with `prefix` placed in front.

    All the positional arguments form the message; they are handed to
    print() right after `prefix`, so they are separated the way print()
    separates its arguments.  Any extra keyword argument is forwarded to
    print() unchanged.

    The standard output is flushed before anything is written, so that
    regular output and error messages show up in the right order.
    """
    sys.stdout.flush()
    words = (prefix,) + message
    print(*words, file=sys.stderr, **args)
def du(basepath):
    """Returns the size of the file or directory `basepath`, in MiB.

    The size of `basepath` itself is always counted.  When it is a
    directory, the size of every file and sub-directory found while walking
    it is added.  Entries whose size cannot be read (for instance dangling
    symbolic links, or files that disappear during the walk) are skipped.

    Raises OSError if the size of `basepath` itself cannot be read.
    """
    size = os.path.getsize(basepath)
    for root, dirs, files in os.walk(basepath):
        for name in files + dirs:
            # Ask for the size directly instead of testing for existence
            # first: a single system call per entry, and no window in which
            # the entry can vanish between the test and the measurement.
            try:
                size += os.path.getsize(os.path.join(root, name))
            except OSError:
                continue
    # Bytes -> KiB -> MiB
    return size / 1024 / 1024
def create_bin(binnumber):
    """Creates a bin numbered `binnumber` from the global list of files and
    sizes, and returns it.

    The global dictionary `sizes` (size -> list of file names) is scanned
    from the largest size to the smallest.  For each size, files are taken
    as long as one is left and adding it keeps the bin within
    `options.maxbinsize`.  Every file placed in the bin is removed from
    `sizes`, and a size is deleted from `sizes` once its list is empty.
    """
    binsize = 0
    files = {}
    # sorted() builds a separate list, so deleting keys from `sizes` inside
    # the loop is safe.
    for size in sorted(sizes, reverse=True):
        names = sizes[size]
        # Several files may share the same size: keep taking them while
        # they fit, instead of giving up after the first one.
        while names and binsize + size <= options.maxbinsize:
            files[names.pop()] = size
            binsize += size
        # Delete the size from the dictionary if its last file name was just
        # popped:
        if not names:
            del sizes[size]
    return Bin(binnumber, binsize, files)
### Parse command-line arguments ###
arg_parser = argparse.ArgumentParser(
    description="Packs files and directories into fixed-size volumes",
    # Adjacent string literals are joined without any separator, hence the
    # trailing space after "file".
    epilog="For more information about this program, see the README file "
           "provided with the distribution.")
arg_parser.add_argument("-s", "--size", action="store", dest="maxbinsize",
                        type=float, default=703,
                        help="maximal size of each volume (bin), in MiB; the "
                        "default is 703 MiB, i.e. the size of a 80-minute "
                        "CD-ROM")
arg_parser.add_argument("--prefix", action="store", default="bin_",
                        help="prefix of a bin's name (default: \"bin_\")")
arg_parser.add_argument("-n", "--first-bin-number", action="store", type=int,
                        default=1,
                        help="first number used for bin numbering (default: 1)")
arg_parser.add_argument("--move", action="store_true",
                        help="move input to per-volume directories")
arg_parser.add_argument("-m", "--machine-readable", action="store_true",
                        help="print the list of volumes in a machine-readable"
                        " format; implies --quiet")
arg_parser.add_argument("-v", "--verbose", action="store_true", default=True,
                        help="be verbose (this is the default)")
arg_parser.add_argument("-q", "--quiet", action="store_false", dest="verbose",
                        help="print only warnings and errors")
arg_parser.add_argument("filenames", metavar="file", nargs="+",
                        help="files or directories to pack")
options = arg_parser.parse_args()

# A bin must have a strictly positive size: the statistics divide by it, and
# nothing but empty files could be packed otherwise.  Written as
# "not ... > 0" so that NaN is rejected as well.
if not options.maxbinsize > 0:
    arg_parser.error("the maximal size of a bin must be a positive number")

# The machine-readable output must not be mixed with informational messages.
if options.machine_readable:
    options.verbose = False
### Preliminary statistics ###
if options.verbose:
    print("Maximum size of a bin: {:.2f} MiB".format(options.maxbinsize))

# Measure every input and index the names by size: `sizes` maps a size (as
# returned by du()) to the list of inputs having exactly that size.  Inputs
# bigger than a whole bin are reported and left out.
sizes = defaultdict(list)
totalsize = 0
ignored_files = False
for candidate in options.filenames:
    measured = du(candidate)
    oversized = measured > options.maxbinsize
    if oversized:
        warn('"{}" is {:.2f} MiB, which exceeds the maximum size of a bin:'
             ' ignoring.'
             .format(candidate, measured))
        ignored_files = True
        continue
    sizes[measured].append(candidate)
    totalsize += measured

if options.verbose:
    print("Total size of the input files: {:.2f} MiB".format(totalsize))
    # Lower bound on the number of bins: the total size spread over
    # completely filled bins.
    lower_bound = math.ceil(totalsize / options.maxbinsize)
    print("Minimal (optimal) number of bins required: ", lower_bound)
    slack = lower_bound * options.maxbinsize - totalsize
    print("Theoretical unused space with {} bins: {:.2f} MiB"
          .format(lower_bound, slack))
### Assemble the bins ###
bins = []
binnumber = options.first_bin_number
# create_bin() removes from `sizes` every file it places in a bin, so the
# loop ends once all the files have been assigned.
while sizes:
    bins.append(create_bin(binnumber))
    binnumber += 1
binscreated = len(bins)

### Final statistics ###
if options.verbose:
    # Space left over across all the bins actually created
    unused = binscreated * options.maxbinsize - totalsize
    summary = ("{} bins created.\n"
               "Actual unused space over the {} bins created: {:.2f} MiB")
    print(summary.format(binscreated, binscreated, unused))
### Execute the requested action ###

# Display what each bin contains
if options.machine_readable or options.verbose:
    for b in bins:
        b.print()

# Move the inputs into one directory per bin, named after the bin
if options.move:
    for b in bins:
        target_dir = b.name()

        # Something that is not a directory already uses the name: leave
        # this bin's files where they are.
        blocked = os.path.exists(target_dir) and not os.path.isdir(target_dir)
        if blocked:
            warn('File "{}" exists but is not a directory: skipping bin #{}.'
                 .format(target_dir, b.id))
            continue

        # Make sure the destination directory exists:
        try:
            os.makedirs(target_dir, exist_ok=True)
        except FileExistsError:
            warn('Directory "{}" already exists with different permissions'
                 ' (mode). Proceeding anyway.'
                 .format(target_dir))

        # Relocate every file of the bin into the directory:
        for entry in b.list_files():
            shutil.move(entry, target_dir)

### Wrapping-up ###
# Remind the user that some inputs were left out of the packing.
if ignored_files:
    warn("There were ignored files (see prior messages).")