#!/usr/bin/env python3
#
# dirpacker.py, Copyright © 2014 Matteo Cypriani
#
########################################################################
# This program is licensed under the terms of the Expat license.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
########################################################################
#
# This script groups input files by volumes of a given size. It is loosely
# inspired by datapacker but is able to work on directories rather than regular
# files only.

import argparse
import math
import os
import shutil
import sys
from collections import defaultdict

# Default maximal size of a bin, in MiB (the size of an 80-minute CD-ROM).
DEFAULT_MAX_BIN_SIZE = 703
# Default prefix of a bin's name.
DEFAULT_PREFIX = "bin_"

# Settings shared by the whole script.  main() replaces this namespace with
# the parsed command line; the defaults below only keep Bin and create_bin()
# usable when the module is imported rather than executed.
options = argparse.Namespace(
    maxbinsize=DEFAULT_MAX_BIN_SIZE,
    prefix=DEFAULT_PREFIX,
    first_bin_number=1,
    move=False,
    machine_readable=False,
    verbose=True,
    filenames=[],
)

# Files waiting to be packed: maps a size in MiB to the list of the names of
# the input files that have exactly this size.
sizes = defaultdict(list)


class Bin:
    """
    Represents a bin (a volume) containing files and directories.
    """

    def __init__(self, bin_id, total_size, files):
        """bin_id is the bin's identifier, total_size the sum of the sizes
        of all the files it contains (in MiB), and files is a dictionary with
        file names as the keys and sizes (in MiB) as values.
        """
        self.id = bin_id
        self.size = total_size
        self.files = files

    def print(self):
        """Displays the contents of the bin.

        In machine-readable mode, one "<bin name><TAB><file name>" line is
        printed per file.  Otherwise a header, the size of each file and the
        used/unused space of the bin are printed.
        """
        if not options.machine_readable:
            print("\n### {} ###\nList of files:".format(self.name()))
        for filename, size in sorted(self.files.items()):
            if options.machine_readable:
                print(self.name(), end="")
            else:
                print("{:12.2f} MiB".format(size), end="")
            print("\t{}".format(filename))
        if not options.machine_readable:
            sizefree = options.maxbinsize - self.size
            print("This bin's size: {:.2f} MiB".format(self.size))
            print("Unused: {:.2f} MiB".format(sizefree))

    def name(self):
        """Returns the bin's name: the configured prefix followed by the
        bin's identifier, zero-padded to two digits.
        """
        # Use this bin's own identifier, not whichever bin a module-level
        # loop variable happens to designate.
        return "{}{:02}".format(options.prefix, self.id)

    def list_files(self):
        """Returns a list of the files contained in the bin.
        """
        return list(self.files.keys())


def warn(*message, prefix="Warning!", **args):
    """Prints the message on the error output, prepended by 'prefix'.

    The standard output is flushed prior to printing the error message, to
    enable the messages to be displayed in the right order.

    This function is a simple wrapper around print(), and you can use any
    keyword argument you would use with print().
    """
    sys.stdout.flush()
    print(prefix, *message, file=sys.stderr, **args)


def du(basepath):
    """
    Returns the size of the file or directory `basepath`, in MiB.

    For a directory, the sizes reported by os.path.getsize() for the
    directory itself and for every file and sub-directory found by os.walk()
    are added up.  Raises OSError if `basepath` itself cannot be examined.
    """
    size = os.path.getsize(basepath)
    for root, dirs, files in os.walk(basepath):
        for name in files + dirs:
            fullname = os.path.join(root, name)
            # Entries such as dangling symbolic links have no size to add.
            if not os.path.exists(fullname):
                continue
            size += os.path.getsize(fullname)
    return size / 1024 / 1024


def create_bin(binnumber, pending=None, maxbinsize=None):
    """Creates a bin numbered `binnumber` and returns it.

    `pending` maps a size to the list of file names having that size and
    defaults to the global list of files and sizes; the files put in the bin
    are removed from it.  `maxbinsize` is the capacity of the bin and
    defaults to the configured maximal bin size.

    Files are taken greedily from the largest to the smallest, skipping the
    sizes that would exceed the bin's capacity.  Callers that loop until
    `pending` is empty must make sure that every file fits in an empty bin.
    """
    if pending is None:
        pending = sizes
    if maxbinsize is None:
        maxbinsize = options.maxbinsize

    binsize = 0
    files = {}
    # sorted() works on a snapshot of the keys, so entries can be deleted
    # from the dictionary while looping.
    for size in sorted(pending, reverse=True):
        names = pending[size]
        # Take as many files of this size as the bin can hold, not just one:
        # several equally-sized files may well fit in the same bin.
        while names and not (binsize + size > maxbinsize):
            files[names.pop()] = size
            binsize += size
        # Delete the size from the dictionary if its last file name was just
        # popped:
        if not names:
            del pending[size]
    return Bin(binnumber, binsize, files)


def _build_arg_parser():
    """Returns the parser of the script's command-line arguments."""
    arg_parser = argparse.ArgumentParser(
        description="Packs files and directories into fixed-size volumes",
        epilog="For more information about this program, see the README file "
               "provided with the distribution.")
    arg_parser.add_argument("-s", "--size", action="store", dest="maxbinsize",
                            type=float, default=DEFAULT_MAX_BIN_SIZE,
                            help="maximal size of each volume (bin), in MiB; "
                                 "the default is 703 MiB, i.e. the size of a "
                                 "80-minute CD-ROM")
    arg_parser.add_argument("--prefix", action="store",
                            default=DEFAULT_PREFIX,
                            help="prefix of a bin's name (default: \"bin_\")")
    arg_parser.add_argument("-n", "--first-bin-number", action="store",
                            type=int, default=1,
                            help="first number used for bin numbering "
                                 "(default: 1)")
    arg_parser.add_argument("--move", action="store_true",
                            help="move input to per-volume directories")
    arg_parser.add_argument("-m", "--machine-readable", action="store_true",
                            help="print the list of volumes in a "
                                 "machine-readable format; implies --quiet")
    arg_parser.add_argument("-v", "--verbose", action="store_true",
                            default=True,
                            help="be verbose (this is the default)")
    arg_parser.add_argument("-q", "--quiet", action="store_false",
                            dest="verbose",
                            help="print only warnings and errors")
    arg_parser.add_argument("filenames", metavar="file", nargs="+",
                            help="files or directories to pack")
    return arg_parser


def _move_bins(bins):
    """Moves the files of each bin into a directory named after the bin."""
    for current in bins:
        dirname = current.name()
        if os.path.exists(dirname) and not os.path.isdir(dirname):
            warn('File "{}" exists but is not a directory: skipping bin #{}.'
                 .format(dirname, current.id))
            continue
        # Create the target directory:
        try:
            os.makedirs(dirname, exist_ok=True)
        except FileExistsError:
            warn('Directory "{}" already exists with different permissions'
                 ' (mode). Proceeding anyway.'
                 .format(dirname))
        # Move the files in the directory:
        for filename in current.list_files():
            shutil.move(filename, dirname)


def main(argv=None):
    """Runs the script.

    `argv` is the list of command-line arguments, without the program name;
    when it is None the arguments are read from sys.argv.  The parsed
    arguments are stored in the global `options` and the global `sizes` is
    used as the work list of the files to pack.
    """
    global options

    ### Parse command-line arguments ###

    arg_parser = _build_arg_parser()
    options = arg_parser.parse_args(argv)
    # Written as "not > 0" so that NaN is rejected along with zero and
    # negative values; a null size would also divide by zero below.
    if not options.maxbinsize > 0:
        arg_parser.error("the maximal size of a bin must be a positive number")
    if options.machine_readable:
        options.verbose = False

    ### Preliminary statistics ###

    if options.verbose:
        print("Maximum size of a bin: {:.2f} MiB".format(options.maxbinsize))

    # Compute the size of all the files
    sizes.clear()
    totalsize = 0
    ignored_files = False
    for filename in options.filenames:
        try:
            size = du(filename)
        except OSError as err:
            # A missing or unreadable input should not abort the whole run.
            warn('Cannot get the size of "{}" ({}): ignoring.'
                 .format(filename, err))
            ignored_files = True
            continue
        if size > options.maxbinsize:
            warn('"{}" is {:.2f} MiB, which exceeds the maximum size of a bin:'
                 ' ignoring.'
                 .format(filename, size))
            ignored_files = True
            continue
        sizes[size].append(filename)
        totalsize += size

    if options.verbose:
        print("Total size of the input files: {:.2f} MiB".format(totalsize))
        minbins = math.ceil(totalsize / options.maxbinsize)
        print("Minimal (optimal) number of bins required: ", minbins)
        sizefree = minbins * options.maxbinsize - totalsize
        print("Theoretical unused space with {} bins: {:.2f} MiB"
              .format(minbins, sizefree))

    ### Assemble the bins ###

    bins = []
    binnumber = options.first_bin_number
    # Every remaining file fits in an empty bin (larger ones were ignored
    # above), so each new bin takes at least one file and the loop ends.
    while sizes:
        bins.append(create_bin(binnumber))
        binnumber += 1
    binscreated = len(bins)

    ### Final statistics ###

    if options.verbose:
        sizefree = binscreated * options.maxbinsize - totalsize
        print("{} bins created.\n"
              "Actual unused space over the {} bins created: {:.2f} MiB"
              .format(binscreated, binscreated, sizefree))

    ### Execute the requested action ###

    # Print the contents of the bins
    if options.verbose or options.machine_readable:
        for current in bins:
            current.print()

    # Move the files
    if options.move:
        _move_bins(bins)

    ### Wrapping-up ###

    if ignored_files:
        warn("There were ignored files (see prior messages).")


if __name__ == "__main__":
    main()