scripts/file_utils/dirpacker.py

216 lines
7.3 KiB
Python
Executable File

#!/usr/bin/python3
#
# dirpacker.py, Copyright © 2014 Matteo Cypriani <mcy@lm7.fr>
#
########################################################################
# This program is licensed under the terms of the Expat license.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
########################################################################
#
# This script groups input files by volumes of a given size. It is loosely
# inspired by datapacker but is able to work on directories rather than regular
# files only.
import argparse
import os
import shutil
from collections import defaultdict
import math
class Bin:
    """Represents a bin (a volume) containing files and directories.

    The display and naming methods read the module-level `options`
    namespace (`options.maxbinsize` and `options.prefix`).
    """

    def __init__(self, bin_id, total_size, files):
        """Initialises the bin.

        bin_id is the bin's identifier, total_size the sum of the sizes of
        all the files it contains, and files is a dictionary with file names
        as the keys and sizes as values.
        """
        self.id = bin_id
        self.size = total_size
        self.files = files

    def print(self):
        """Displays the contents of the bin.

        Prints the bin's name, its files sorted by name with their sizes,
        the bin's total size and the space left with respect to
        `options.maxbinsize`.
        """
        print("\n### {} ###\n# List of files:".format(self.name()))
        for filename, size in sorted(self.files.items()):
            print("{} # {:.2f} MiB".format(filename, size))
        print("# This bin's size: {:.2f} MiB".format(self.size))
        sizefree = options.maxbinsize - self.size
        print("# Free space: {:.2f} MiB".format(sizefree))

    def name(self):
        """Returns the bin's name.

        The name is `options.prefix` followed by the bin's identifier,
        zero-padded to at least two digits.
        """
        # Use this instance's own identifier, not a global loop variable.
        return "{}{:02}".format(options.prefix, self.id)

    def list_files(self):
        """Returns a list of the files contained in the bin.
        """
        return list(self.files)
def du(basepath):
    """Returns the size of the file or directory `basepath`, in MiB.

    The result is the sum of os.path.getsize() for `basepath` itself and
    for every file and directory found under it by os.walk(). Entries whose
    size cannot be read (for instance dangling symbolic links, or entries
    removed while scanning) are skipped.

    Raises OSError if `basepath` itself cannot be examined.
    """
    size = os.path.getsize(basepath)
    for root, dirs, files in os.walk(basepath):
        for name in files + dirs:
            # Ask for the size directly instead of testing for existence
            # first: this avoids a second stat() and the race between the
            # check and the measurement.
            try:
                size += os.path.getsize(os.path.join(root, name))
            except OSError:
                continue
    return size / 1024 / 1024
def create_bin(binnumber):
    """Creates a bin numbered `binnumber` from the global list of files and
    sizes, and returns it.

    Sizes are tried from the largest to the smallest; for each size, file
    names are taken from the global `sizes` dictionary for as long as they
    fit within `options.maxbinsize`. Every file put in the bin is removed
    from `sizes`, and a size is deleted from the dictionary once its last
    file name has been taken.
    """
    binsize = 0
    files = {}
    # sorted() works on a snapshot of the keys, so deleting entries from
    # `sizes` inside the loop is safe.
    for size in sorted(sizes, reverse=True):
        names = sizes[size]
        # Take as many files of this size as the bin can hold, so that
        # several files of identical size can share a bin.
        while names:
            newbinsize = binsize + size
            # Try the next, smaller size if we would exceed the bin's
            # maximum size with this one:
            if newbinsize > options.maxbinsize:
                break
            files[names.pop()] = size
            binsize = newbinsize
        # Delete the size from the dictionary if its last file name was
        # just popped:
        if not names:
            del sizes[size]
    return Bin(binnumber, binsize, files)
### Parse command-line arguments ###
arg_parser = argparse.ArgumentParser(
    description="Packs files and directories into fixed-size volumes",
    # The trailing space matters: the two literals are concatenated as-is.
    epilog="For more information about this program, see the README file "
           "provided with the distribution.")
arg_parser.add_argument("-s", "--size", action="store", dest="maxbinsize",
                        type=float, default=703,
                        help="maximal size of each volume (bin), in MiB; the "
                        "default is 703 MiB, i.e. the size of a 80-minute "
                        "CD-ROM")
arg_parser.add_argument("--prefix", action="store", default="bin_",
                        help="prefix of a bin's name (default: \"bin_\")")
arg_parser.add_argument("--move", action="store_true",
                        help="move input to per-volume directories")
arg_parser.add_argument("-v", "--verbose", action="store_true", default=True,
                        help="be verbose (this is the default)")
arg_parser.add_argument("-q", "--quiet", action="store_false", dest="verbose",
                        help="print only warnings and errors")
arg_parser.add_argument("filenames", metavar="file", nargs="+",
                        help="files or directories to pack")
options = arg_parser.parse_args()
# A bin size that is not strictly positive cannot hold anything and would
# later cause a division by zero in the statistics; reject it up front.
# Written as "not > 0" so that NaN is rejected as well.
if not options.maxbinsize > 0:
    arg_parser.error("the maximal size of a volume must be greater than zero")
### Preliminary statistics ###
if options.verbose:
    print("# Maximum size of a bin: {:.2f} MiB".format(options.maxbinsize))

# Measure every input and group the file names by size; inputs that are
# larger than a whole bin are reported and left out.
sizes = defaultdict(list)
totalsize = 0
ignored_files = False
for filename in options.filenames:
    size = du(filename)
    if size > options.maxbinsize:
        warning = ('# WARNING! "{}" is {:.2f} MiB, which exceeds the maximum'
                   ' size of a bin: ignoring.')
        print(warning.format(filename, size))
        ignored_files = True
    else:
        sizes[size].append(filename)
        totalsize += size

# Report the theoretical lower bound on the number of bins
if options.verbose:
    print("# Total size of the input files: {:.2f} MiB".format(totalsize))
    minbins = math.ceil(totalsize / options.maxbinsize)
    sizefree = minbins * options.maxbinsize - totalsize
    print("# Minimal (optimal) number of bins required: ", minbins)
    print("# Theoretical unused space with {} bins: {:.2f} MiB"
          .format(minbins, sizefree))
### Assemble the bins ###
# Keep creating bins until every file has been taken out of `sizes`
bins = []
binnumber = 1
while sizes:
    new_bin = create_bin(binnumber)
    bins.append(new_bin)
    binnumber += 1
# Bins are numbered from 1, so the count is one less than the next number
binscreated = binnumber - 1
### Final statistics ###
if options.verbose:
    # Space left over across all the bins that were actually created
    sizefree = binscreated * options.maxbinsize - totalsize
    summary = ("# {} bins created.\n"
               "# Actual unused space over the {} bins created: {:.2f} MiB")
    print(summary.format(binscreated, binscreated, sizefree))
### Execute the requested action ###

# Print the contents of the bins
if options.verbose:
    for b in bins:
        b.print()

# Move the files
if options.move:
    for b in bins:
        dirname = b.name()

        # A non-directory already using the bin's name blocks this bin
        if os.path.exists(dirname) and not os.path.isdir(dirname):
            message = ('# WARNING! File "{}" exists but is not a directory:'
                       ' skipping bin #{}.')
            print(message.format(dirname, b.id))
            continue

        # Create the target directory:
        try:
            os.makedirs(dirname, exist_ok=True)
        except FileExistsError:
            message = ('# WARNING! Directory "{}" already exists with'
                       ' different permissions (mode). Proceeding anyway.')
            print(message.format(dirname))

        # Move the files in the directory:
        for filename in b.list_files():
            shutil.move(filename, dirname)
### Wrapping-up ###
# Remind the user of warnings that may have scrolled out of view
if ignored_files:
    reminder = "\n# WARNING! There were ignored files, see at the top of this log."
    print(reminder)