You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
245 lines
8.4 KiB
Python
245 lines
8.4 KiB
Python
#!/usr/bin/env python3
|
|
#
|
|
# dirpacker.py, Copyright © 2014 Matteo Cypriani <mcy@lm7.fr>
|
|
#
|
|
########################################################################
|
|
# This program is licensed under the terms of the Expat license.
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining
|
|
# a copy of this software and associated documentation files (the
|
|
# "Software"), to deal in the Software without restriction, including
|
|
# without limitation the rights to use, copy, modify, merge, publish,
|
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
# permit persons to whom the Software is furnished to do so, subject to
|
|
# the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be
|
|
# included in all copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
########################################################################
|
|
#
|
|
# This script groups input files by volumes of a given size. It is loosely
|
|
# inspired by datapacker but is able to work on directories rather than regular
|
|
# files only.
|
|
|
|
|
|
import argparse
|
|
import os
|
|
import shutil
|
|
import sys
|
|
from collections import defaultdict
|
|
import math
|
|
|
|
|
|
class Bin:
    """Represents a bin (a volume) containing files and directories.

    The display and naming methods read the module-level `options`
    namespace (attributes `prefix`, `machine_readable` and `maxbinsize`).
    """

    def __init__(self, bin_id, total_size, files):
        """Initialises the bin.

        bin_id is the bin's identifier, total_size the sum of the sizes of
        all the files it contains, and files is a dictionary with file names
        as the keys and sizes as values.
        """
        self.id = bin_id
        self.size = total_size
        self.files = files

    def print(self):
        """Displays the contents of the bin on the standard output.

        In machine-readable mode, one line per file is printed, made of the
        bin's name, a tabulation and the file name.  Otherwise a header, one
        line per file (size in MiB, tabulation, file name) and a summary
        (size of the bin and unused space) are printed.  Files are listed in
        the order given by sorting the (name, size) pairs.
        """
        if not options.machine_readable:
            print("\n### {} ###\nList of files:".format(self.name()))
        for filename, size in sorted(self.files.items()):
            # The first column is either the bin's name or the file's size;
            # the line is completed by the file name below.
            if options.machine_readable:
                print(self.name(), end="")
            else:
                print("{:12.2f} MiB".format(size), end="")
            print("\t{}".format(filename))
        if not options.machine_readable:
            sizefree = options.maxbinsize - self.size
            print("This bin's size: {:.2f} MiB".format(self.size))
            print("Unused: {:.2f} MiB".format(sizefree))

    def name(self):
        """Returns the bin's name.

        The name is `options.prefix` followed by the bin's identifier,
        zero-padded to at least two digits.
        """
        # Use this bin's own identifier rather than whatever bin a
        # module-level variable happens to reference.
        return "{}{:02}".format(options.prefix, self.id)

    def list_files(self):
        """Returns a list of the files contained in the bin.
        """
        return list(self.files.keys())
|
|
|
|
|
|
def warn(*message, prefix="Warning!", **args):
    """Writes a warning made of `prefix` followed by `message` to stderr.

    Pending standard output is flushed first, so that the warning shows up
    after everything that was printed before it.

    Apart from `prefix`, every keyword argument is handed over unchanged to
    print(), so the usual print() keywords (sep, end, ...) can be used.
    """
    sys.stdout.flush()
    # The prefix is simply the first item of what gets printed.
    items = (prefix,) + message
    print(*items, file=sys.stderr, **args)
|
|
|
|
|
|
def du(basepath):
    """Returns the size of the file or directory `basepath`, in MiB.

    The result is the size reported by os.path.getsize() for `basepath`
    itself plus, when it is a directory, for every file and directory found
    underneath it by os.walk().  Entries whose size cannot be read (for
    instance broken symbolic links, or entries that disappear while the tree
    is being walked) are ignored.

    An OSError is raised if the size of `basepath` itself cannot be read.
    """
    size = os.path.getsize(basepath)
    for root, dirs, files in os.walk(basepath):
        for name in files + dirs:
            # Ask for the size directly instead of testing for existence
            # first: this needs a single stat and cannot fail if the entry
            # vanishes between the test and the measurement.
            try:
                size += os.path.getsize(os.path.join(root, name))
            except OSError:
                continue
    # Bytes -> KiB -> MiB
    return size / 1024 / 1024
|
|
|
|
|
|
def create_bin(binnumber):
    """Creates a bin numbered `binnumber` from the global list of files and
    sizes, and returns it.

    The global dictionary `sizes` (size -> list of file names) is consumed:
    every file put in the bin is removed from it, and a size whose list
    becomes empty is deleted.  Sizes are tried from the largest to the
    smallest, and for each size files are added for as long as the bin does
    not exceed `options.maxbinsize`.
    """
    binsize = 0
    files = {}
    # sorted() builds a new list, so deleting keys from `sizes` while looping
    # is safe.
    for size in sorted(sizes, reverse=True):
        names = sizes[size]
        # Take as many files of this size as the bin can hold, not just one:
        # otherwise equally-sized files would each end up in their own bin.
        while names:
            newbinsize = binsize + size
            # Try the next, smaller size if we would exceed the bin's
            # maximum size with one more file of this size:
            if newbinsize > options.maxbinsize:
                break
            files[names.pop()] = size
            binsize = newbinsize
        # Delete the size from the dictionary if its last file name was
        # just popped:
        if not names:
            del sizes[size]

    return Bin(binnumber, binsize, files)
|
|
|
|
|
|
### Parse command-line arguments ###

arg_parser = argparse.ArgumentParser(
    description="Packs files and directories into fixed-size volumes",
    # Mind the trailing space: the two literals are concatenated.
    epilog="For more information about this program, see the README file "
           "provided with the distribution.")
arg_parser.add_argument("-s", "--size", action="store", dest="maxbinsize",
                        type=float, default=703,
                        help="maximal size of each volume (bin), in MiB; the "
                             "default is 703 MiB, i.e. the size of a 80-minute "
                             "CD-ROM")
arg_parser.add_argument("--prefix", action="store", default="bin_",
                        help="prefix of a bin's name (default: \"bin_\")")
arg_parser.add_argument("-n", "--first-bin-number", action="store", type=int,
                        default=1,
                        help="first number used for bin numbering (default: 1)")
arg_parser.add_argument("--move", action="store_true",
                        help="move input to per-volume directories")
arg_parser.add_argument("-m", "--machine-readable", action="store_true",
                        help="print the list of volumes in a machine-readable"
                             " format; implies --quiet")
# --verbose and --quiet share the same destination; verbose is on by default.
arg_parser.add_argument("-v", "--verbose", action="store_true", default=True,
                        help="be verbose (this is the default)")
arg_parser.add_argument("-q", "--quiet", action="store_false", dest="verbose",
                        help="print only warnings and errors")
arg_parser.add_argument("filenames", metavar="file", nargs="+",
                        help="files or directories to pack")
options = arg_parser.parse_args()

# A null, negative or NaN size makes no sense and would break the statistics
# (division by the maximal size); exit with a usage error instead.
if not options.maxbinsize > 0:
    arg_parser.error("the maximal size of a volume must be a positive number")

# The machine-readable output must not be mixed with informative messages.
if options.machine_readable:
    options.verbose = False
|
|
|
|
|
|
### Preliminary statistics ###

if options.verbose:
    print("Maximum size of a bin: {:.2f} MiB".format(options.maxbinsize))

# Compute the size of all the files.  `sizes` maps each size (in MiB) to the
# list of the input files having exactly that size; files that are too big to
# fit in a bin are reported and left out.
sizes = defaultdict(list)
totalsize = 0
ignored_files = False
for filename in options.filenames:
    size = du(filename)
    if size > options.maxbinsize:
        warn('"{}" is {:.2f} MiB, which exceeds the maximum size of a bin:'
             ' ignoring.'
             .format(filename, size))
        ignored_files = True
        continue
    sizes[size].append(filename)
    totalsize += size

if options.verbose:
    print("Total size of the input files: {:.2f} MiB".format(totalsize))
    # Lower bound on the number of bins: the total size spread over bins
    # filled to the brim.
    minbins = math.ceil(totalsize / options.maxbinsize)
    # Format the number into the message: passing it as a second argument to
    # print() would add a second space after the colon.
    print("Minimal (optimal) number of bins required: {}".format(minbins))
    sizefree = minbins * options.maxbinsize - totalsize
    print("Theoretical unused space with {} bins: {:.2f} MiB"
          .format(minbins, sizefree))
|
|
|
|
|
|
### Assemble the bins ###

# Keep creating bins until create_bin() has consumed every entry of `sizes`.
# Bins are numbered consecutively, starting from the number requested on the
# command line.
bins = []
while sizes:
    bins.append(create_bin(options.first_bin_number + len(bins)))
binscreated = len(bins)
# Number the next bin would get
binnumber = options.first_bin_number + binscreated
|
|
|
|
|
|
### Final statistics ###

# Report how many bins were needed and how much room is left in them overall.
if options.verbose:
    sizefree = binscreated * options.maxbinsize - totalsize
    print("{} bins created.\n"
          "Actual unused space over the {} bins created: {:.2f} MiB"
          .format(binscreated, binscreated, sizefree))
|
|
|
|
|
|
### Execute the requested action ###

# Display what each bin contains (human- or machine-readable listing)
if options.machine_readable or options.verbose:
    for b in bins:
        b.print()

# Move the input files into one directory per bin, named after the bin
if options.move:
    for b in bins:
        dirname = b.name()

        # Something that is not a directory already uses the name: leave
        # this bin's files where they are.
        if os.path.exists(dirname) and not os.path.isdir(dirname):
            warn(
                'File "{}" exists but is not a directory: skipping bin #{}.'
                .format(dirname, b.id))
            continue

        # Make sure the destination directory is there:
        try:
            os.makedirs(dirname, exist_ok=True)
        except FileExistsError:
            warn(
                'Directory "{}" already exists with different permissions'
                ' (mode). Proceeding anyway.'.format(dirname))

        # Transfer the bin's files into it:
        for filename in b.list_files():
            shutil.move(filename, dirname)
|
|
|
|
|
|
### Wrapping-up ###

# Remind the user that some input files were too big to be packed; the
# details were reported while the sizes were computed.
if ignored_files:
    warn("There were ignored files (see prior messages).")
|