mail-compress/mail-compress
2024-04-09 21:43:50 +02:00

302 lines
8.3 KiB
Ruby
Executable file

#!/usr/bin/env ruby
# Find the mails you want to compress in a single maildir.
#
# Skip files that don't have ,S=<size> in the filename.
#
# Compress the mails to tmp/
#
# Update the compressed files' mtimes to be the same as they were in the original files (e.g. touch command)
#
# Run maildirlock <path> <timeout>. It writes PID to stdout, save it.
#
# <path> is path to the directory containing Maildir's dovecot-uidlist (the control directory, if it's separate)
#
# <timeout> specifies how long to wait for the lock before failing.
#
# If maildirlock grabbed the lock successfully (exit code 0) you can continue.
# For each mail you compressed:
#
# Verify that it still exists where you last saw it.
# If it doesn't exist, delete the compressed file. Its flags may have been changed or it may have been expunged. This happens rarely, so just let the next run handle it.
#
# If the file does exist, rename() (mv) the compressed file over the original file.
#
# Dovecot can now read the file, but to avoid compressing it again on the next run, you'll probably want to rename it again to include e.g. a "Z" flag in the file name to mark that it was compressed (e.g. 1223212411.M907959P17184.host,S=3271:2,SZ). Remember that the Maildir specifications require that the flags are sorted by their ASCII value, although Dovecot itself doesn't care about that.
#
# Unlock the maildir by sending a TERM signal to the maildirlock process (killing the PID it wrote to stdout).
##
## <http://ivaldi.nl/blog/2011/12/06/compressed-mail-in-dovecot/>
##
require 'pathname'
require 'set'
# Runs the given block under the dovecot maildir lock for +path+ and returns
# the block's value.
#
# Locking is currently deactivated: the first statement yields straight to the
# block, so everything after it is unreachable until that line is removed.
#
# The dormant locking path below spawns /usr/lib/dovecot/maildirlock with a
# 10 second timeout, reads the pid of the lock holder, runs the block and
# finally sends TERM to the lock holder to release the lock.
#
# NOTE(review): the header notes of this file say maildirlock writes its PID
# to stdout, while this code treats the output as the path of a pid file --
# confirm against the installed maildirlock before reactivating.
def maildirlock path
  return yield # currently deactivated

  io = IO.popen ['/usr/lib/dovecot/maildirlock', path.to_s, '10'], out: %i[child out]
  lockpidfile = Pathname.new io.read
  _pid, pst = Process.wait2
  unless 0 == pst.exitstatus && lockpidfile.exist?
    STDERR.puts "Locking maildir failed: #{path}"
    return
  end
  lockpid = lockpidfile.read.to_i
  # String#to_i yields 0 for anything that is not a number, so 0 means "no pid".
  if 0 == lockpid
    STDERR.puts "Locking maildir failed (invalid pid): #{path}"
    return
  end
  begin
    yield
  ensure
    # Process.kill takes the signal first; TERM releases the lock.
    Process.kill 'TERM', lockpid
  end
end
# Leading magic bytes of the compressed formats this script recognises,
# keyed by format. Written as hex octets here and turned into arrays of
# integers right below, so they can be compared with the first bytes of a file.
FileSigs = {
  xz: 'FD 37 7A 58 5A 00', # xz mails get converted to another format.
  gz: '1F 8B',
  lz4: '04 22 4D 18',
  bz2: '42 5A 68',
  zst: '28 B5 2F FD',
}
FileSigs.keys.each do |format|
  FileSigs[format] = FileSigs[format].split.map { |octet| octet.to_i 16 }
end

# Running totals in bytes: mail sizes before and after compression.
$before = 0
$after = 0
# Prints +text+ as one line on STDERR, followed by the escape sequence
# ESC [ J (presumably to wipe what is left of the progress output that is
# printed with a trailing carriage return).
def log(text)
  STDERR.puts format("%s\e[J", text)
end
# Formats a byte count +b+ for display with binary prefixes.
#
# Values below 786 are printed as plain bytes ("123B"). Larger values are
# divided by 1024 until they drop below 786 and are printed with two decimals
# ("1.50kiB", "0.77MiB"). Values too large even for EiB are printed in ZiB.
def human_bytes b
  return "#{b}B" if 786 > b

  scaled = b / 1024.0
  %w[kiB MiB GiB TiB PiB EiB].each do |unit|
    return "%.2f%s" % [scaled, unit] if 786 > scaled
    scaled /= 1024
  end
  # After the loop the value has been divided by 1024 seven times, so it is
  # expressed in ZiB; print the scaled value, not the original byte count.
  "%.2fZiB" % scaled
end
# Compresses the file +mf+ into the file +tf+ by running `zstd -19` with
# +mf+ as its standard input and +tf+ as its standard output. The child runs
# with umask 0177.
#
# Returns true when zstd exits with status 0; otherwise logs the failure and
# returns false.
def zstd(mf, tf)
  child = Process.spawn('zstd', '-19', umask: 0177, in: mf.to_s, out: tf.to_s, close_others: true)
  status = Process.waitpid2(child).last
  return true if status.exitstatus == 0

  log " Compression failed for: #{mf}"
  false
end
# Decompresses the xz file +mf+ into the file +tf+ by running `xz -d` with
# +mf+ as its standard input and +tf+ as its standard output. The child runs
# with umask 0177.
#
# Returns true when xz exits with status 0; otherwise logs the failure and
# returns false.
def unxz(mf, tf)
  child = Process.spawn('xz', '-d', umask: 0177, in: mf.to_s, out: tf.to_s, close_others: true)
  status = Process.waitpid2(child).last
  return true if status.exitstatus == 0

  log " Decompression [xz] failed for: #{mf}"
  false
end
# Decompresses the zstd file +mf+ into the file +tf+ by running `zstd -d`
# with +mf+ as its standard input and +tf+ as its standard output. The child
# runs with umask 0177.
#
# Returns true when zstd exits with status 0; otherwise logs the failure and
# returns false.
def unzstd(mf, tf)
  child = Process.spawn('zstd', '-d', umask: 0177, in: mf.to_s, out: tf.to_s, close_others: true)
  status = Process.waitpid2(child).last
  return true if status.exitstatus == 0

  log " Decompression [zstd] failed for: #{mf}"
  false
end
# Copies owner, group, mode, atime and mtime from +stat+ (a File::Stat of the
# original mail) onto the file +tf+.
#
# Ownership is applied first and the mode afterwards; the timestamps go last.
def restat(tf, stat)
  target = tf.to_s
  File.chown(stat.uid, stat.gid, target)
  File.chmod(stat.mode, target)
  File.utime(stat.atime, stat.mtime, target)
end
# Puts the prepared temporary file +tf+ in place of the mail +mf+ under the
# new name +nf+.
#
# Inside the maildir lock: when +mf+ still exists, +nf+ is hard-linked to +tf+
# and +mf+ is removed. When +mf+ has vanished in the meantime, nothing is
# placed and a message is logged. In every case the temporary file is removed
# afterwards, so an aborted run leaves nothing behind in tmp/.
#
# Returns true when the mail was placed, false otherwise.
# Raises RuntimeError when +mf+ and +nf+ name the same file.
def place mf, nf, tf
  # Compare the path names: the file name objects do not define ==, so
  # comparing the objects themselves would only detect identical objects.
  raise "Placing mail refused: old and new filenames are equal: #{mf} | #{nf}" if mf.to_s == nf.to_s

  placed = false
  begin
    maildirlock mf.maildir do
      if mf.exist?
        nf.link tf
        mf.unlink
        placed = true
      else
        log " Somebody removed the old mailfile, compression aborted: #{mf}"
      end
    end
  ensure
    # Also reached when the original vanished or linking failed.
    tf.unlink if tf.exist?
  end
  placed
end
# A maildir message file name split into its parts:
#
#   <maildir>/<dir>/<name>[,S=<size>][,W=<vsize>][:2,<flags>]
#
# +dir+ is one of cur, tmp or new. +flags+ is kept as a Set of single
# characters; the flag "Z" marks a compressed mail. +v2+ tells whether the
# name carries the ":2," flag section at all.
class MailFilename
  attr_reader :maildir, :dir, :name, :flags, :size, :vsize, :v2

  # Raises ArgumentError unless +dir+ is cur, tmp or new.
  # +flags+ is copied into a fresh Set, so instances never share flag sets.
  def initialize maildir, dir, name, flags, size=nil, vsize=nil, v2: nil
    dir = dir.to_s
    raise ArgumentError, "Invalid directory in maildir [#{maildir}]: #{dir.inspect}" unless %w[cur tmp new].include? dir
    @maildir, @dir, @name, @flags, @size, @vsize, @v2 = maildir, dir, name, Set.new( flags), size, vsize, v2
  end

  # Returns a copy of this file name with the given parts replaced. Parts
  # left nil are taken over from the receiver. +compressed+ sets or clears
  # the Z flag on the copy; +v2+ is coerced to true/false.
  def new maildir: nil, dir: nil, name: nil, flags: nil, size: nil, vsize: nil, compressed: nil, v2: nil
    copy = self.class.new maildir || @maildir, dir || @dir, name || @name, flags || @flags,
                          size || @size, vsize || @vsize, v2: (v2.nil? ? @v2 : !!v2)
    copy.compressed = compressed unless compressed.nil?
    copy
  end

  # Adds (truthy +val+) or removes (falsy +val+) the Z flag.
  def compressed= val
    if val
      @flags.add 'Z'
    else
      @flags.delete 'Z'
    end
  end

  # Sets the S= value. Raises ArgumentError unless +val+ is an Integer.
  def size= val
    raise ArgumentError, "Mail S=size must be an numeric value" unless Integer === val
    @size = val
  end

  # Sets the W= value. Raises ArgumentError unless +val+ is an Integer.
  def vsize= val
    raise ArgumentError, "Mail W=vsize must be an numeric value" unless Integer === val
    @vsize = val
  end

  # True when the Z flag is set.
  def compressed?
    @flags.include? 'Z'
  end

  def v2=( val)
    @v2 = !!val
  end

  def v2?
    @v2
  end

  # Full path: maildir, directory and base name joined.
  def fullname
    File.join @maildir, @dir, basename
  end
  alias to_s fullname

  def dirname
    File.join @maildir, @dir
  end

  def exist?
    File.exist? to_s
  end

  def stat
    File.stat to_s
  end

  # Reads the file; +v+ limits the number of bytes read (nil reads it all).
  def read( v=nil)
    File.read to_s, v
  end

  # Creates this file name as a hard link to the existing file +old+.
  def link( old)
    File.link old.to_s, self.to_s
  end

  def unlink
    File.unlink to_s
  end

  # File name without directories. Flags are emitted sorted, and only when
  # the name has a ":2," section.
  def basename
    "#{@name}#{@size ? ",S=#{@size}" : ''}#{@vsize ? ",W=#{@vsize}" : ''}#{@v2 ? ":2,#{@flags.sort.join}" : ''}"
  end

  # Returns the key of the FileSigs entry whose signature matches the first
  # bytes of the file, or nil when none matches.
  def compress_algorithm
    # A length-limited read returns nil for an empty file; treat that as
    # "no bytes" instead of failing.
    head = read( 8).to_s.bytes
    FileSigs.find {|_format, sig| sig == head[0...sig.length] }&.first
  end

  # Parses a message file name into a MailFilename.
  #
  # With only +pathname+ given, maildir and directory are derived from the
  # path. With +maildir+ (and +dir+) given, +pathname+ is taken as the bare
  # file name.
  #
  # Returns nil for names starting with a dot and for names carrying
  # comma-separated fields other than S=<digits> and W=<digits>.
  def self.parse pathname, maildir=nil, dir=nil
    filename =
      if maildir
        pathname.to_s
      else
        d = File.dirname pathname
        maildir, dir = File.dirname(d), File.basename(d)
        File.basename pathname
      end
    m = /\A([^.].*?)(?::2,(\w*)|)\z/.match filename
    return nil unless m

    n, f = m[1], m[2]
    v2 = !f.nil?
    fl = f.to_s.chars
    name, *fields = n.split ','
    s, w = nil, nil
    fields.each do |field|
      fm = /\A([SW])=(\d+)\z/.match field
      return nil unless fm

      case fm[1]
      when 'S' then s = fm[2].to_i
      when 'W' then w = fm[2].to_i
      end
    end
    new maildir, dir, name, fl, s, w, v2: v2
  end
end
# Handles one mail file +xf+ (anything that responds to to_s with its path).
#
# Steps, in order:
# 1. Parse the file name; names that do not parse are ignored. A name that
#    does not round-trip through MailFilename raises RuntimeError.
# 2. Names without a ":2," flag section are renamed to carry one.
# 3. Mails whose first bytes match a known compression signature get the Z
#    flag if they lack it. They are added to the totals and left alone --
#    except xz mails, which are decompressed into tmp/, put in place without
#    the Z flag and then processed again as plain mails.
# 4. Everything else is added to the totals and, unless it is smaller than
#    one file system block, compressed with zstd into tmp/ and put into cur/
#    with the Z flag.
#
# NOTE(review): the header notes of this file say mails without ,S=<size>
# should be skipped; this method compresses them as well -- confirm intent.
def process xf
  return unless mf = MailFilename.parse( xf.to_s)
  raise "Parsed Pathname does not equal original Pathname: #{mf} | #{xf}" unless mf.to_s == xf.to_s

  stat = mf.stat
  fs = mf.compress_algorithm
  unless mf.v2?
    x = mf.to_s
    mf.v2 = true
    log " Old file name pattern #{x} -> #{mf}"
    File.rename x, mf.to_s
  end
  if fs
    if mf.compressed?
      $before += mf.size || stat.size
      $after += stat.size
      return unless :xz == fs
    else
      x = mf.to_s
      mf.compressed = true
      log " File has signature of #{fs} but has no Z-flag: #{x} => #{mf}"
      File.rename x, mf.to_s
      # The mail is compressed already. Only xz goes on (to be converted);
      # anything else must not reach the zstd step below a second time.
      unless :xz == fs
        $before += mf.size || stat.size
        $after += stat.size
        return
      end
    end
  end
  # Deliberately switched off: turning zstd mails back into plain mails.
  if false and :zst == fs
    nf = mf.new compressed: false
    tf = nf.new dir: :tmp
    return log " Uncompressed mail already exist: #{nf}" if nf.exist?
    return log " Temporary file already exist: #{tf}" if tf.exist?
    STDERR.print "<\r"
    return unless unzstd mf.to_s, tf.to_s
    STDERR.print "<<\r"
    restat tf, stat
    place mf, nf, tf
    STDERR.print "<<<\r"
    return process(nf)
  end
  if :xz == fs
    nf = mf.new compressed: false
    tf = nf.new dir: :tmp
    return log " Uncompressed mail already exist: #{nf}" if nf.exist?
    return log " Temporary file already exist: #{tf}" if tf.exist?
    STDERR.print "<\r"
    return unless unxz mf.to_s, tf.to_s
    STDERR.print "<<\r"
    restat tf, stat
    place mf, nf, tf
    STDERR.print "<<<\r"
    return process(nf)
  end
  # Float division by zero yields NaN instead of raising, so test for the
  # empty total explicitly.
  ratio = $before.zero? ? 0.0 : $after.to_f / $before
  STDERR.printf " [%s => %s | %f] %s\e[J\r", human_bytes($before), human_bytes($after), ratio, mf
  $before += mf.size || stat.size
  $after += stat.size
  tf, nf = mf.new( dir: :tmp), mf.new( dir: :cur)
  nf.compressed = true
  # Mails smaller than one block are left uncompressed.
  return if stat.size < stat.blksize
  return log " Compressed mail already exist: #{nf}" if nf.exist?
  return log " Temporary file already exist: #{tf}" if tf.exist?
  STDERR.print ".\r"
  return unless zstd mf, tf
  STDERR.print "..\r"
  restat tf, stat
  # Replace the uncompressed size counted above by the compressed size.
  $after += tf.stat.size - stat.size
  place mf, nf, tf
  STDERR.print "...\r"
end
# Entry point: walks the directory given as first argument and processes every
# regular file that lies directly in the cur/ or new/ directory of a maildir
# (a directory that has tmp/, new/ and cur/). Prints the totals at the end.
abort "Usage: #{$0} <directory>" if ARGV.empty?
basedir = Pathname.new ARGV[0]
basedir.find do |path|
  d = path.dirname
  next unless %w[cur new].include?( d.basename.to_s)
  d = d.dirname
  next unless d.join('tmp').exist?
  next unless d.join('new').exist?
  next unless d.join('cur').exist?
  # Only regular files are mails; a directory would fail when being read.
  next unless path.file?
  process path
end
# Float division by zero yields NaN instead of raising, so test for the empty
# total explicitly.
ratio = $before.zero? ? 0.0 : $after.to_f / $before
STDERR.printf "[%s => %s | %f]\e[J\n", human_bytes($before), human_bytes($after), ratio