From 5fee6133a50097dd50aa4a57b640d9f6c431ff4c Mon Sep 17 00:00:00 2001 From: Denis Knauf Date: Tue, 9 Apr 2024 18:33:43 +0200 Subject: [PATCH] script mail-compress imported --- mail-compress | 277 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 277 insertions(+) create mode 100755 mail-compress diff --git a/mail-compress b/mail-compress new file mode 100755 index 0000000..5624bd1 --- /dev/null +++ b/mail-compress @@ -0,0 +1,277 @@ +#!/usr/bin/env ruby + +# Find the mails you want to compress in a single maildir. +# +# Skip files that don't have ,S=<size> in the filename. +# +# Compress the mails to tmp/ +# +# Update the compressed files' mtimes to be the same as they were in the original files (e.g. touch command) +# +# Run maildirlock <path> <timeout>. It writes PID to stdout, save it. +# +# <path> is path to the directory containing Maildir's dovecot-uidlist (the control directory, if it's separate) +# +# <timeout> specifies how long to wait for the lock before failing. +# +# If maildirlock grabbed the lock successfully (exit code 0) you can continue. +# For each mail you compressed: +# +# Verify that it still exists where you last saw it. +# If it doesn't exist, delete the compressed file. Its flags may have been changed or it may have been expunged. This happens rarely, so just let the next run handle it. +# +# If the file does exist, rename() (mv) the compressed file over the original file. +# +# Dovecot can now read the file, but to avoid compressing it again on the next run, you'll probably want to rename it again to include e.g. a "Z" flag in the file name to mark that it was compressed (e.g. 1223212411.M907959P17184.host,S=3271:2,SZ). Remember that the Maildir specifications require that the flags are sorted by their ASCII value, although Dovecot itself doesn't care about that. +# +# Unlock the maildir by sending a TERM signal to the maildirlock process (killing the PID it wrote to stdout).
##
## mail-compress: walk a directory tree, find maildirs (directories that
## contain cur/, new/ and tmp/) and compress their mails with zstd.
## Mails that are xz-compressed are decompressed first and then recompressed.
##

require 'pathname'
require 'set'

# Location of Dovecot's maildirlock helper.
MAILDIRLOCK_BIN = '/usr/lib/dovecot/maildirlock'

# Locking is switched off: the script was imported with the remark
# "currently deactivated, maildirlock does not work".
MAILDIRLOCK_ENABLED = false

# Runs the given block while holding the maildir lock.
#
# While MAILDIRLOCK_ENABLED is false the block is simply executed unlocked.
#
# NOTE(review): the locking branch below follows the description in the file
# header (maildirlock prints the PID of the lock holder to stdout, the lock is
# released by sending TERM to that PID).  It has not been exercised - verify
# against the installed maildirlock before enabling it.
#
# @param path [#to_s] directory containing the maildir's dovecot-uidlist
# @return the block's value, or nil if the lock could not be obtained
def maildirlock(path)
  return yield unless MAILDIRLOCK_ENABLED

  io = IO.popen [MAILDIRLOCK_BIN, path.to_s, '10']
  lockpid = io.read.to_i
  io.close # reaps the child and sets $?
  unless $?.success?
    STDERR.puts "Locking maildir failed: #{path}"
    return
  end
  unless lockpid.positive?
    STDERR.puts "Locking maildir failed (invalid pid): #{path}"
    return
  end
  begin
    yield
  ensure
    Process.kill 'TERM', lockpid
  end
end

# Magic numbers of the recognised compression formats, as arrays of byte
# values.  Mails detected as :xz are recompressed, every other format is
# left as it is.
FileSigs = {
  xz: 'FD 37 7A 58 5A 00',
  gz: '1F 8B',
  lz4: '04 22 4D 18',
  bz2: '42 5A 68',
  zst: '28 B5 2F FD',
}.map { |format, hex| [format, hex.split.map { |byte| byte.to_i 16 }.freeze] }.to_h.freeze

# Running totals: bytes before compression / bytes currently on disk.
$before, $after = 0, 0

# Prints a message to STDERR and clears the rest of the terminal
# (the progress indicators are written with "\r" and no newline).
def log(text)
  STDERR.puts "#{text}\e[J"
end

# Formats a byte count with a binary unit prefix.
#
# The next larger unit is chosen as soon as the value reaches 786 of the
# current one.
#
# @param b [Numeric] number of bytes
# @return [String] e.g. "512B", "1.00kiB", "3.25GiB"
def human_bytes(b)
  return "#{b}B" if 786 > b
  b /= 1024.0
  %w[kiB MiB GiB TiB PiB EiB].each do |pf|
    return "%.2f%s" % [b, pf] if 786 > b
    b /= 1024
  end
  # b has been scaled down to ZiB by now.
  "%.2fZiB" % b
end

# Ratio of the compressed to the uncompressed total; 0 while nothing has
# been counted (Float division by zero yields NaN, it does not raise).
def compression_ratio
  $before.zero? ? 0 : $after.to_f / $before
end

# Runs an external filter that reads mf on stdin and writes tf on stdout.
# The output file is created with mode 0600.  On failure the message is
# logged and the partial output is removed so that it cannot block later runs.
#
# @param command [Array<String>] program and arguments
# @param failure [String] log message prefix used on failure
# @return [Boolean] true if the command exited with status 0
def filter_file(command, mf, tf, failure)
  pid = Process.spawn(*command, umask: 0177, in: mf.to_s, out: tf.to_s, close_others: true)
  _pid, pst = Process.waitpid2 pid
  return true if pst.success?
  log "#{failure}: #{mf}"
  File.unlink tf.to_s if File.exist? tf.to_s
  false
end

# Compresses mf into tf with `zstd -19`.
# @return [Boolean] success
def zstd(mf, tf)
  filter_file %w[zstd -19], mf, tf, ' Compression failed for'
end

# Decompresses the xz-compressed mf into tf.
# @return [Boolean] success
def unxz(mf, tf)
  filter_file %w[xz -d], mf, tf, ' Decompression failed for'
end

# Copies owner, group, permission bits and atime/mtime from stat to tf.
#
# @param tf [#to_s] file to update
# @param stat [File::Stat] attributes of the original mail
def restat(tf, stat)
  File.chown stat.uid, stat.gid, tf.to_s
  File.chmod stat.mode & 07777, tf.to_s # permission bits only, no file type
  File.utime stat.atime, stat.mtime, tf.to_s
end

# Replaces the mail mf by the prepared temporary file tf under the name nf.
#
# tf is hard-linked to nf and mf is removed afterwards.  If mf vanished in
# the meantime (flags changed, expunged) nothing is placed.  The temporary
# file is removed in every case.
#
# @return [Boolean] true if nf has been put in place
# @raise [RuntimeError] if mf and nf name the same file
def place(mf, nf, tf)
  raise "Placing mail refused: old and new filenames are equal: #{mf} | #{nf}" if mf.to_s == nf.to_s
  placed = false
  maildirlock mf.maildir do
    if mf.exist?
      nf.link tf
      mf.unlink
      placed = true
    else
      log " Somebody removed the old mailfile, compression aborted: #{mf}"
    end
  end
  placed
ensure
  tf.unlink if tf.exist?
end

# A parsed maildir file name:
# <maildir>/<dir>/<name>[,S=<size>][,W=<vsize>][:2,<flags>]
class MailFilename
  DIRS = %w[cur tmp new].freeze

  attr_reader :maildir, :dir, :name, :flags, :size, :vsize, :v2

  # @param maildir [#to_s] path of the maildir
  # @param dir [#to_s] one of cur, tmp, new
  # @param name [String] unique part of the file name
  # @param flags [Enumerable<String>, nil] single-character flags
  # @param size [Integer, nil] value of the S= field
  # @param vsize [Integer, nil] value of the W= field
  # @param v2 [Boolean, nil] whether the name carries the ":2," info part
  # @raise [ArgumentError] if dir is not cur, tmp or new
  def initialize(maildir, dir, name, flags, size = nil, vsize = nil, v2: nil)
    dir = dir.to_s
    raise ArgumentError, "Invalid directory in maildir [#{maildir}]: #{dir.inspect}" unless DIRS.include? dir
    @maildir, @dir, @name, @flags, @size, @vsize, @v2 = maildir, dir, name, Set.new(flags), size, vsize, v2
  end

  # Returns a copy of this file name with the given parts replaced.
  # The flags are copied, so the receiver is never modified.
  #
  # @param compressed [Boolean, nil] set or clear the Z flag in the copy
  # @return [MailFilename]
  def new(maildir: nil, dir: nil, name: nil, flags: nil, size: nil, vsize: nil, compressed: nil, v2: nil)
    copy = self.class.new maildir || @maildir, dir || @dir, name || @name, flags || @flags,
                          size || @size, vsize || @vsize, v2: (v2.nil? ? @v2 : !!v2)
    copy.compressed = compressed unless compressed.nil?
    copy
  end

  # Sets or clears the Z flag.
  def compressed=(val)
    if val
      @flags.add 'Z'
    else
      @flags.delete 'Z'
    end
  end

  def size=(val)
    raise ArgumentError, "Mail S=size must be an numeric value" unless Integer === val
    @size = val
  end

  def vsize=(val)
    raise ArgumentError, "Mail W=vsize must be an numeric value" unless Integer === val
    @vsize = val
  end

  def compressed?() @flags.include? 'Z' end
  def v2=(val) @v2 = !!val end
  def v2?() @v2 end
  def fullname() File.join @maildir, @dir, basename end
  alias to_s fullname
  def dirname() File.join @maildir, @dir end
  def exist?() File.exist? to_s end
  def stat() File.stat to_s end
  def read(v = nil) File.read to_s, v end
  # Creates this file as a hard link to old.
  def link(old) File.link old.to_s, to_s end
  def unlink() File.unlink to_s end

  # File name without directory; the flags are written in sorted order.
  def basename
    "#{@name}#{@size ? ",S=#{@size}" : ''}#{@vsize ? ",W=#{@vsize}" : ''}#{@v2 ? ":2,#{@flags.sort.join}" : ''}"
  end

  # Detects the compression format from the first bytes of the file.
  #
  # @return [Symbol, nil] a key of FileSigs, nil if no signature matches
  #   (which includes empty files)
  def compress_algorithm
    head = read(8).to_s.bytes
    FileSigs.find { |_format, sig| sig == head[0...sig.length] }&.first
  end

  # Parses a mail file name.
  #
  # Without maildir, pathname is taken as a full path and maildir and dir are
  # derived from it.  With maildir, pathname is taken as the bare file name.
  #
  # @return [MailFilename, nil] nil for names starting with a dot and for
  #   names with fields other than S=<digits> / W=<digits>
  # @raise [ArgumentError] if the directory is not cur, tmp or new
  def self.parse(pathname, maildir = nil, dir = nil)
    filename =
      if maildir
        pathname.to_s
      else
        d = File.dirname pathname
        maildir, dir = File.dirname(d), File.basename(d)
        File.basename pathname
      end
    parts = /\A([^.].*?)(?::2,(\w*)|)\z/.match filename
    return nil unless parts
    v2 = !parts[2].nil?
    flags = parts[2].to_s.chars
    name, *fields = parts[1].split ','
    size, vsize = nil, nil
    fields.each do |field|
      kv = /\A([SW])=(\d+)\z/.match field
      return nil unless kv
      case kv[1]
      when 'S' then size = kv[2].to_i
      when 'W' then vsize = kv[2].to_i
      end
    end
    new maildir, dir, name, flags, size, vsize, v2: v2
  end
end

# Decompresses the xz-compressed mail mf and puts the plain mail in its place.
#
# @param stat [File::Stat] attributes of mf, carried over to the new file
# @return [MailFilename, nil] the uncompressed mail, nil if nothing was placed
def decompress_xz(mf, stat)
  nf = mf.new compressed: false
  tf = nf.new dir: :tmp
  if nf.exist?
    log " Uncompressed mail already exist: #{nf}"
    return nil
  end
  if tf.exist?
    log " Temporary file already exist: #{tf}"
    return nil
  end
  STDERR.print "<\r"
  return nil unless unxz mf, tf
  STDERR.print "<<\r"
  restat tf, stat
  return nil unless place mf, nf, tf
  STDERR.print "<<<\r"
  nf
end

# Handles one mail file: normalises its name (":2," info part, Z flag for
# already compressed content), recompresses xz mails and compresses plain
# mails with zstd into cur/.  Updates the $before/$after statistics.
#
# @param xf [#to_s] path of the mail file
# @raise [RuntimeError] if the parsed name does not reproduce the path
def process(xf)
  mf = MailFilename.parse xf.to_s
  return unless mf
  raise "Parsed Pathname does not equal original Pathname: #{mf} | #{xf}" unless mf.to_s == xf.to_s
  stat = mf.stat
  fs = mf.compress_algorithm

  unless mf.v2?
    old = mf.to_s
    mf.v2 = true
    log " Old file name pattern #{old} -> #{mf}"
    File.rename old, mf.to_s
  end

  if fs
    unless mf.compressed?
      old = mf.to_s
      mf.compressed = true
      log " File has signature of #{fs} but has no Z-flag: #{old} => #{mf}"
      File.rename old, mf.to_s
    end
    # xz mails are recompressed; the recursive call does the accounting.
    plain = :xz == fs ? decompress_xz(mf, stat) : nil
    return process(plain) if plain
    # Already compressed (or xz that could not be converted): count it once
    # and never compress it a second time.
    $before += mf.size || stat.size
    $after += stat.size
    return
  end

  STDERR.printf " [%s => %s | %f] %s\e[J\r", human_bytes($before), human_bytes($after), compression_ratio, mf
  $before += mf.size || stat.size
  $after += stat.size
  tf, nf = mf.new(dir: :tmp), mf.new(dir: :cur)
  nf.compressed = true

  # A mail smaller than one block cannot occupy less space when compressed.
  blksize = stat.blksize
  return if blksize && stat.size < blksize
  return log " Compressed mail already exist: #{nf}" if nf.exist?
  return log " Temporary file already exist: #{tf}" if tf.exist?

  STDERR.print ".\r"
  return unless zstd mf, tf
  STDERR.print "..\r"
  restat tf, stat
  gain = tf.stat.size - stat.size # taken now, place removes tf
  return unless place mf, nf, tf
  $after += gain
  STDERR.print "...\r"
end

# Processes every regular file found in the cur/ and new/ directories of all
# maildirs below basedir and prints the totals afterwards.
#
# @param basedir [Pathname] directory tree to search
def compress_maildirs(basedir)
  basedir.find do |path|
    next unless path.file?
    d = path.dirname
    next unless %w[cur new].include?(d.basename.to_s)
    d = d.dirname
    next unless %w[tmp new cur].all? { |sub| d.join(sub).exist? }
    process path
  end

  STDERR.printf "[%s => %s | %f]\e[J\n", human_bytes($before), human_bytes($after), compression_ratio
end

if ARGV.empty?
  warn "Usage: #{File.basename $PROGRAM_NAME} DIRECTORY"
else
  compress_maildirs Pathname.new(ARGV[0])
end