Collector split into dovecot, postfix and general files/classes. More unknown log lines eliminated. Dependency updates.

This commit is contained in:
Denis Knauf 2022-05-19 14:02:17 +02:00
parent 7232802176
commit f057ce701f
8 changed files with 719 additions and 232 deletions

2
.gitignore vendored
View file

@ -1,2 +1,4 @@
*.sw[opqnm] *.sw[opqnm]
*~ *~
/.bundle
/sbin

View file

@ -4,3 +4,4 @@ gem 'systemd-journal' #, '~> 1.3.0'
gem 'prometheus-client' gem 'prometheus-client'
gem 'rack' gem 'rack'
gem 'puma' gem 'puma'
gem 'sd_notify'

View file

@ -7,6 +7,7 @@ GEM
puma (5.6.4) puma (5.6.4)
nio4r (~> 2.0) nio4r (~> 2.0)
rack (2.2.3) rack (2.2.3)
sd_notify (0.1.1)
systemd-journal (1.4.2) systemd-journal (1.4.2)
ffi (~> 1.9) ffi (~> 1.9)
@ -17,6 +18,7 @@ DEPENDENCIES
prometheus-client prometheus-client
puma puma
rack rack
sd_notify
systemd-journal systemd-journal
BUNDLED WITH BUNDLED WITH

View file

@ -2,8 +2,20 @@ require 'rack'
require './postfix_exporter' require './postfix_exporter'
require 'socket' require 'socket'
# NOTE(review): this span comes from a side-by-side diff view; the first two
# lines each carry a removed statement followed by the added one.
# Settings keeps the environment name as a symbol and offers predicate
# helpers for development, production and test.
collector = Collector.new class Settings
th = collector.start def initialize environment
@environment = environment.to_s.to_sym
end
attr_reader :environment
alias :env :environment
def development?() :development == environment end
def production?() :production == environment end
def test?() :test == environment end
end
# The environment name is read from RACK_ENV; the resulting settings object
# is handed to the collector, which is then started.
settings = Settings.new ENV['RACK_ENV']
collector = Collector.new settings: settings
collector.start
showqpath = '/var/spool/postfix/public/showq' showqpath = '/var/spool/postfix/public/showq'
prometheus = collector.prometheus prometheus = collector.prometheus
metrics = OpenStruct.new( metrics = OpenStruct.new(

76
lib/collector.rb Normal file
View file

@ -0,0 +1,76 @@
# Follows the systemd journal and hands every entry to the collector that is
# responsible for the logging unit (Dovecot or Postfix). Those collectors
# update the metrics registered on the Prometheus registry.
class Collector
  attr_reader :journal, :prometheus, :settings

  # Registers metrics on the wrapped object under "<prefix>_<name>".
  # The wrapped object may itself be a PrefixProxy, so prefixes stack.
  class PrefixProxy
    attr_reader :prometheus, :prefix

    def initialize prometheus, prefix
      @prometheus, @prefix = prometheus, prefix
    end

    def counter name, **options
      @prometheus.counter :"#{prefix}_#{name}", **options
    end

    def gauge name, **options
      @prometheus.gauge :"#{prefix}_#{name}", **options
    end

    def histogram name, **options
      @prometheus.histogram :"#{prefix}_#{name}", **options
    end

    def summary name, **options
      @prometheus.summary :"#{prefix}_#{name}", **options
    end
  end

  # Builds a collector and starts it right away.
  # +settings+ is forwarded to #initialize, so callers of this shortcut can
  # control the logging of unknown units as well.
  # Returns the Thread created by #start.
  def self.start prometheus: nil, journal: nil, settings: nil
    new( prometheus: prometheus, journal: journal, settings: settings).start
  end

  # prometheus:: registry to use, defaults to Prometheus::Client.registry
  # journal::    journal to read, defaults to a system-only Systemd::Journal
  # settings::   optional object answering #development?
  def initialize prometheus: nil, journal: nil, settings: nil
    @settings = settings
    @journal = journal || Systemd::Journal.new( flags: Systemd::Journal::Flags::SYSTEM_ONLY)
    @prometheus = prometheus || Prometheus::Client.registry
    @store = {}
    @errors = @prometheus.counter :postfix_exporter_errors_total, docstring: 'Count internal errors/exceptions'
    @dovecot = Dovecot.new @store, PrefixProxy.new( @prometheus, :dovecot)
    @postfix = Postfix.new @store, PrefixProxy.new( @prometheus, :postfix)
  end

  # Seeks to the tail of the journal and dispatches every watched entry by
  # its systemd unit. Entries of other units are only printed when no
  # settings were given or the settings report development mode.
  def run
    @journal.seek :tail
    @journal.move_previous
    @journal.watch do |entry|
      case entry._systemd_unit
      when 'dovecot.service'
        @dovecot.collect entry
      when 'postfix@-.service'
        @postfix.collect entry
      else
        if @settings.nil? or @settings.development?
          STDERR.puts "# unit: #{entry._systemd_unit}: #{entry.syslog_identifier} #{entry.message}"
        end
      end
    end
  end

  # Runs #run in a new thread and returns that thread.
  # SystemExit and Interrupt are re-raised; anything else is counted,
  # printed to STDERR and followed by a retry, which calls #run again.
  def start
    Thread.abort_on_exception = true
    Thread.new do
      begin
        run
      rescue SystemExit, Interrupt
        raise
      rescue Object
        @errors.increment
        STDERR.puts "#$! (#{$!.class})", $!.backtrace.map {|x| "  in #{x}"}
        retry
      end
    end
  end
end
require_relative 'collector/postfix'
require_relative 'collector/dovecot'

171
lib/collector/dovecot.rb Normal file
View file

@ -0,0 +1,171 @@
class Collector::Dovecot
# Counts what sieve did with a mail, labelled by the delivering process.
class Sieve
  def initialize store, prometheus
    @store = store
    @stored_into_mailbox = prometheus.counter :stored_into_mailbox_total, docstring: 'A counter of mails stored in mailbox by sieve', labels: %i[process]
    @forwards = prometheus.counter :forwared_mails_total, docstring: 'A counter of mails forwareded to other address', labels: %i[process]
    @discarded_duplicate_forward = prometheus.counter :discarded_duplicate_forward_total, docstring: 'A counter of discarded duplicates, which will not be forwarded.', labels: %i[process]
    # Message pattern => counter, checked in this order by #collect.
    @rules = [
      [/ stored mail into mailbox /, @stored_into_mailbox],
      [/ forwarded to /, @forwards],
      [/ discarded duplicate forward to /, @discarded_duplicate_forward],
    ]
    # Touch every counter with 0 for both known processes.
    %w[lmtp deliver].each do |process|
      @rules.each {|_pattern, counter| counter.increment by: 0, labels: {process: process} }
    end
  end

  # entry::   journal entry, only used to print unmatched lines
  # process:: label value for the counters
  # msg::     the sieve part of the log line
  def collect entry, process, msg
    _pattern, counter = @rules.find {|pattern, _counter| pattern === msg }
    if counter
      counter.increment labels: {process: process}
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} sieve| #{entry.message}"
    end
  end
end
# Handles the log lines of one delivering process (its name is given as
# +process+) and forwards the "sieve: ..." lines to the sieve collector.
class Delivery
  def initialize store, prometheus, sieve, saved_mailbox, process
    @store = store
    @sieve = sieve
    @process = process
    @connect = prometheus.counter "#{process}_connect_total", docstring: "A counter of connection via #{process}"
    @disconnect = prometheus.counter "#{process}_disconnect_total", docstring: "A counter of disconnect at #{process}"
    # This counter is handed in by the caller and labelled with +process+.
    @saved_mail_to_mailbox = saved_mailbox
  end

  # entry:: journal entry, passed on to sieve or printed when unmatched
  # msg::   log line without the process prefix
  def collect entry, msg
    if /\AConnect from /.match?( msg)
      @connect.increment
    elsif /\ADisconnect from /.match?( msg)
      @disconnect.increment
    elsif /saved mail to /.match?( msg)
      @saved_mail_to_mailbox.increment labels: {process: @process}
    elsif (sieve_line = /\Asieve: (.*)/.match( msg))
      @sieve.collect entry, @process, sieve_line[1]
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} delivery| #{entry.message}"
    end
  end
end
# Collects metrics from the log lines of imap sessions: how a session ended,
# the traffic statistics printed at that moment and slow Maildir scans.
class Imap
  DISCONNECT_REASONS = ['logged out', 'connection closed', 'inactivity'].freeze
  STAT_KEYS = %i[in out deleted expunged trashed hdr_count hdr_bytes body_count body_bytes].freeze

  def initialize store, prometheus
    @connection_closed = prometheus.counter :connection_closed_total, docstring: 'A counter of closed connection on dovecot'
    @inactivity = prometheus.counter :disconnected_inactivity_total, docstring: 'A counter for disconnect for inactivity.'
    # Kept in its own variable: it used to be overwritten by the Hash of
    # statistics counters below and therefore was never incremented.
    @disconnects = prometheus.counter :connection_stats_total, docstring: 'A counter for observed statistics after disconnected.', labels: %i[disconnect_reason]
    @logged_out = prometheus.counter :logged_out_total, docstring: 'A counter of logouts on dovecot'
    @maildir_scanning_took_long = prometheus.summary :maildir_scanning_took_long_total, docstring: 'A summary of long taken maildir scanning by reason (why).', labels: %i[why]
    @maildir_scanning_took_long_rename = prometheus.counter :maildir_scanning_took_long_renames_total, docstring: 'A counter of rename()-calls while long taken mail dir scanning'
    @maildir_scanning_took_long_readdir = prometheus.counter :maildir_scanning_took_long_readdirs_total, docstring: 'A counter of readdir()-calls while long taken mail dir scanning'
    DISCONNECT_REASONS.each {|r| @disconnects.increment by: 0, labels: {disconnect_reason: r} }
    # One counter per key of the "in=.. out=.. deleted=.." statistics.
    @connection_stats = STAT_KEYS.map {|t|
      [t, prometheus.counter( :"connection_stats_#{t}_total", docstring: "Counter for #{t} statistics observed after disconnected")]
    }.to_h
  end

  # entry:: journal entry, only used to print unmatched lines
  # msg::   log line without the "imap(user)<..>: " prefix
  def collect entry, msg
    case msg
    when /\ALogged out (.*)/
      # Logged out in=38 out=804 deleted=0 expunged=0 trashed=0 hdr_count=0 hdr_bytes=0 body_count=0 body_bytes=0
      @logged_out.increment
      record_disconnect 'logged out', $1
    when /\AConnection closed \([^)]+\) (.*)/
      # Connection closed (EXAMINE finished 0.041 secs ago) in=5253 out=390971 deleted=0 ...
      @connection_closed.increment
      record_disconnect 'connection closed', $1
    when /\AConnection closed: .* failed: [^(]*\([^)]+\) (.*)/
      # Connection closed: read(size=6100) failed: Connection reset by peer (UID FETCH finished 0.159 secs ago) in=2982 out=17044659 ...
      # [^(]* skips the error text between "failed: " and the parenthesis.
      @connection_closed.increment
      record_disconnect 'connection closed', $1
    when /\ADisconnected for inactivity (.*)/
      @inactivity.increment
      record_disconnect 'inactivity', $1
    when /\AWarning: Maildir: Scanning .+? took (?<took>\d+) seconds \((?<readdir>\d+) readdir\(\)s, (?<rename>\d+) rename\(\)s to cur\/, why=0x(?<why>[0-9a-fA-F]+)\)/
      # Warning: Maildir: Scanning /path/cur took 49 seconds (86044 readdir()s, 0 rename()s to cur/, why=0x80)
      m = $~
      # labels must be a Hash keyed by the declared label name.
      @maildir_scanning_took_long.observe m[:took].to_i, labels: {why: m[:why].to_i(16)}
      @maildir_scanning_took_long_rename.increment by: m[:rename].to_i
      @maildir_scanning_took_long_readdir.increment by: m[:readdir].to_i
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} imap| #{entry.message}"
    end
  end

  private

  # Counts the disconnect by reason and adds every known "key=value" pair of
  # +stats+ to the counter of that key. Unknown keys are ignored.
  def record_disconnect reason, stats
    @disconnects.increment labels: {disconnect_reason: reason}
    stats.split( ' ').each do |pair|
      key, value = pair.split( '=')
      next unless key
      @connection_stats[key.to_sym]&.increment by: value.to_f
    end
  end
end
# Counts logins, aborted logins and disconnects before login (by reason)
# from the imap-login log lines.
class ImapLogin
  # Start of the text in "Disconnected (...)" => value of the reason label.
  DISCONNECT_REASONS = [
    [/\Ano auth attempts/, 'no auth attempts'],
    [/\Aauth failed/, 'auth failed'],
    [/\AToo many invalid commands /, 'too many invalid commands'],
    [/\AInactivity /, 'inactivity'],
    [/\ADisconnected /, 'disconnected'],
  ].freeze

  def initialize store, prometheus
    @store = store
    @logged_in = prometheus.counter :logged_in_total, docstring: 'A counter of successfull logins to dovecot'
    @aborted = prometheus.counter :login_aborted_total, docstring: 'A counter of aborted logins'
    @disconnected = prometheus.counter :login_disconnected_total, docstring: 'A counter of disconnections before successfully logged in', labels: %i[reason]
  end

  # entry:: journal entry, only used to print unmatched lines
  # msg::   log line without the "imap-login: " prefix
  def collect entry, msg
    if /\ALogin: user=/.match?( msg)
      @logged_in.increment
    elsif (disconnected = /\ADisconnected \((.*?)\): user=</.match( msg))
      count_disconnect entry, disconnected[1]
    elsif /\AAborted login/.match?( msg)
      @aborted.increment
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} imap-login| #{entry.message}"
    end
  end

  private

  # Maps the parenthesised detail to a reason label; an unknown detail is
  # counted as '<any>' and printed.
  def count_disconnect entry, detail
    _pattern, reason = DISCONNECT_REASONS.find {|pattern, _reason| pattern.match?( detail) }
    if reason
      @disconnected.increment labels: {reason: reason}
    else
      @disconnected.increment labels: {reason: '<any>'}
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} Disconnected before login| #{entry.message}"
    end
  end
end
# Builds the collectors for the dovecot processes.
# store::      shared store, handed to every sub-collector
# prometheus:: object on which the metrics are registered
def initialize store, prometheus
  @store = store
  @sieve = Sieve.new store, Collector::PrefixProxy.new( prometheus, :sieve)
  @saved_mail_to_mailbox = prometheus.counter :saved_mail_to_mailbox_total, docstring: "A counter of saved mails to mailbox directly", labels: %i[process]
  @lmtp = Delivery.new store, prometheus, @sieve, @saved_mail_to_mailbox, :lmtp
  @deliver = Delivery.new store, prometheus, @sieve, @saved_mail_to_mailbox, :deliver
  @imap_login = ImapLogin.new store, prometheus
  @imap = Imap.new store, prometheus
  # Both counters are incremented by #collect for "auth: Error: ..." lines;
  # without them such a line raised NoMethodError on nil.
  @auth_ldap_connection_lost = prometheus.counter :auth_ldap_connection_lost_total, docstring: 'A counter of lost connections from auth to the LDAP server'
  @auth_errors = prometheus.counter :auth_errors_total, docstring: 'A counter of other auth errors'
end

# Dispatches one journal entry by the process prefix of its message and
# passes the remainder of the line to the matching sub-collector.
def collect entry
  # STDERR.puts "dovecot| #{entry.message}"
  case entry.message
  when /\Aimap-login: (.*)/
    @imap_login.collect entry, $1
  when /\Aauth: Error: (.*)/
    case $1
    when /\ALDAP: Connection lost to LDAP server, /
      @auth_ldap_connection_lost.increment
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}| #{entry.message}"
      @auth_errors.increment
    end
  when /\Aimap\([^)]+\)(?:<[^ ]+>)?: (.*)/
    @imap.collect entry, $1
  when /\Almtp\([^ ]+\)<[^ ]+>: (.*)/
    @lmtp.collect entry, $1
  when /\Almtp\([^ ]+\): (.*)/
    @lmtp.collect entry, $1
  when /\Adeliver(?:[^:]+): (.*)/
    @deliver.collect entry, $1
  else
    STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}| #{entry.message}"
  end
end
end

450
lib/collector/postfix.rb Normal file
View file

@ -0,0 +1,450 @@
class Collector::Postfix
# Counts "NOQUEUE: reject: ..." lines by SMTP status code and by reason.
class Noqueue
  # Every reason #collect can emit, so each series starts at 0.
  REASONS = ['dnsbl', 'spf fail', 'no reverse hostname', 'user does not exist', '<any>'].freeze

  def initialize store, prometheus
    @store = store
    @noqueue = prometheus.counter :total, docstring: 'Total noqueued by reasons.', labels: %i[reason]
    @codes = prometheus.counter :status_code_total, docstring: 'Total noqueued by status code', labels: %i[status_code enhanced_status_code]
    REASONS.each {|r| @noqueue.increment by: 0, labels: {reason: r} }
  end

  # entry:: journal entry whose message starts with "NOQUEUE: "
  def collect entry
    message = entry.message
    # The status code and enhanced status code directly follow the client.
    # Nothing after them is required: the previous pattern also demanded a
    # "<token>: ...," tail, which lines like
    # "450 4.7.25 Client host rejected: ..." do not have.
    if /\ANOQUEUE: reject: RCPT from [^ ]+: (\d+) (\d+\.\d+\.\d+) / =~ message
      @codes.increment labels: {status_code: $1, enhanced_status_code: $2}
    end
    case message
    when /\ANOQUEUE: reject: RCPT from .* blocked using /
      @noqueue.increment labels: {reason: 'dnsbl'}
    when /\ANOQUEUE: reject: RCPT from .* Message rejected due to: SPF fail - not authorized\. /
      @noqueue.increment labels: {reason: 'spf fail'}
    when / Client host rejected: cannot find your reverse hostname/
      @noqueue.increment labels: {reason: 'no reverse hostname'}
    when / User doesn't exist: /
      @noqueue.increment labels: {reason: 'user does not exist'}
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} NOQUEUE: #{entry.message}"
      @noqueue.increment labels: {reason: '<any>'}
    end
  end
end
# Collects the "cache ... full cleanup" statistics, labelled by cache file.
class Cache
  def initialize store, prometheus
    @store = store
    @full_cleanup = prometheus.counter :full_cleanup_total, docstring: 'A counter of total cache cleanups', labels: %i[file]
    @full_cleanup_retained = prometheus.gauge :full_cleanup_retained_entries, docstring: 'Retained entries of last cache cleanups', labels: %i[file]
    @full_cleanup_dropped = prometheus.counter :full_cleanup_dropped_entries_total, docstring: 'Total dropped entries of cache cleanups', labels: %i[file]
  end

  # entry:: journal entry whose message starts with "cache "
  def collect entry
    case entry.message
    when /\Acache (.*) full cleanup: retained=(\d+) dropped=(\d+) entries/
      # postscreen: cache lmdb:/var/lib/postfix/postscreen_cache full cleanup: retained=128 dropped=14 entries
      file, retained, dropped = $1, $2.to_f, $3.to_f
      @full_cleanup.increment labels: {file: file}
      # The gauge documents the value of the last cleanup, so it is set and
      # not summed up over all cleanups.
      @full_cleanup_retained.set retained, labels: {file: file}
      @full_cleanup_dropped.increment by: dropped, labels: {file: file}
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} cache: #{entry.message}"
    end
  end
end
# Collects metrics from the log lines of postscreen.
class Postscreen
  def initialize store, prometheus
    @store = store
    @noqueue = Noqueue.new store, Collector::PrefixProxy.new( prometheus, :noqueue)
    @cache = Cache.new store, Collector::PrefixProxy.new( prometheus, :cache)
    @connect_from = prometheus.counter :connect_from_total, docstring: 'A counter of connections to postscreen'
    @whitelisted = prometheus.counter :whitelisted_total, docstring: 'A counter of WHITELISTED connections to postscreen'
    @pass_old = prometheus.counter :pass_old_total, docstring: 'A counter of PASS OLD connections to postscreen'
    @pass_new = prometheus.counter :pass_new_total, docstring: 'A counter of PASS NEW connections to postscreen'
    @dnsbl = prometheus.counter :dnsbl_total, docstring: 'A counter of DNSBL-blocked to postscreen'
    @bare_newline = prometheus.counter :bare_newline_total, docstring: 'A counter of BARE NEWLINE-blocked to postscreen'
    @command_pipelining = prometheus.counter :command_pipelining_total, docstring: 'A counter of COMMAND PIPELINING-blocked to postscreen'
    @command_time_limit = prometheus.counter :command_time_limit_total, docstring: 'A counter of COMMAND TIME LIMIT-blocked to postscreen'
    @hangup = prometheus.counter :hangup_total, docstring: 'A counter of HANGUP to postscreen'
    @bdat = prometheus.counter :bdat_total, docstring: 'A counter of BDAT to postscreen'
    @pregreet = prometheus.counter :pregreet_total, docstring: 'A counter of PREGREET to postscreen'
    @disconnect = prometheus.counter :disconnect_total, docstring: 'A counter of DISCONNECT to postscreen'
    @unknown = prometheus.counter :unknown_total, docstring: 'A counter of unknown loglines by postscreen'
    @psc_cache_update_delay = prometheus.summary :psc_cache_update_delay_total, docstring: 'A counter of PSC cache update delays by file', labels: %i[file]
    @curr_unavailable = prometheus.counter :service_currently_unavailable_total, docstring: 'A counter for rejected mails, because service currently unavailable - so greylisted.'
    @dnsblog_reply_timeout = prometheus.counter :dnsblog_reply_timeout_total, docstring: 'Total timedout requests for dnsblog'
    @data_without_valid_rcpt = prometheus.counter :data_without_valid_rcpt_total, docstring: 'A counter of DATA without valid RCPT events'
    @warnings = prometheus.counter :warnings_total, docstring: 'A counter of any warnings'
    # Lines which only bump one counter: anchored prefix => counter.
    # The prefixes exclude each other, so at most one of them matches.
    @plain_events = [
      [/\ACONNECT from /, @connect_from],
      [/\AWHITELISTED /, @whitelisted],
      [/\APASS OLD /, @pass_old],
      [/\APASS NEW /, @pass_new],
      [/\ADISCONNECT /, @disconnect],
      [/\APREGREET /, @pregreet],
      [/\ABDAT /, @bdat],
      [/\AHANGUP /, @hangup],
      [/\ADNSBL rank /, @dnsbl],
      [/\ABARE NEWLINE /, @bare_newline],
      [/\ACOMMAND PIPELINING /, @command_pipelining],
      [/\ACOMMAND TIME LIMIT /, @command_time_limit],
      [/\ADATA without valid RCPT /, @data_without_valid_rcpt],
    ]
  end

  # entry:: journal entry logged by postscreen
  def collect entry
    #STDERR.puts "postscreen: #{entry.message}"
    line = entry.message
    _pattern, counter = @plain_events.find {|pattern, _counter| pattern === line }
    return counter.increment if counter

    if (noqueue = /\ANOQUEUE: (.*)/.match( line))
      collect_noqueue entry, noqueue[1]
    elsif (warning = /\Awarning: (.*)/.match( line))
      collect_warning entry, warning[1]
    elsif /\Acache /.match?( line)
      @cache.collect entry
    else
      STDERR.puts "# postscreen: #{entry.message}"
      @unknown.increment
    end
  end

  private

  # The "service currently unavailable" reject has its own counter; every
  # other NOQUEUE line goes to the noqueue collector.
  def collect_noqueue entry, rest
    if /\Areject: RCPT from [^ ]+: 450 4.3.2 Service currently unavailable; /.match?( rest)
      @curr_unavailable.increment
    else
      @noqueue.collect entry
    end
  end

  # Counts the warning and evaluates the kinds of warning known so far.
  def collect_warning entry, text
    @warnings.increment
    if (delay = /\Apsc_cache_update: ([^ ]+) update average delay is ([^ ]+)/.match( text))
      @psc_cache_update_delay.observe delay[2].to_f, labels: {file: delay[1]}
    elsif /\Adnsblog reply timeout /.match?( text)
      @dnsblog_reply_timeout.increment
    elsif /\Agetpeername: Transport endpoint is not connected -- dropping this connection/.match?( text)
      # known, nothing more to count than the warning itself
    else
      STDERR.puts "# postscreen warnings: #{entry.message}"
    end
  end
end
# Yields every combination of trust level, TLS version and cipher, with the
# trust level changing slowest and the cipher fastest.
# Returns the list of trust levels.
def self.tls_posibilities
  trust_levels = %w[Trusted Untrusted Anonymous]
  versions = %w[TLSv1.2 TLSv1.3]
  ciphers = %w[TLS_AES_128_GCM_SHA256 TLS_AES_256_GCM_SHA384 ECDHE-RSA-AES128-GCM-SHA256 ECDHE-RSA-AES256-GCM-SHA384]
  # With a block, Array#product yields each combination and returns self.
  trust_levels.product( versions, ciphers) {|trust, tls, cipher| yield trust, tls, cipher }
end
# Collects metrics from the log lines of the smtp client: failed connects,
# TLS sessions and the delivery status with its delay.
class Smtp
  def initialize store, prometheus
    @store = store
    @connection_refused = prometheus.counter :connection_refused_total, docstring: 'A counter of refused connections on smtp'
    @connection_timed_out = prometheus.counter :connection_timed_out_total, docstring: 'A counter of timed out connections on smtp'
    @tls = prometheus.counter :tls_total, docstring: 'A counter of TLS connections on smtp with TLS-version and cipher', labels: %i[trust tls cipher]
    Collector::Postfix.tls_posibilities {|t, s, c| @tls.increment by: 0, labels: {trust: t, tls: s, cipher: c} }
    @status = prometheus.histogram :status, docstring: 'A histogram of message status by status', labels: %i[status]
    @sent = prometheus.counter :sent_total, docstring: 'A counter of sent messages by smtp'
    @deferred = prometheus.counter :deferred_total, docstring: 'A counter of deferred messages by smtp'
    @bounced = prometheus.counter :bounced_total, docstring: 'A counter of bounced messages by smtp'
    @deliverable = prometheus.counter :deliverable_total, docstring: 'A counter of deliverable messages by smtp'
    @undeliverable = prometheus.counter :undeliverable_total, docstring: 'A counter of undeliverable messages by smtp'
    @status_unknown = prometheus.counter :status_unknown_total, docstring: 'A counter of unknown status by smtp'
    @unknown = prometheus.counter :unknown_total, docstring: 'A counter of unknown loglines by smtp'
    # Downcased status word => its counter.
    @by_status = {
      'sent' => @sent,
      'deferred' => @deferred,
      'bounced' => @bounced,
      'deliverable' => @deliverable,
      'undeliverable' => @undeliverable,
    }
  end

  # entry:: journal entry logged by smtp
  def collect entry
    #STDERR.puts "smtp: #{entry.message}"
    line = entry.message
    if /\Aconnect to /.match?( line)
      collect_connect_failure entry, line
    elsif (tls = /\A([^ ]+) TLS connection established to .* ([^ ]+) with cipher ([^ ]+)/.match( line))
      @tls.increment labels: {trust: tls[1], tls: tls[2], cipher: tls[3]}
    elsif (delivery = /\A\w+: .* delay=([0-9.]+),.*status=([^ ]+)/.match( line))
      # <queue id>: to=<..>, relay=.., delay=0.76, delays=0.2/0/0.35/0.21, dsn=2.0.0, status=sent (250 2.0.0 Ok: queued as ..)
      collect_status entry, delivery[1].to_f, delivery[2].downcase
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
      @unknown.increment
    end
  end

  private

  # "connect to ..." lines are told apart by the way they end.
  def collect_connect_failure entry, line
    if / Connection refused\z/.match?( line)
      @connection_refused.increment
    elsif / Connection timed out\z/.match?( line)
      @connection_timed_out.increment
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} connect to: #{entry.message}"
    end
  end

  # Observes the delay in the histogram and counts the status.
  def collect_status entry, delay, status
    @status.observe delay, labels: {status: status}
    counter = @by_status[status]
    if counter
      counter.increment
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} status: #{entry.message}"
      @status_unknown.increment
    end
  end
end
# Collects metrics from the log lines of an smtpd service.
class Smtpd
  def initialize store, prometheus
    @store = store
    @noqueue = Noqueue.new store, Collector::PrefixProxy.new( prometheus, :noqueue)
    @connect_from = prometheus.counter :connect_from_total, docstring: 'A counter of connections to smtpd', labels: %i[from_unknown]
    %w[0 1].each {|x| @connect_from.increment by: 0, labels: {from_unknown: x} }
    @tls = prometheus.counter :tls_total, docstring: 'A counter of TLS connections to smtpd with TLS-version and cipher', labels: %i[trust tls cipher]
    Collector::Postfix.tls_posibilities {|t, s, c| @tls.increment by: 0, labels: {trust: t, tls: s, cipher: c} }
    @disconnect_from = prometheus.counter :disconnect_from_total, docstring: 'A counter of "disconnect from" to smtpd'
    @disconnect_from_events = prometheus.counter :disconnect_from_event_total, docstring: 'A counter of events while connection-lifetime (auth, starttls, mail, rcpt, data, commands, ...) from disconnections', labels: %i[event]
    %w[auth mail rcpt data commands starttls ehlo quit].each {|e| @disconnect_from_events.increment by: 0, labels: {event: e} }
    @concurrenty_limit_exceeded = prometheus.counter :concurrenty_limit_exceeded_total, docstring: 'A counter of concurrenty limit exceeded connections to smtpd'
    @timeout = prometheus.counter :timeout_connection_total, docstring: 'A counter of timedout connections to smtpd', labels: %i[after]
    @lost_connection = prometheus.counter :lost_connection_total, docstring: 'A counter of lost connections to smtpd', labels: %i[after]
    @accepted = prometheus.counter :accepted_total, docstring: 'A counter of accepted messages to smtpd'
    @unknown = prometheus.counter :unknown_total, docstring: 'A counter of unknown loglines by smtpd'
    @sasl_auth_failed = prometheus.counter :sasl_auth_failed_total, docstring: 'A counter of failed SASL authentication by method', labels: %i[method]
    @non_smtp_command = prometheus.counter :non_smtp_command_total, docstring: 'A counter of Non-SMTP-commands (ex. was a HTTP GET / HTTP/1.1)'
    @tls_lib_problem = prometheus.counter :tls_lib_problem_total, docstring: 'A counter of TLS libreary problems (ex. unsupported protocol'
    @ssl_error = prometheus.counter :ssl_error_total, docstring: 'A counter of any SSL_accept errors by error', labels: %i[error]
    @warnings = prometheus.counter :warnings_total, docstring: 'A counter of any warnings'
    @hostname_not_resolved_to_address = prometheus.counter :hostname_not_resolved_to_address_total, docstring: 'A counter of hostnames, which cannot be resolved to there IP'
  end

  # entry:: journal entry logged by smtpd
  def collect entry
    #STDERR.puts "smtpd: #{entry.message}"
    case entry.message
    when /\Aconnect from unknown/
      @connect_from.increment labels: {from_unknown: 1}
    when /\Aconnect from /
      @connect_from.increment labels: {from_unknown: 0}
    when /\A([^ ]+) TLS connection established from .*: ([^ ]+) with cipher ([^ ]+) /
      @tls.increment labels: {trust: $1, tls: $2, cipher: $3}
    when /\ANOQUEUE: /
      @noqueue.collect entry
    when /\Adisconnect from ([^ ]+) (.*)/
      @disconnect_from.increment
      # ehlo=2 starttls=1 auth=1 mail=1 rcpt=1 data=1 commands=8
      $2.split( ' ').each do |x|
        case x
        when /(\w+)=(\d+)/
          @disconnect_from_events.increment by: $2.to_f, labels: {event: $1}
        else
          STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} disconnect from: [#{x}] #{entry.message}"
        end
      end
    when /\Awarning: (.*)/
      @warnings.increment
      case $1
      when /\AConnection concurrency limit exceeded: /
        @concurrenty_limit_exceeded.increment
      when /\Ahostname ([^ ]+) does not resolve to address /
        @hostname_not_resolved_to_address.increment
      when /\A[^ ]+\[([^ ]+)\]: SASL ([^ ]+) authentication failed/
        # $1 is the text in brackets after the host name, $2 the SASL
        # method; the label is declared as the method.
        @sasl_auth_failed.increment labels: {method: $2.downcase}
      when /\Anon-SMTP command from /
        @non_smtp_command.increment
      when /\ATLS library problem /
        @tls_lib_problem.increment
      else
        STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} wanring: #{entry.message}"
      end
    when /\ASSL_accept error from [^ ]+: (-?\d+)/
      # \d+ keeps error codes with more than one digit intact.
      @ssl_error.increment labels: {error: $1.to_i}
    when /\Atimeout after ([^ ]+) from /
      @timeout.increment labels: {after: $1}
    when /\Alost connection after ([^ ]+) from /
      @lost_connection.increment labels: {after: $1}
    when /\A\w{8,15}: client=/ # sasl_method=
      @accepted.increment
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
      @unknown.increment
    end
  end
end
# Collects metrics from the log lines of tlsproxy.
class Tlsproxy
  def initialize store, prometheus
    @store = store
    @connect = prometheus.counter :connect_total, docstring: 'A counter of COUNNECT from tlsproxy'
    @disconnect = prometheus.counter :disconnect_total, docstring: 'A counter of DISCOUNNECT from tlsproxy'
    @tls = prometheus.counter :tls_total, docstring: 'A counter of TLS connections to smtpd with TLS-version and cipher', labels: %i[trust tls cipher]
    Collector::Postfix.tls_posibilities {|t, s, c| @tls.increment by: 0, labels: {trust: t, tls: s, cipher: c} }
  end

  # entry:: journal entry logged by tlsproxy
  def collect entry
    line = entry.message
    if /\ADISCONNECT /.match?( line)
      @disconnect.increment
    elsif /\ACONNECT /.match?( line)
      @connect.increment
    elsif (session = /\A([^ ]+) TLS connection established from .*: ([^ ]+) with cipher ([^ ]+) /.match( line))
      trust, version, cipher = session.captures
      @tls.increment labels: {trust: trust, tls: version, cipher: cipher}
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
    end
  end
end
# Counts the non-delivery notifications which bounce sends to senders.
class Bounce
  NON_DELIVERY = /\A([^ ]+): sender non-delivery notification: (.+)/

  def initialize store, prometheus
    @store = store
    @non_delivery = prometheus.counter :sender_non_delivery_notification, docstring: 'A counter of notifications to sender because mail cannot deliveried'
  end

  # entry:: journal entry logged by bounce
  def collect entry
    # <queue id>: sender non-delivery notification: <queue id of the notification>
    return @non_delivery.increment if NON_DELIVERY === entry.message

    STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
  end
end
# Collects the statistics which scache logs.
class SCache
  # Time.parse is provided by the "time" library, not by the core Time
  # class; load it here so #collect does not depend on another file or gem
  # having done so.
  require 'time'

  def initialize store, prometheus
    @store = store
    @start = prometheus.gauge :start, docstring: 'Start timestamp of SCache statistics'
    @domain_lookup_hits = prometheus.counter :domain_lookup_hits_total, docstring: 'Count of hits for domain lookups of SCache'
    @domain_lookup_miss = prometheus.counter :domain_lookup_miss_total, docstring: 'Count of misses for domain lookups of SCache'
    @domain_lookup_success_rate = prometheus.gauge :domain_lookup_success_rate, docstring: 'Success rate of domain lookups of SCache'
    @max_simultaneaus_domains = prometheus.gauge :max_simultaneaus_domains, docstring: 'Max simultaneaus connections Domains of SCache'
    @max_simultaneaus_addresses = prometheus.gauge :max_simultaneaus_addresses, docstring: 'Max simultaneaus connections Addresses of SCache'
    @max_simultaneaus_connections = prometheus.gauge :max_simultaneaus_connections, docstring: 'Max simultaneaus connections of SCache'
  end

  # entry:: journal entry logged by scache
  def collect entry
    case entry.message
    when /\Astatistics: start interval (.*)/
      # postfix@-.service: postfix/scache/scache statistics: start interval Apr 13 10:58:06
      @start.set Time.parse( $1).to_f
    when /\Astatistics: domain lookup hits=(\d+) miss=(\d+) success=(\d+)%/
      # postfix@-.service: postfix/scache/scache statistics: domain lookup hits=2 miss=4 success=33%
      @domain_lookup_hits.increment by: $1.to_f
      @domain_lookup_miss.increment by: $2.to_f
      # The log line gives a percentage; the gauge stores a ratio.
      @domain_lookup_success_rate.set $3.to_f/100
    when /\Astatistics: max simultaneous domains=(\d+) addresses=(\d+) connection=(\d+)/
      # postfix@-.service: postfix/scache/scache statistics: max simultaneous domains=1 addresses=1 connection=4
      @max_simultaneaus_domains.set $1.to_f
      @max_simultaneaus_addresses.set $2.to_f
      @max_simultaneaus_connections.set $3.to_f
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
    end
  end
end
# Handles the log lines of verify; only its cache lines are evaluated.
class Verify
  def initialize store, prometheus
    @store = store
    cache_metrics = Collector::PrefixProxy.new( prometheus, :cache)
    @cache = Cache.new store, cache_metrics
  end

  # entry:: journal entry logged by verify
  def collect entry
    if /\Acache / === entry.message
      @cache.collect entry
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
    end
  end
end
# Builds the collectors for the postfix services and the counters which are
# maintained by #collect itself.
# store::      shared store, handed to every sub-collector
# prometheus:: object on which the metrics are registered
def initialize store, prometheus
  @store = store
  @postscreen = Postscreen.new store, Collector::PrefixProxy.new( prometheus, :postscreen)
  @smtp = Smtp.new store, Collector::PrefixProxy.new( prometheus, :smtp)
  @smtpd = Smtpd.new store, Collector::PrefixProxy.new( prometheus, :smtpd)
  @submission = Smtpd.new store, Collector::PrefixProxy.new( prometheus, :submission)
  @tlsproxy = Tlsproxy.new store, Collector::PrefixProxy.new( prometheus, :tlsproxy)
  @scache = SCache.new store, Collector::PrefixProxy.new( prometheus, :scache)
  @bounce = Bounce.new store, Collector::PrefixProxy.new( prometheus, :bounce)
  # #collect dispatches verify lines to @verify, so it has to exist.
  @verify = Verify.new store, Collector::PrefixProxy.new( prometheus, :verify)
  @qmgr = prometheus.counter :qmgr_total, docstring: 'A counter of qmgr actions'
  @cleanup = prometheus.counter :cleanup_total, docstring: 'A counter of cleanup actions'
  @lmtp = prometheus.counter :lmtp_total, docstring: 'A counter of ltmp actions'
  @pickup = prometheus.counter :pickup_total, docstring: 'A counter of pickup actions'
  @spf = prometheus.counter :spf_total, docstring: 'A counter of prepended SPF header-lines with state', labels: %i[status]
  @spf_fail = prometheus.counter :spf_fail_total, docstring: 'A counter of policyd-SPF failed SPF'
  %w[pass none].each {|s| @spf.increment by: 0, labels: {status: s} }
  @dnsblog = prometheus.counter :dnsblog_total, docstring: 'A counter for DNS-Blacklisted IP by DNSBL', labels: %i[dnsbl]
  # The metric name below is kept exactly as it was.
  @anvil_max_connection_rate_per_minute = prometheus.gauge :anvil_max_connection_per_minute_tate, docstring: "Rate of max connections per minute to listener", labels: %i{listener}
  @anvil_max_connection_count = prometheus.gauge :anvil_max_connection_count, docstring: "Count of max connections to listener", labels: %i{listener}
  @anvil_max_cache_size = prometheus.gauge :anvil_max_cache_size, docstring: "Current max cache size"
end

# Dispatches one journal entry by its syslog identifier. Services with
# their own collector get the entry handed over; for the others the lines
# are counted or evaluated right here.
def collect entry
  #STDERR.puts "postfix: #{entry.syslog_identifier}: #{entry.message}"
  case entry.syslog_identifier
  when 'postfix/tlsproxy/tlsproxy'
    @tlsproxy.collect entry
  when 'postfix/postscreen', 'postfix/smtp/postscreen'
    # The same identifier was listed twice here; the short form follows
    # the pattern of the other services.
    @postscreen.collect entry
  when 'postfix/smtp', 'postfix/smtp/smtp'
    @smtp.collect entry
  when 'postfix/smtpd', 'postfix/smtpd/smtpd'
    @smtpd.collect entry
  when 'postfix/submission/smtpd'
    @submission.collect entry
  when 'postfix/bounce', 'postfix/bounce/bounce'
    @bounce.collect entry
  when 'postfix/verify', 'postfix/verify/verify'
    @verify.collect entry
  when 'policyd-spf'
    case entry.message
    when /\Aprepend Received-SPF: (\w+) /
      @spf.increment labels: {status: $1.downcase}
    when /\A[^ ]+ [^ ]+ Message rejected due to: SPF fail /
      # postfix@-.service policyd-spf: 550 5.7.23 Message rejected due to: SPF fail - not authorized.
      @spf_fail.increment
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
    end
  when 'postfix/cleanup', 'postfix/cleanup/cleanup'
    @cleanup.increment
  when 'postfix/qmgr', 'postfix/qmgr/qmgr'
    @qmgr.increment
  when 'postfix/lmtp', 'postfix/lmtp/lmtp'
    @lmtp.increment
  when 'postfix/pickup', 'postfix/pickup/pickup'
    @pickup.increment
  when 'postfix/dnsblog', 'postfix/dnsblog/dnsblog'
    case entry.message
    when /\Aaddr [^ ]+ listed by domain ([^ ]+) as /
      @dnsblog.increment labels: {dnsbl: $1.downcase}
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
    end
  when 'postfix/anvil', 'postfix/anvil/anvil'
    case entry.message
    when /\Astatistics: max connection rate (\d+)\/60s for \((.+)\) at (.*)/
      @anvil_max_connection_rate_per_minute.set $1.to_i, labels: {listener: $2}
    when /\Astatistics: max connection count (\d+) for \((.+)\) at (.*)/
      @anvil_max_connection_count.set $1.to_i, labels: {listener: $2}
    when /\Astatistics: max cache size (\d+) at (.*)/
      @anvil_max_cache_size.set $1.to_i
    else
      STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
    end
  when 'postfix/scache', 'postfix/scache/scache'
    @scache.collect entry
  else
    STDERR.puts "# #{entry._systemd_unit}: #{entry.syslog_identifier} #{entry.message}"
  end
end
end

View file

@ -6,235 +6,8 @@ require 'prometheus/client'
require 'prometheus/client/formats/text' require 'prometheus/client/formats/text'
require 'ostruct' require 'ostruct'
class Collector require_relative 'lib/collector'
attr_reader :journal, :prometheus
# Thin wrapper around a metrics registry that puts a fixed prefix in front of
# every counter name it registers.
class PrefixProxy
  attr_reader :prometheus, :prefix

  # prometheus - object responding to `counter(name, docstring:, **options)`.
  # prefix     - value interpolated in front of each counter name.
  def initialize(prometheus, prefix)
    @prometheus = prometheus
    @prefix = prefix
  end

  # Registers "<prefix>_<name>" on the wrapped registry and returns whatever
  # the registry returns. The positional docstring is forwarded as `docstring:`.
  def counter(name, docstring, **options)
    prefixed_name = :"#{prefix}_#{name}"
    @prometheus.counter(prefixed_name, docstring: docstring, **options)
  end
end
# Counts dovecot IMAP log lines: logins, logouts and closed connections.
class Dovecot
  attr_reader :logged_in, :logged_out, :connection_closed

  # prometheus - object responding to `counter(name, docstring)`; one counter
  #              is registered per event type.
  def initialize(prometheus)
    @logged_in = prometheus.counter(:logged_in, 'A counter of successfull logins to dovecot')
    @logged_out = prometheus.counter(:logged_out, 'A counter of logouts on dovecot')
    @connection_closed = prometheus.counter(:connection_closed, 'A counter of closed connection on dovecot')
  end

  # Increments the counter matching the entry's message; messages that match
  # none of the patterns are ignored.
  def collect(entry)
    case entry.message
    when /\Aimap-login: Login: user=/ then @logged_in.increment
    when /\Aimap\([^)]+\): Logged out / then @logged_out.increment
    when /\Aimap\([^)]+\): Connection closed / then @connection_closed.increment
    end
  end
end
# Counts postscreen log lines by event type.
#
# Counters are registered through the object handed to #initialize, which must
# respond to `counter(name, docstring, **options)`.
class Postscreen
  attr_reader :connect_from, :whitelisted, :pass_old, :dnsbl, :noqueue, :hangup, :disconnect, :unknown

  # prometheus - registry-like object the counters are registered on.
  def initialize prometheus
    @connect_from = prometheus.counter :connect_from, 'A counter of connections to postscreen'
    @whitelisted = prometheus.counter :whitelisted, 'A counter of WHITELISTED connections to postscreen'
    @pass_old = prometheus.counter :pass_old, 'A counter of PASS OLD connections to postscreen'
    @dnsbl = prometheus.counter :dnsbl, 'A counter of DNSBL-blocked to postscreen'
    # NOQUEUE is split by reason. The label has to be declared, and the
    # fallback value "unknown" is supplied as a preset label rather than as a
    # stray option.
    @noqueue = prometheus.counter :noqueue, 'A counter of NOQUEUE to postscreen', labels: %i[reason], preset_labels: {reason: 'unknown'}
    @hangup = prometheus.counter :hangup, 'A counter of HANGUP to postscreen'
    @disconnect = prometheus.counter :disconnect, 'A counter of DISCONNECT to postscreen'
    @unknown = prometheus.counter :unknown, 'A counter of unknown loglines by postscreen'
  end

  # Increments the counter matching the entry's message. Messages matching no
  # pattern are counted as unknown.
  #
  # entry - object responding to #message.
  def collect entry
    case entry.message
    when /\ACONNECT from /
      @connect_from.increment
    when /\AWHITELISTED /
      @whitelisted.increment
    when /\APASS OLD /
      @pass_old.increment
    when /\ADISCONNECT /
      @disconnect.increment
    when /\ANOQUEUE: /
      case entry.message
      when / blocked using /
        @noqueue.increment labels: {reason: 'dnsbl'}
      else
        # Previously dropped without being counted; now counted with the
        # preset reason.
        @noqueue.increment
      end
    when /\AHANGUP /
      @hangup.increment
    when /\ADNSBL rank /
      @dnsbl.increment
    else
      @unknown.increment
    end
  end
end
# Counts log lines of the postfix smtp (outgoing) client: connection failures,
# TLS sessions and delivery status.
#
# Counters are registered through the object handed to #initialize, which must
# respond to `counter(name, docstring, **options)`.
class Smtp
  # prometheus - registry-like object the counters are registered on.
  def initialize prometheus
    @connection_refused = prometheus.counter :connection_refused, 'A counter of connection refused on smtp'
    @connection_timed_out = prometheus.counter :connection_timed_out, 'A counter of timed out connections on smtp'
    # Label names are declared as symbols, like the other labelled metrics in
    # this file.
    @tls = prometheus.counter :tls, 'A counter of TLS connections on smtp with TLS-version and cipher', labels: %i[trust tls cipher]
    @status = prometheus.counter :status, 'A counter of message status by status', labels: %i[status]
    @sent = prometheus.counter :sent, 'A counter of sent messages by smtp'
    @deferred = prometheus.counter :deferred, 'A counter of deferred messages by smtp'
    @bounced = prometheus.counter :bounced, 'A counter of bounced messages by smtp'
    @deliverable = prometheus.counter :deliverable, 'A counter of deliverable messages by smtp'
    @undeliverable = prometheus.counter :undeliverable, 'A counter of undeliverable messages by smtp'
    @status_unknown = prometheus.counter :status_unknown, 'A counter of unknown status by smtp'
    @unknown = prometheus.counter :unknown, 'A counter of unknown loglines by smtp'
  end

  # Increments the counters matching the entry's message. A status line bumps
  # both the labelled status counter and the dedicated per-status counter.
  # Messages matching no pattern are counted as unknown.
  #
  # entry - object responding to #message.
  def collect entry
    case entry.message
    when /\Aconnect to /
      # "connect to" lines with any other ending are not counted.
      case entry.message
      when / Connection refused\z/
        @connection_refused.increment
      when / Connection timed out\z/
        @connection_timed_out.increment
      end
    when /\A([^ ]+) TLS connection established to .* ([^ ]+) with cipher ([^ ]+)/
      @tls.increment labels: {trust: $1, tls: $2, cipher: $3}
    when /\A\w{8,15}: .*status=([^ ]+)/
      status = $1
      @status.increment labels: {status: status}
      case status
      when 'sent'
        @sent.increment
      when 'deferred'
        @deferred.increment
      when 'bounced'
        @bounced.increment
      when 'deliverable'
        @deliverable.increment
      when 'undeliverable'
        @undeliverable.increment
      else
        @status_unknown.increment
      end
    else
      @unknown.increment
    end
  end
end
# Counts log lines of a postfix smtpd (incoming) service.
#
# Counters are registered through the object handed to #initialize, which must
# respond to `counter(name, docstring, **options)`. Every label used by
# #collect is declared here, and label values are always passed via `labels:`.
class Smtpd
  # prometheus - registry-like object the counters are registered on.
  def initialize prometheus
    @connect_from = prometheus.counter :connect_from, 'A counter of connections to smtpd', labels: %i[unknown]
    @tls = prometheus.counter :tls, 'A counter of TLS connections to smtpd with TLS-version and cipher', labels: %i[trust tls cipher]
    @disconnect_from = prometheus.counter :disconnect_from, 'A counter of disconnections to smtpd'
    # The fallback reason is supplied as a preset label; #collect always sets
    # an explicit reason, so the preset only matters for other callers.
    @noqueue = prometheus.counter :noqueue, 'A counter of NOQUEUE by smtpd', labels: %i[reason], preset_labels: {reason: 'unknown'}
    @concurrenty_limit_exceeded = prometheus.counter :concurrenty_limit_exceeded, 'A counter of concurrenty limit exceeded connections to smtpd'
    @timeout = prometheus.counter :timeout_connection, 'A counter of timedout connections to smtpd', labels: %i[after]
    @lost_connection = prometheus.counter :lost_connection, 'A counter of lost connections to smtpd', labels: %i[after]
    @accepted = prometheus.counter :accepted, 'A counter of accepted messages to smtpd'
    @unknown = prometheus.counter :unknown, 'A counter of unknown loglines by smtpd'
  end

  # Increments the counter matching the entry's message. Messages matching no
  # pattern are counted as unknown.
  #
  # entry - object responding to #message.
  def collect entry
    case entry.message
    when /\Aconnect from unknown/
      @connect_from.increment labels: {unknown: '1'}
    when /\Aconnect from /
      @connect_from.increment labels: {unknown: '0'}
    when /\A([^ ]+) TLS connection established from .*: ([^ ]+) with cipher ([^ ]+) /
      @tls.increment labels: {trust: $1, tls: $2, cipher: $3}
    when /\ANOQUEUE: /
      case entry.message
      # NOTE(review): Postfix appears to write "...reverse hostname, [addr]",
      # so a comma is accepted as well as the space - confirm against real logs.
      when / Client host rejected: cannot find your reverse hostname[, ]/
        @noqueue.increment labels: {reason: 'no reverse hostname'}
      when / User doesn't exist: /
        @noqueue.increment labels: {reason: 'user does not exist'}
      else
        @noqueue.increment labels: {reason: 'any'}
      end
    when /\Adisconnect from / # ehlo=2 starttls=1 auth=1 mail=1 rcpt=1 data=1 commands=8
      @disconnect_from.increment
    when /\Awarning: Connection concurrency limit exceeded: /
      @concurrenty_limit_exceeded.increment
    when /\Atimeout after ([^ ]+) from /
      @timeout.increment labels: {after: $1}
    when /\Alost connection after ([^ ]+) from /
      @lost_connection.increment labels: {after: $1}
    when /\A\w{8,15}: client=/ # sasl_method=
      @accepted.increment
    else
      @unknown.increment
    end
  end
end
# Dispatches postfix journal entries to a per-daemon collector, or counts them
# directly for daemons that only need a plain counter.
class Postfix
  # prometheus - registry-like object responding to `counter(name, docstring)`;
  #              each sub-collector gets it wrapped in a PrefixProxy carrying
  #              the daemon's name.
  def initialize prometheus
    @postscreen = Postscreen.new PrefixProxy.new( prometheus, :postscreen)
    @smtp = Smtp.new PrefixProxy.new( prometheus, :smtp)
    @smtpd = Smtpd.new PrefixProxy.new( prometheus, :smtpd)
    @submission = Smtpd.new PrefixProxy.new( prometheus, :submission)
    @qmgr = prometheus.counter :qmgr, 'A counter of qmgr actions'
    @cleanup = prometheus.counter :cleanup, 'A counter of cleanup actions'
  end

  # Routes the entry by its syslog identifier; identifiers not listed here are
  # ignored.
  #
  # entry - object responding to #syslog_identifier (and whatever the
  #         sub-collectors read from it).
  def collect entry
    case entry.syslog_identifier
    when 'postfix/postscreen'
      @postscreen.collect entry
    when 'postfix/smtp'
      @smtp.collect entry
    when 'postfix/smtpd'
      @smtpd.collect entry
    when 'postfix/submission/smtpd'
      @submission.collect entry
    when 'postfix/cleanup'
      # The counters live in @cleanup/@qmgr; @metrics is never assigned.
      @cleanup.increment
    when 'postfix/qmgr'
      @qmgr.increment
    end
  end
end
# Sets up the journal source, the metrics registry and one collector per
# service.
#
# prometheus - registry to register metrics on; Prometheus::Client.registry
#              when omitted.
# journal    - journal to read from; a Systemd::Journal opened with the
#              SYSTEM_ONLY flag when omitted.
def initialize(prometheus: nil, journal: nil)
  journal ||= Systemd::Journal.new(flags: Systemd::Journal::Flags::SYSTEM_ONLY)
  prometheus ||= Prometheus::Client.registry
  @journal = journal
  @prometheus = prometheus
  # Each service registers its counters under its own name prefix.
  @dovecot = Dovecot.new(PrefixProxy.new(@prometheus, :dovecot))
  @postfix = Postfix.new(PrefixProxy.new(@prometheus, :postfix))
end
# Runs #run on a new thread and returns that thread.
#
# Thread.abort_on_exception is switched on process-wide first. Anything raised
# by #run is printed to STDERR with its class and backtrace, then re-raised.
def start
  Thread.abort_on_exception = true
  Thread.new do
    begin
      run
    rescue Object => error
      trace = error.backtrace.map { |frame| " in #{frame}" }
      STDERR.puts "#{error} (#{error.class})", trace
      raise
    end
  end
end
# Positions the journal via `seek :tail` and `move_previous`, then watches it
# and hands every entry to the collector for its systemd unit. Entries from
# other units are ignored. Returns whatever @journal.watch returns.
def run
  @journal.seek(:tail)
  @journal.move_previous
  @journal.watch do |entry|
    unit = entry._systemd_unit
    if 'dovecot.service' == unit
      @dovecot.collect(entry)
    elsif 'postfix@-.service' == unit
      @postfix.collect(entry)
    end
  end
end
end
# Entry point when this file is executed directly instead of being required.
# NOTE(review): this assumes Collector#start is an instance method that
# returns the collector thread and that Collector.new needs no arguments -
# confirm against the Collector actually loaded. The thread is joined so the
# process stays alive while it runs.
if __FILE__ == $0
  Collector.new.start.join
end