diff --git a/.gitignore b/.gitignore index 27eb3bb..155bade 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ *.sw[opqnm] *~ +/.bundle +/sbin diff --git a/Gemfile b/Gemfile index ee1676d..7bf3b65 100644 --- a/Gemfile +++ b/Gemfile @@ -4,3 +4,4 @@ gem 'systemd-journal' #, '~> 1.3.0' gem 'prometheus-client' gem 'rack' gem 'puma' +gem 'sd_notify' diff --git a/Gemfile.lock b/Gemfile.lock index f3d12db..52a3e01 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -7,6 +7,7 @@ GEM puma (5.6.4) nio4r (~> 2.0) rack (2.2.3) + sd_notify (0.1.1) systemd-journal (1.4.2) ffi (~> 1.9) @@ -17,6 +18,7 @@ DEPENDENCIES prometheus-client puma rack + sd_notify systemd-journal BUNDLED WITH diff --git a/config.ru b/config.ru index 6754d37..d8b5a96 100644 --- a/config.ru +++ b/config.ru @@ -2,8 +2,20 @@ require 'rack' require './postfix_exporter' require 'socket' -collector = Collector.new -th = collector.start +class Settings + def initialize environment + @environment = environment.to_s.to_sym + end + attr_reader :environment + alias :env :environment + def development?() :development == environment end + def production?() :production == environment end + def test?() :test == environment end +end + +settings = Settings.new ENV['RACK_ENV'] +collector = Collector.new settings: settings +collector.start showqpath = '/var/spool/postfix/public/showq' prometheus = collector.prometheus metrics = OpenStruct.new( @@ -12,7 +24,7 @@ metrics = OpenStruct.new( def determine_domain str case str - when /@([^.]+\.[^.]+)\z/ + when /@([^.]+\.[^.]+)\z/ $1 when /\.([^.]+\.[^.]+)\z/ "any.#$1" diff --git a/lib/collector.rb b/lib/collector.rb new file mode 100644 index 0000000..9c44c60 --- /dev/null +++ b/lib/collector.rb @@ -0,0 +1,76 @@ +class Collector + attr_reader :journal, :prometheus, :settings + + class PrefixProxy + attr_reader :prometheus, :prefix + def initialize prometheus, prefix + @prometheus, @prefix = prometheus, prefix + end + + def counter name, **options + @prometheus.counter :"#{prefix}_#{name}", **options + end + + def gauge name, **options + @prometheus.gauge :"#{prefix}_#{name}", **options + end + + def histogram name, **options + @prometheus.histogram :"#{prefix}_#{name}", **options + end + + def summary name, **options + @prometheus.summary :"#{prefix}_#{name}", **options + end + end + + def self.start prometheus: nil, journal: nil + self.new( prometheus: prometheus, journal: journal).start + end + + def initialize prometheus: nil, journal: nil, settings: nil + @settings = settings + @journal = journal || Systemd::Journal.new( flags: Systemd::Journal::Flags::SYSTEM_ONLY) + @prometheus = prometheus || Prometheus::Client.registry + @store = {} + + @errors = @prometheus.counter :postfix_exporter_errors_total, docstring: 'Count internal errors/exceptions' + @dovecot = Dovecot.new @store, PrefixProxy.new( @prometheus, :dovecot) + @postfix = Postfix.new @store, PrefixProxy.new( @prometheus, :postfix) + end + + def run + @journal.seek :tail + @journal.move_previous + @journal.watch do |entry| + case entry._systemd_unit + when 'dovecot.service' + @dovecot.collect entry + when 'postfix@-.service' + @postfix.collect entry + else + if @settings.nil? or @settings.development? + STDERR.puts "# unit: #{entry._systemd_unit}: #{entry.syslog_identifier} #{entry.message}" + end + end + end + end + + def start + Thread.abort_on_exception = true + Thread.new do + begin + run + rescue SystemExit, Interrupt + raise + rescue Object + @errors.increment + STDERR.puts "#$! (#{$!.class})", $!.backtrace.map {|x| " in #{x}"} + retry + end + end + end +end + +require_relative 'collector/postfix' +require_relative 'collector/dovecot' diff --git a/lib/collector/dovecot.rb b/lib/collector/dovecot.rb new file mode 100644 index 0000000..74c58eb --- /dev/null +++ b/lib/collector/dovecot.rb @@ -0,0 +1,171 @@ +class Collector::Dovecot + class Sieve + def initialize store, prometheus + @store = store + @stored_into_mailbox = prometheus.counter :stored_into_mailbox_total, docstring: 'A counter of mails stored in mailbox by sieve', labels: %i[process] + @forwards = prometheus.counter :forwared_mails_total, docstring: 'A counter of mails forwareded to other address', labels: %i[process] + @discarded_duplicate_forward = prometheus.counter :discarded_duplicate_forward_total, docstring: 'A counter of discarded duplicates, which will not be forwarded.', labels: %i[process] + %w[lmtp deliver].each do |p| + @stored_into_mailbox.increment by: 0, labels: {process: p} + @forwards.increment by: 0, labels: {process: p} + @discarded_duplicate_forward.increment by: 0, labels: {process: p} + end + end + + def collect entry, process, msg + case msg + when / stored mail into mailbox / + # dovecot.service dovecot lmtp sieve: lmtp(dillo@nfotex.com)<935639>: sieve: msgid=<1649842684455734173.18148473471766045120@vlmpaymp001.at.inside>: stored mail into mailbox + @stored_into_mailbox.increment labels: {process: process} + when / forwarded to / + @forwards.increment labels: {process: process} + when / discarded duplicate forward to / + @discarded_duplicate_forward.increment labels: {process: process} + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} sieve| #{entry.message}" + end + end + end + + class Delivery + def initialize store, prometheus, sieve, saved_mailbox, process + @store, @sieve, @process = store, sieve, process + @connect = prometheus.counter "#{process}_connect_total", docstring: "A counter of connection via #{process}" + @disconnect = prometheus.counter "#{process}_disconnect_total", docstring: "A counter of disconnect at #{process}" + @saved_mail_to_mailbox = saved_mailbox + end + + def collect entry, msg + case msg + when /\AConnect from / + @connect.increment + when /\ADisconnect from / + @disconnect.increment + when /saved mail to / + @saved_mail_to_mailbox.increment labels: {process: @process} + when /\Asieve: (.*)/ + @sieve.collect entry, @process, $1 + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} delivery| #{entry.message}" + end + end + end + + class Imap + def initialize store, prometheus + @connection_closed = prometheus.counter :connection_closed_total, docstring: 'A counter of closed connection on dovecot' + @inactivity = prometheus.counter :disconnected_inactivity_total, docstring: 'A counter for disconnect for inactivity.' + @connection_stats = prometheus.counter :connection_stats_total, docstring: 'A counter for observed statistics after disconnected.', labels: %i[disconnect_reason] + @logged_out = prometheus.counter :logged_out_total, docstring: 'A counter of logouts on dovecot' + @maildir_scanning_took_long = prometheus.summary :maildir_scanning_took_long_total, docstring: 'A summary of long taken maildir scanning by reason (why).', labels: %i[why] + @maildir_scanning_took_long_rename = prometheus.counter :maildir_scanning_took_long_renames_total, docstring: 'A counter of rename()-calls while long taken mail dir scanning' + @maildir_scanning_took_long_readdir = prometheus.counter :maildir_scanning_took_long_readdirs_total, docstring: 'A counter of readdir()-calls while long taken mail dir scanning' + disconnect_reasons = ['logged out', 'connection closed', 'inactivity'] + disconnect_reasons.each {|r| @connection_stats.increment by: 0, labels: {disconnect_reason: r} } + @connection_stats = + Hash[ *%i[in out deleted expunged trashed hdr_count hdr_bytes body_count body_bytes].flat_map {|t| + [t, prometheus.counter( :"connection_stats_#{t}_total", docstring: "Counter for #{t} statistics observed after disconnected")] + }] + end + + def collect entry, msg + case msg + when /\ALogged out (.*)/ + # imap(srv_rt0@nfotex.com)<936759>: Logged out in=38 out=804 deleted=0 expunged=0 trashed=0 hdr_count=0 hdr_bytes=0 body_count=0 body_bytes=0 + @logged_out.increment + $1.split( ' ').each {|x| t, v = x.split('='); @connection_stats[t.to_sym]&.increment by: v.to_f } + when /\AConnection closed \([^)]+\) (.*)/ + # imap(johannes@nfotex.com)<936668>: Connection closed (EXAMINE finished 0.041 secs ago) in=5253 out=390971 deleted=0 expunged=0 trashed=0 hdr_count=14 hdr_bytes=6569 body_count=14 body_bytes=336589 + @connection_closed.increment + $1.split( ' ').each {|x| t, v = x.split('='); @connection_stats[t.to_sym]&.increment by: v.to_f } + when /\AConnection closed: .* failed: \([^)]+\) (.*)/ + # imap(wiz@nfotex.com)<1447340><0OOGu+XeasMqAoOIC8CCAHA59y+2iMag>: Connection closed: read(size=6100) failed: Connection reset by peer (UID FETCH finished 0.159 secs ago) in=2982 out=17044659 deleted=0 expunged=0 trashed=0 hdr_count=1 hdr_bytes=3724 body_count=169 body_bytes=16970415 + @connection_closed.increment + $1.split( ' ').each {|x| t, v = x.split('='); @connection_stats[t.to_sym]&.increment by: v.to_f } + when /\ADisconnected for inactivity (.*)/ + @inactivity.increment + $1.split( ' ').each {|x| t, v = x.split('='); @connection_stats[t.to_sym]&.increment by: v.to_f } + when /\AWarning: Maildir: Scanning .+? took (?\d+) seconds \((?\d+) readdir\(\)s, (?\d+) rename\(\)s to cur\/, why=0x(?[0-9a-fA-F]+)\)/ + # Warning: Maildir: Scanning /var/mail/nfotex.com/wiz/mails/.Updates/cur took 49 seconds (86044 readdir()s, 0 rename()s to cur/, why=0x80) + m = $~ + @maildir_scanning_took_long.observe m[:took].to_i, labels: m[:why].to_i(16) + @maildir_scanning_took_long_rename.increment by: m[:rename].to_i + @maildir_scanning_took_long_readdir.increment by: m[:readdir].to_i + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} imap| #{entry.message}" + end + end + end + + class ImapLogin + def initialize store, prometheus + @store = store + @logged_in = prometheus.counter :logged_in_total, docstring: 'A counter of successfull logins to dovecot' + @aborted = prometheus.counter :login_aborted_total, docstring: 'A counter of aborted logins' + @disconnected = prometheus.counter :login_disconnected_total, docstring: 'A counter of disconnections before successfully logged in', labels: %i[reason] + end + + def collect entry, msg + case msg + when /\ALogin: user=/ + @logged_in.increment + when /\ADisconnected \((.*?)\): user='} + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} Disconnected before login| #{entry.message}" + end + when /\AAborted login/ + @aborted.increment + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} imap-login| #{entry.message}" + end + end + end + + def initialize store, prometheus + @store = store + @sieve = Sieve.new store, Collector::PrefixProxy.new( prometheus, :sieve) + @saved_mail_to_mailbox = prometheus.counter :saved_mail_to_mailbox_total, docstring: "A counter of saved mails to mailbox directly", labels: %i[process] + @lmtp = Delivery.new store, prometheus, @sieve, @saved_mail_to_mailbox, :lmtp + @deliver = Delivery.new store, prometheus, @sieve, @saved_mail_to_mailbox, :deliver + @imap_login = ImapLogin.new store, prometheus + @imap = Imap.new store, prometheus + end + + def collect entry + # STDERR.puts "dovecot| #{entry.message}" + case entry.message + when /\Aimap-login: (.*)/ + @imap_login.collect entry, $1 + when /\Aauth: Error: (.*)/ + case $1 + when /\ALDAP: Connection lost to LDAP server, / + @auth_ldap_connection_lost.increment + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}| #{entry.message}" + @auth_errors.increment + end + when /\Aimap\([^)]+\)(?:<[^ ]+>)?: (.*)/ + @imap.collect entry, $1 + when /\Almtp\([^ ]+\)<[^ ]+>: (.*)/ + @lmtp.collect entry, $1 + when /\Almtp\([^ ]+\): (.*)/ + @lmtp.collect entry, $1 + when /\Adeliver(?:[^:]+): (.*)/ + @deliver.collect entry, $1 + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}| #{entry.message}" + end + end +end diff --git a/lib/collector/postfix.rb b/lib/collector/postfix.rb new file mode 100644 index 0000000..5ef2f3d --- /dev/null +++ b/lib/collector/postfix.rb @@ -0,0 +1,450 @@ +class Collector::Postfix + class Noqueue + def initialize store, prometheus + @store = store + @noqueue = prometheus.counter :total, docstring: 'Total noqueued by reasons.', labels: %i[reason] + @codes = prometheus.counter :status_code_total, docstring: 'Total noqueued by status code', labels: %i[status_code enhanced_status_code] + ['dnsbl', 'no reverse hostname', 'user does not exist', ''].each {|r| @noqueue.increment by: 0, labels: {reason: r} } + end + + def collect entry + case entry.message + when /\ANOQUEUE: reject: RCPT from (?:[^ ]+): (\d+) (\d+\.\d+\.\d+) (?:[^ ]+): (.*?),/ + code, enh = $1, $2 + @codes.increment labels: {status_code: code, enhanced_status_code: enh} + end + case entry.message + when /\ANOQUEUE: reject: RCPT from .* blocked using / + @noqueue.increment labels: {reason: 'dnsbl'} + when /\ANOQUEUE: reject: RCPT from .* Message rejected due to: SPF fail - not authorized\. / + @noqueue.increment labels: {reason: 'spf fail'} + when / Client host rejected: cannot find your reverse hostname/ + @noqueue.increment labels: {reason: 'no reverse hostname'} + when / User doesn't exist: / + @noqueue.increment labels: {reason: 'user does not exist'} + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} NOQUEUE: #{entry.message}" + @noqueue.increment labels: {reason: ''} + end + end + end + + class Cache + def initialize store, prometheus + @store = store + @full_cleanup = prometheus.counter :full_cleanup_total, docstring: 'A counter of total cache cleanups', labels: %i[file] + @full_cleanup_retained = prometheus.gauge :full_cleanup_retained_entries, docstring: 'Retained entries of last cache cleanups', labels: %i[file] + @full_cleanup_dropped = prometheus.counter :full_cleanup_dropped_entries_total, docstring: 'Total dropped entries of cache cleanups', labels: %i[file] + end + + def collect entry + case entry.message + when /\Acache (.*) full cleanup: retained=(\d+) dropped=(\d+) entries/ + # postscreen: cache lmdb:/var/lib/postfix/postscreen_cache full cleanup: retained=128 dropped=14 entries + file, retained, dropped = $1, $2.to_f, $3.to_f + @full_cleanup.increment labels: {file: file} + @full_cleanup_retained.increment by: retained, labels: {file: file} + @full_cleanup_dropped.increment by: dropped, labels: {file: file} + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} cache: #{entry.message}" + end + end + end + + class Postscreen + def initialize store, prometheus + @store = store + @noqueue = Noqueue.new store, Collector::PrefixProxy.new( prometheus, :noqueue) + @cache = Cache.new store, Collector::PrefixProxy.new( prometheus, :cache) + @connect_from = prometheus.counter :connect_from_total, docstring: 'A counter of connections to postscreen' + @whitelisted = prometheus.counter :whitelisted_total, docstring: 'A counter of WHITELISTED connections to postscreen' + @pass_old = prometheus.counter :pass_old_total, docstring: 'A counter of PASS OLD connections to postscreen' + @pass_new = prometheus.counter :pass_new_total, docstring: 'A counter of PASS NEW connections to postscreen' + @dnsbl = prometheus.counter :dnsbl_total, docstring: 'A counter of DNSBL-blocked to postscreen' + @bare_newline = prometheus.counter :bare_newline_total, docstring: 'A counter of BARE NEWLINE-blocked to postscreen' + @command_pipelining = prometheus.counter :command_pipelining_total, docstring: 'A counter of COMMAND PIPELINING-blocked to postscreen' + @command_time_limit = prometheus.counter :command_time_limit_total, docstring: 'A counter of COMMAND TIME LIMIT-blocked to postscreen' + @hangup = prometheus.counter :hangup_total, docstring: 'A counter of HANGUP to postscreen' + @bdat = prometheus.counter :bdat_total, docstring: 'A counter of BDAT to postscreen' + @pregreet = prometheus.counter :pregreet_total, docstring: 'A counter of PREGREET to postscreen' + @disconnect = prometheus.counter :disconnect_total, docstring: 'A counter of DISCONNECT to postscreen' + @unknown = prometheus.counter :unknown_total, docstring: 'A counter of unknown loglines by postscreen' + @psc_cache_update_delay = prometheus.summary :psc_cache_update_delay_total, docstring: 'A counter of PSC cache update delays by file', labels: %i[file] + @curr_unavailable = prometheus.counter :service_currently_unavailable_total, docstring: 'A counter for rejected mails, because service currently unavailable - so greylisted.' + @dnsblog_reply_timeout = prometheus.counter :dnsblog_reply_timeout_total, docstring: 'Total timedout requests for dnsblog' + @data_without_valid_rcpt= prometheus.counter :data_without_valid_rcpt_total, docstring: 'A counter of DATA without valid RCPT events' + @warnings = prometheus.counter :warnings_total, docstring: 'A counter of any warnings' + end + + def collect entry + #STDERR.puts "postscreen: #{entry.message}" + case entry.message + when /\ACONNECT from / + @connect_from.increment + when /\AWHITELISTED / + @whitelisted.increment + when /\APASS OLD / + @pass_old.increment + when /\APASS NEW / + @pass_new.increment + when /\ADISCONNECT / + @disconnect.increment + when /\APREGREET / + @pregreet.increment + when /\ABDAT / + @bdat.increment + when /\ANOQUEUE: (.*)/ + case msg = $1 + when /\Areject: RCPT from [^ ]+: 450 4.3.2 Service currently unavailable; / + @curr_unavailable.increment + else + @noqueue.collect entry + end + when /\AHANGUP / + @hangup.increment + when /\ADNSBL rank / + @dnsbl.increment + when /\ABARE NEWLINE / + @bare_newline.increment + when /\ACOMMAND PIPELINING / + @command_pipelining.increment + when /\ACOMMAND TIME LIMIT / + @command_time_limit.increment + when /\ADATA without valid RCPT / + @data_without_valid_rcpt.increment + when /\Awarning: (.*)/ + @warnings.increment + case $1 + when /\Apsc_cache_update: ([^ ]+) update average delay is ([^ ]+)/ + @psc_cache_update_delay.observe $2.to_f, labels: {file: $1} + when /\Adnsblog reply timeout / + @dnsblog_reply_timeout.increment + when /\Agetpeername: Transport endpoint is not connected -- dropping this connection/ + else + STDERR.puts "# postscreen warnings: #{entry.message}" + end + when /\Acache / + @cache.collect entry + else + STDERR.puts "# postscreen: #{entry.message}" + @unknown.increment + end + end + end + + def self.tls_posibilities + %w[Trusted Untrusted Anonymous].each do |trust| + %w[TLSv1.2 TLSv1.3].each do |tls| + %w[TLS_AES_128_GCM_SHA256 TLS_AES_256_GCM_SHA384 ECDHE-RSA-AES128-GCM-SHA256 ECDHE-RSA-AES256-GCM-SHA384].each do |cipher| + yield trust, tls, cipher + end + end + end + end + + class Smtp + def initialize store, prometheus + @store = store + @connection_refused = prometheus.counter :connection_refused_total, docstring: 'A counter of refused connections on smtp' + @connection_timed_out = prometheus.counter :connection_timed_out_total, docstring: 'A counter of timed out connections on smtp' + @tls = prometheus.counter :tls_total, docstring: 'A counter of TLS connections on smtp with TLS-version and cipher', labels: %i[trust tls cipher] + Collector::Postfix.tls_posibilities {|t, s, c| @tls.increment by: 0, labels: {trust: t, tls: s, cipher: c} } + @status = prometheus.histogram :status, docstring: 'A histogram of message status by status', labels: %i[status] + @sent = prometheus.counter :sent_total, docstring: 'A counter of sent messages by smtp' + @deferred = prometheus.counter :deferred_total, docstring: 'A counter of deferred messages by smtp' + @bounced = prometheus.counter :bounced_total, docstring: 'A counter of bounced messages by smtp' + @deliverable = prometheus.counter :deliverable_total, docstring: 'A counter of deliverable messages by smtp' + @undeliverable = prometheus.counter :undeliverable_total, docstring: 'A counter of undeliverable messages by smtp' + @status_unknown = prometheus.counter :status_unknown_total, docstring: 'A counter of unknown status by smtp' + @unknown = prometheus.counter :unknown_total, docstring: 'A counter of unknown loglines by smtp' + end + + def collect entry + #STDERR.puts "smtp: #{entry.message}" + case entry.message + when /\Aconnect to / + case entry.message + when / Connection refused\z/ + @connection_refused.increment + when / Connection timed out\z/ + @connection_timed_out.increment + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} connect to: #{entry.message}" + end + when /\A([^ ]+) TLS connection established to .* ([^ ]+) with cipher ([^ ]+)/ + @tls.increment labels: {trust: $1, tls: $2, cipher: $3} + when /\A\w+: .* delay=([0-9.]+),.*status=([^ ]+)/ + # postfix@-.service: postfix/smtp/smtp 4KZ0KY5Wx4z4Mn: to=, relay=mail.psi.co.at[81.223.32.197]:25, delay=0.76, delays=0.2/0/0.35/0.21, dsn=2.0.0, status=sent (250 2.0.0 Ok: queued as 710F7A0263) + delay, status = $1.to_f, $2.downcase + @status.observe delay, labels: {status: status} + case status + when 'sent' + @sent.increment + when 'deferred' + @deferred.increment + when 'bounced' + @bounced.increment + when 'deliverable' + @deliverable.increment + when 'undeliverable' + @undeliverable.increment + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} status: #{entry.message}" + @status_unknown.increment + end + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}" + @unknown.increment + end + end + end + + class Smtpd + def initialize store, prometheus + @store = store + @noqueue = Noqueue.new store, Collector::PrefixProxy.new( prometheus, :noqueue) + @connect_from = prometheus.counter :connect_from_total, docstring: 'A counter of connections to smtpd', labels: %i[from_unknown] + %w[0 1].each {|x| @connect_from.increment by: 0, labels: {from_unknown: x} } + @tls = prometheus.counter :tls_total, docstring: 'A counter of TLS connections to smtpd with TLS-version and cipher', labels: %i[trust tls cipher] + Collector::Postfix.tls_posibilities {|t, s, c| @tls.increment by: 0, labels: {trust: t, tls: s, cipher: c} } + @disconnect_from = prometheus.counter :disconnect_from_total, docstring: 'A counter of "disconnect from" to smtpd' + @disconnect_from_events = prometheus.counter :disconnect_from_event_total, docstring: 'A counter of events while connection-lifetime (auth, starttls, mail, rcpt, data, commands, ...) from disconnections', labels: %i[event] + %w[auth mail rcpt data commands starttls ehlo quit].each {|e| @disconnect_from_events.increment by: 0, labels: {event: e} } + @concurrenty_limit_exceeded = prometheus.counter :concurrenty_limit_exceeded_total, docstring: 'A counter of concurrenty limit exceeded connections to smtpd' + @timeout = prometheus.counter :timeout_connection_total, docstring: 'A counter of timedout connections to smtpd', labels: %i[after] + @lost_connection = prometheus.counter :lost_connection_total, docstring: 'A counter of lost connections to smtpd', labels: %i[after] + @accepted = prometheus.counter :accepted_total, docstring: 'A counter of accepted messages to smtpd' + @unknown = prometheus.counter :unknown_total, docstring: 'A counter of unknown loglines by smtpd' + @sasl_auth_failed = prometheus.counter :sasl_auth_failed_total, docstring: 'A counter of failed SASL authentication by method', labels: %i[method] + @non_smtp_command = prometheus.counter :non_smtp_command_total, docstring: 'A counter of Non-SMTP-commands (ex. was a HTTP GET / HTTP/1.1)' + @tls_lib_problem = prometheus.counter :tls_lib_problem_total, docstring: 'A counter of TLS libreary problems (ex. unsupported protocol' + @ssl_error = prometheus.counter :ssl_error_total, docstring: 'A counter of any SSL_accept errors by error', labels: %i[error] + @warnings = prometheus.counter :warnings_total, docstring: 'A counter of any warnings' + %w[].each {|m| @sasl_auth_failed.increment by: 0, labels: {method: m} } + @hostname_not_resolved_to_address = prometheus.counter :hostname_not_resolved_to_address_total, docstring: 'A counter of hostnames, which cannot be resolved to there IP' + end + + def collect entry + #STDERR.puts "smtpd: #{entry.message}" + case entry.message + when /\Aconnect from unknown/ + @connect_from.increment labels: {from_unknown: 1} + when /\Aconnect from / + @connect_from.increment labels: {from_unknown: 0} + when /\A([^ ]+) TLS connection established from .*: ([^ ]+) with cipher ([^ ]+) / + @tls.increment labels: {trust: $1, tls: $2, cipher: $3} + when /\ANOQUEUE: / + @noqueue.collect entry + when /\Adisconnect from ([^ ]+) (.*)/ + @disconnect_from.increment + # ehlo=2 starttls=1 auth=1 mail=1 rcpt=1 data=1 commands=8 + $2.split( ' ').each do |x| + case x + when /(\w+)=(\d+)/ + @disconnect_from_events.increment by: $2.to_f, labels: {event: $1} + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} disconnect from: [#{x}] #{entry.message}" + end + end + when /\Awarning: (.*)/ + @warnings.increment + case $1 + when /\AConnection concurrency limit exceeded: / + @concurrenty_limit_exceeded.increment + when /\Ahostname ([^ ]+) does not resolve to address / + @hostname_not_resolved_to_address.increment + when /\A[^ ]+\[([^ ]+)\]: SASL ([^ ]+) authentication failed/ + @sasl_auth_failed.increment labels: {method: $1.downcase} + when /\Anon-SMTP command from / + @non_smtp_command.increment + when /\ATLS library problem / + @tls_lib_problem.increment + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} wanring: #{entry.message}" + end + when /\ASSL_accept error from [^ ]+: (-?\d)/ + @ssl_error.increment labels: {error: $1.to_i} + when /\Atimeout after ([^ ]+) from / + @timeout.increment labels: {after: $1} + when /\Alost connection after ([^ ]+) from / + @lost_connection.increment labels: {after: $1} + when /\A\w{8,15}: client=/ # sasl_method= + @accepted.increment + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}" + @unknown.increment + end + end + end + + class Tlsproxy + def initialize store, prometheus + @store = store + @connect = prometheus.counter :connect_total, docstring: 'A counter of COUNNECT from tlsproxy' + @disconnect = prometheus.counter :disconnect_total, docstring: 'A counter of DISCOUNNECT from tlsproxy' + @tls = prometheus.counter :tls_total, docstring: 'A counter of TLS connections to smtpd with TLS-version and cipher', labels: %i[trust tls cipher] + Collector::Postfix.tls_posibilities {|t, s, c| @tls.increment by: 0, labels: {trust: t, tls: s, cipher: c} } + end + + def collect entry + case entry.message + when /\ADISCONNECT / + @disconnect.increment + when /\ACONNECT / + @connect.increment + when /\A([^ ]+) TLS connection established from .*: ([^ ]+) with cipher ([^ ]+) / + @tls.increment labels: {trust: $1, tls: $2, cipher: $3} + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}" + end + end + end + + class Bounce + def initialize store, prometheus + @store = store + @non_delivery = prometheus.counter :sender_non_delivery_notification, docstring: 'A counter of notifications to sender because mail cannot deliveried' + end + + def collect entry + case entry.message + when /\A([^ ]+): sender non-delivery notification: (.+)/ + # postfix@-.service: postfix/bounce/bounce 4L3cN05mX6zfB: sender non-delivery notification: 4L3cN06Qpkz4Fc + @non_delivery.increment + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}" + end + end + end + + class SCache + def initialize store, prometheus + @store = store + @start = prometheus.gauge :start, docstring: 'Start timestamp of SCache statistics' + @domain_lookup_hits = prometheus.counter :domain_lookup_hits_total, docstring: 'Count of hits for domain lookups of SCache' + @domain_lookup_miss = prometheus.counter :domain_lookup_miss_total, docstring: 'Count of misses for domain lookups of SCache' + @domain_lookup_success_rate = prometheus.gauge :domain_lookup_success_rate, docstring: 'Success rate of domain lookups of SCache' + @max_simultaneaus_domains = prometheus.gauge :max_simultaneaus_domains, docstring: 'Max simultaneaus connections Domains of SCache' + @max_simultaneaus_addresses = prometheus.gauge :max_simultaneaus_addresses, docstring: 'Max simultaneaus connections Addresses of SCache' + @max_simultaneaus_connections = prometheus.gauge :max_simultaneaus_connections, docstring: 'Max simultaneaus connections of SCache' + end + + def collect entry + case entry.message + when /\Astatistics: start interval (.*)/ + # postfix@-.service: postfix/scache/scache statistics: start interval Apr 13 10:58:06 + @start.set Time.parse( $1).to_f + when /\Astatistics: domain lookup hits=(\d+) miss=(\d+) success=(\d+)%/ + # postfix@-.service: postfix/scache/scache statistics: domain lookup hits=2 miss=4 success=33% + @domain_lookup_hits.increment by: $1.to_f + @domain_lookup_miss.increment by: $2.to_f + @domain_lookup_success_rate.set $3.to_f/100 + when /\Astatistics: max simultaneous domains=(\d+) addresses=(\d+) connection=(\d+)/ + # postfix@-.service: postfix/scache/scache statistics: max simultaneous domains=1 addresses=1 connection=4 + @max_simultaneaus_domains.set $1.to_f + @max_simultaneaus_addresses.set $2.to_f + @max_simultaneaus_connections.set $3.to_f + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}" + end + end + end + + class Verify + def initialize store, prometheus + @store = store + @cache = Cache.new store, Collector::PrefixProxy.new( prometheus, :cache) + end + + def collect entry + case entry.message + when /\Acache / + @cache.collect entry + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}" + end + end + end + + def initialize store, prometheus + @store = store + @postscreen = Postscreen.new store, Collector::PrefixProxy.new( prometheus, :postscreen) + @smtp = Smtp.new store, Collector::PrefixProxy.new( prometheus, :smtp) + @smtpd = Smtpd.new store, Collector::PrefixProxy.new( prometheus, :smtpd) + @submission = Smtpd.new store, Collector::PrefixProxy.new( prometheus, :submission) + @tlsproxy = Tlsproxy.new store, Collector::PrefixProxy.new( prometheus, :tlsproxy) + @scache = SCache.new store, Collector::PrefixProxy.new( prometheus, :scache) + @bounce = Bounce.new store, Collector::PrefixProxy.new( prometheus, :bounce) + @qmgr = prometheus.counter :qmgr_total, docstring: 'A counter of qmgr actions' + @cleanup = prometheus.counter :cleanup_total, docstring: 'A counter of cleanup actions' + @lmtp = prometheus.counter :lmtp_total, docstring: 'A counter of ltmp actions' + @pickup = prometheus.counter :pickup_total, docstring: 'A counter of pickup actions' + @spf = prometheus.counter :spf_total, docstring: 'A counter of prepended SPF header-lines with state', labels: %i[status] + @spf_fail = prometheus.counter :spf_fail_total, docstring: 'A counter of policyd-SPF failed SPF' + %w[pass none].each {|s| @spf.increment by: 0, labels: {status: s} } + @dnsblog = prometheus.counter :dnsblog_total, docstring: 'A counter for DNS-Blacklisted IP by DNSBL', labels: %i[dnsbl] + @anvil_max_connection_rate_per_minute = prometheus.gauge :anvil_max_connection_per_minute_tate, docstring: "Rate of max connections per minute to listener", labels: %i{listener} + @anvil_max_connection_count = prometheus.gauge :anvil_max_connection_count, docstring: "Count of max connections to listener", labels: %i{listener} + @anvil_max_cache_size = prometheus.gauge :anvil_max_cache_size, docstring: "Current max cache size" + end + + def collect entry + #STDERR.puts "postfix: #{entry.syslog_identifier}: #{entry.message}" + case entry.syslog_identifier + when 'postfix/tlsproxy/tlsproxy' + @tlsproxy.collect entry + when 'postfix/smtp/postscreen', 'postfix/smtp/postscreen' + @postscreen.collect entry + when 'postfix/smtp', 'postfix/smtp/smtp' + @smtp.collect entry + when 'postfix/smtpd', 'postfix/smtpd/smtpd' + @smtpd.collect entry + when 'postfix/submission/smtpd' + @submission.collect entry + when 'postfix/bounce', 'postfix/bounce/bounce' + @bounce.collect entry + when 'postifx/verify', 'postfix/verify/verify' + @verify.collect entry + when 'policyd-spf' + case entry.message + when /\Aprepend Received-SPF: (\w+) / + @spf.increment labels: {status: $1.downcase} + when /\A[^ ]+ [^ ]+ Message rejected due to: SPF fail / + # postfix@-.service policyd-spf: 550 5.7.23 Message rejected due to: SPF fail - not authorized. + @spf_fail.increment + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}" + end + when 'postfix/cleanup', 'postfix/cleanup/cleanup' + @cleanup.increment + when 'postfix/qmgr', 'postfix/qmgr/qmgr' + @qmgr.increment + when 'postfix/lmtp', 'postfix/lmtp/lmtp' + @lmtp.increment + when 'postfix/pickup', 'postfix/pickup/pickup' + @pickup.increment + when 'postfix/dnsblog', 'postfix/dnsblog/dnsblog' + case entry.message + when /\Aaddr [^ ]+ listed by domain ([^ ]+) as / + @dnsblog.increment labels: {dnsbl: $1.downcase} + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}" + end + when 'postfix/anvil', 'postfix/anvil/anvil' + case entry.message + when /\Astatistics: max connection rate (\d+)\/60s for \((.+)\) at (.*)/ + @anvil_max_connection_rate_per_minute.set $1.to_i, labels: {listener: $2} + when /\Astatistics: max connection count (\d+) for \((.+)\) at (.*)/ + @anvil_max_connection_count.set $1.to_i, labels: {listener: $2} + when /\Astatistics: max cache size (\d+) at (.*)/ + @anvil_max_cache_size.set $1.to_i + else + STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}" + end + when 'postfix/scache', 'postfix/scache/scache' + @scache.collect entry + else + STDERR.puts "# #{entry._systemd_unit}: #{entry.syslog_identifier} #{entry.message}" + end + end +end diff --git a/postfix_exporter.rb b/postfix_exporter.rb index 86ffdbe..025f781 100644 --- a/postfix_exporter.rb +++ b/postfix_exporter.rb @@ -6,235 +6,8 @@ require 'prometheus/client' require 'prometheus/client/formats/text' require 'ostruct' -class Collector - attr_reader :journal, :prometheus - - class PrefixProxy - attr_reader :prometheus, :prefix - def initialize prometheus, prefix - @prometheus, @prefix = prometheus, prefix - end - - def counter name, docstring, **options - @prometheus.counter :"#{prefix}_#{name}", docstring: docstring, **options - end - end - - class Dovecot - attr_reader :logged_in, :logged_out, :connection_closed - def initialize prometheus - @logged_in = prometheus.counter :logged_in, 'A counter of successfull logins to dovecot' - @logged_out = prometheus.counter :logged_out, 'A counter of logouts on dovecot' - @connection_closed = prometheus.counter :connection_closed, 'A counter of closed connection on dovecot' - end - - def collect entry - case entry.message - when /\Aimap-login: Login: user=/ - @logged_in.increment - when /\Aimap\([^)]+\): Logged out / - @logged_out.increment - when /\Aimap\([^)]+\): Connection closed / - @connection_closed.increment - end - end - end - - class Postscreen - attr_reader :connect_from, :whitelisted, :pass_old, :dnsbl, :noqueue, :hangup, :disconnect, :unknown - def initialize prometheus - @connect_from = prometheus.counter :connect_from, 'A counter of connections to postscreen' - @whitelisted = prometheus.counter :whitelisted, 'A counter of WHITELISTED connections to postscreen' - @pass_old = prometheus.counter :pass_old, 'A counter of PASS OLD connections to postscreen' - @dnsbl = prometheus.counter :dnsbl, 'A counter of DNSBL-blocked to postscreen' - @noqueue = prometheus.counter :noqueue, 'A counter of NOQUEUE to postscreen', reason: "unknown" - @hangup = prometheus.counter :hangup, 'A counter of HANGUP to postscreen' - @disconnect = prometheus.counter :disconnect, 'A counter of DISCONNECT to postscreen' - @unknown = prometheus.counter :unknown, 'A counter of unknown loglines by postscreen' - end - - def collect entry - case entry.message - when /\ACONNECT from / - @connect_from.increment - when /\AWHITELISTED / - @whitelisted.increment - when /\APASS OLD / - @pass_old.increment - when /\ADISCONNECT / - @disconnect.increment - when /\ANOQUEUE: / - case entry.message - when / blocked using / - @noqueue.increment reason: 'dnsbl' - end - when /\AHANGUP / - @hangup.increment - when /\ADNSBL rank / - @dnsbl.increment - else - @unknown.increment - end - end - end - - class Smtp - def initialize prometheus - @connection_refused = prometheus.counter :connection_refused, 'A counter of connection refused on smtp' - @connection_timed_out = prometheus.counter :connection_timed_out, 'A counter of timed out connections on smtp' - @tls = prometheus.counter :tls, 'A counter of TLS connections on smtp with TLS-version and cipher', labels: %w[trust tls cipher] - @status = prometheus.counter :status, 'A counter of message status by status', labels: %w[status] - @sent = prometheus.counter :sent, 'A counter of sent messages by smtp' - @deferred = prometheus.counter :deferred, 'A counter of deferred messages by smtp' - @bounced = prometheus.counter :bounced, 'A counter of bounced messages by smtp' - @deliverable = prometheus.counter :deliverable, 'A counter of deliverable messages by smtp' - @undeliverable = prometheus.counter :undeliverable, 'A counter of undeliverable messages by smtp' - @status_unknown = prometheus.counter :status_unknown, 'A counter of unknown status by smtp' - @unknown = prometheus.counter :unknown, 'A counter of unknown loglines by smtp' - end - - def collect entry - case entry.message - when /\Aconnect to / - case entry.message - when / Connection refused\z/ - @connection_refused.increment - when / Connection timed out\z/ - @connection_timed_out.increment - end - when /\A([^ ]+) TLS connection established to .* ([^ ]+) with cipher ([^ ]+)/ - @tls.increment trust: $1, tls: $2, cipher: $3 - when /\A\w{8,15}: .*status=([^ ]+)/ - status = $1 - @status.increment status: status - case status - when 'sent' - @sent.increment - when 'deferred' - @deferred.increment - when 'bounced' - @bounced.increment - when 'deliverable' - @deliverable.increment - when 'undeliverable' - @undeliverable.increment - else - @status_unknown.increment - end - else - @unknown.increment - end - end - end - - class Smtpd - def initialize prometheus - @connect_from = prometheus.counter :connect_from, 'A counter of connections to smtpd' - @tls = prometheus.counter :tls, 'A counter of TLS connections to smtpd with TLS-version and cipher' - @disconnect_from = prometheus.counter :disconnect_from, 'A counter of disconnections to smtpd' - @noqueue = prometheus.counter :noqueue, 'A counter of NOQUEUE by smtpd', reason: "uknown" - @concurrenty_limit_exceeded = prometheus.counter :concurrenty_limit_exceeded, 'A counter of concurrenty limit exceeded connections to smtpd' - @timeout = prometheus.counter :timeout_connection, 'A counter of timedout connections to smtpd' - @lost_connection = prometheus.counter :lost_connection, 'A counter of lost connections to smtpd' - @accepted = prometheus.counter :accepted, 'A counter of accepted messages to smtpd' - @unknown = prometheus.counter :unknown, 'A counter of unknown loglines by smtpd' - end - - def collect entry - case entry.message - when /\Aconnect from unknown/ - @connect_from.increment unknown: 1 - when /\Aconnect from / - @connect_from.increment unknown: 0 - when /\A([^ ]+) TLS connection established from .*: ([^ ]+) with cipher ([^ ]+) / - @tls.increment trust: $1, tls: $2, cipher: $3 - when /\ANOQUEUE: / - case entry.message - when / Client host rejected: cannot find your reverse hostname / - @noqueue.increment reason: 'no reverse hostname' - when / User doesn't exist: / - @noqueue.increment reason: 'user does not exist' - else - @noqueue.increment reason: 'any' - end - when /\Adisconnect from / # ehlo=2 starttls=1 auth=1 mail=1 rcpt=1 data=1 commands=8 - @disconnect_from.increment - when /\Awarning: Connection concurrency limit exceeded: / - @concurrenty_limit_exceeded.increment - when /\Atimeout after ([^ ]+) from / - @timeout.increment after: $1 - when /\Alost connection after ([^ ]+) from / - @lost_connection.increment after: $1 - when /\A\w{8,15}: client=/ # sasl_method= - @accepted.increment - else - @unknown.increment - end - end - end - - class Postfix - def initialize prometheus - @postscreen = Postscreen.new PrefixProxy.new( prometheus, :postscreen) - @smtp = Smtp.new PrefixProxy.new( prometheus, :smtp) - @smtpd = Smtpd.new PrefixProxy.new( prometheus, :smtpd) - @submission = Smtpd.new PrefixProxy.new( prometheus, :submission) - @qmgr = prometheus.counter :qmgr, 'A counter of qmgr actions' - @cleanup = prometheus.counter :cleanup, 'A counter of cleanup actions' - end - - def collect entry - case entry.syslog_identifier - when 'postfix/postscreen' - @postscreen.collect entry - when 'postfix/smtp' - @smtp.collect entry - when 'postfix/smtpd' - @smtpd.collect entry - when 'postfix/submission/smtpd' - @submission.collect entry - when 'postfix/cleanup' - @metrics.cleanup.increment - when 'postfix/qmgr' - @metrics.qmgr.increment - end - end - end - - def initialize prometheus: nil, journal: nil - @journal = journal || Systemd::Journal.new( flags: Systemd::Journal::Flags::SYSTEM_ONLY) - @prometheus = prometheus || Prometheus::Client.registry - - @dovecot = Dovecot.new PrefixProxy.new( @prometheus, :dovecot) - @postfix = Postfix.new PrefixProxy.new( @prometheus, :postfix) - end - - def start - Thread.abort_on_exception = true - Thread.new do - begin - run - rescue Object - STDERR.puts "#$! (#{$!.class})", $!.backtrace.map {|x| " in #{x}"} - raise - end - end - end - - def run - @journal.seek :tail - @journal.move_previous - @journal.watch do |entry| - case entry._systemd_unit - when 'dovecot.service' - @dovecot.collect entry - when 'postfix@-.service' - @postfix.collect entry - end - end - end -end +require_relative 'lib/collector' if __FILE__ == $0 - run + Collector.start end