require 'time' # Time.parse (used by SCache#collect) comes from the stdlib 'time' library, not from core

# Turns Postfix-related log entries into Prometheus metrics.
#
# All collectors in this file follow the same shape:
#
# * +initialize store, prometheus+ registers the metrics on +prometheus+, which
#   must respond to +counter+, +gauge+, +summary+, +histogram+ and
#   +prefix_proxy+. +store+ is only kept in @store; nothing in this file reads it.
# * +collect entry+ takes one log entry responding to +message+,
#   +syslog_identifier+ and +_systemd_unit+ (presumably a systemd journal
#   entry - confirm against the caller), matches the message against the known
#   patterns and updates the matching metric. Messages that match no pattern
#   are written to STDERR.
class Collector::Postfix
  # Counts "NOQUEUE: reject: RCPT ..." lines, once by SMTP status code and once
  # by rejection reason.
  class Noqueue
    def initialize store, prometheus
      @store = store
      @noqueue = prometheus.counter :total, docstring: 'Total noqueued by reasons.', labels: %i[reason]
      @codes = prometheus.counter :status_code_total, docstring: 'Total noqueued by status code',
        labels: %i[status_code enhanced_status_code]
      # Create the listed reason series up front with value 0; '' is the reason used for unmatched lines.
      ['dnsbl', 'no reverse hostname', 'user does not exist', ''].each {|r| @noqueue.increment by: 0, labels: {reason: r} }
    end

    def collect entry
      # First pass: status code / enhanced status code, independent of the reason.
      case entry.message
      when /\ANOQUEUE: reject: RCPT from (?:[^ ]+): (\d+) (\d+\.\d+\.\d+) (?:[^ ]+): (.*?),/
        code, enh = $1, $2
        @codes.increment labels: {status_code: code, enhanced_status_code: enh}
      end

      # Second pass: classify the rejection reason.
      case entry.message
      when /\ANOQUEUE: reject: RCPT from .* blocked using /
        @noqueue.increment labels: {reason: 'dnsbl'}
      when /\ANOQUEUE: reject: RCPT from .* Message rejected due to: SPF fail - not authorized\. /
        @noqueue.increment labels: {reason: 'spf fail'}
      when / Client host rejected: cannot find your reverse hostname/
        @noqueue.increment labels: {reason: 'no reverse hostname'}
      when / User doesn't exist: /
        @noqueue.increment labels: {reason: 'user does not exist'}
      else
        STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} NOQUEUE: #{entry.message}"
        @noqueue.increment labels: {reason: ''}
      end
    end
  end

  # Tracks "cache ... full cleanup" lines per cache file.
  class Cache
    def initialize store, prometheus
      @store = store
      @full_cleanup = prometheus.counter :full_cleanup_total,
        docstring: 'A counter of total cache cleanups', labels: %i[file]
      @full_cleanup_retained = prometheus.gauge :full_cleanup_retained_entries,
        docstring: 'Retained entries of last cache cleanups', labels: %i[file]
      @full_cleanup_dropped = prometheus.counter :full_cleanup_dropped_entries_total,
        docstring: 'Total dropped entries of cache cleanups', labels: %i[file]
    end

    def collect entry
      case entry.message
      when /\Acache (.*) full cleanup: retained=(\d+) dropped=(\d+) entries/
        # postscreen: cache lmdb:/var/lib/postfix/postscreen_cache full cleanup: retained=128 dropped=14 entries
        file, retained, dropped = $1, $2.to_f, $3.to_f
        @full_cleanup.increment labels: {file: file}
        # The gauge reports the retained entries of the *last* cleanup, so it is
        # overwritten; incrementing would sum the values of all cleanups.
        @full_cleanup_retained.set retained, labels: {file: file}
        @full_cleanup_dropped.increment by: dropped, labels: {file: file}
      else
        STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} cache: #{entry.message}"
      end
    end
  end

  # Counts postscreen events; cache and NOQUEUE lines are delegated to Cache and Noqueue.
  class Postscreen
    def initialize store, prometheus
      @store = store
      @noqueue = Noqueue.new store, prometheus.prefix_proxy(:noqueue)
      @cache = Cache.new store, prometheus.prefix_proxy(:cache)
      @connect_from = prometheus.counter :connect_from_total, docstring: 'A counter of connections to postscreen'
      @whitelisted = prometheus.counter :whitelisted_total, docstring: 'A counter of WHITELISTED connections to postscreen'
      @pass_old = prometheus.counter :pass_old_total, docstring: 'A counter of PASS OLD connections to postscreen'
      @pass_new = prometheus.counter :pass_new_total, docstring: 'A counter of PASS NEW connections to postscreen'
      @dnsbl = prometheus.counter :dnsbl_total, docstring: 'A counter of DNSBL-blocked to postscreen'
      @bare_newline = prometheus.counter :bare_newline_total, docstring: 'A counter of BARE NEWLINE-blocked to postscreen'
      @command_pipelining = prometheus.counter :command_pipelining_total, docstring: 'A counter of COMMAND PIPELINING-blocked to postscreen'
      @command_time_limit = prometheus.counter :command_time_limit_total, docstring: 'A counter of COMMAND TIME LIMIT-blocked to postscreen'
      @hangup = prometheus.counter :hangup_total, docstring: 'A counter of HANGUP to postscreen'
      @bdat = prometheus.counter :bdat_total, docstring: 'A counter of BDAT to postscreen'
      @pregreet = prometheus.counter :pregreet_total, docstring: 'A counter of PREGREET to postscreen'
      @disconnect = prometheus.counter :disconnect_total, docstring: 'A counter of DISCONNECT to postscreen'
      @unknown = prometheus.counter :unknown_total, docstring: 'A counter of unknown loglines by postscreen'
      @warnings = prometheus.counter :warnings_total, docstring: 'A counter of any warnings'
      @psc_cache_update_delay = prometheus.summary :psc_cache_update_delay_total, docstring: 'A counter of PSC cache update delays by file', labels: %i[file]
      @curr_unavailable = prometheus.counter :service_currently_unavailable_total, docstring: 'A counter for rejected mails, because service currently unavailable - so greylisted.'
      @dnsblog_reply_timeout = prometheus.counter :dnsblog_reply_timeout_total, docstring: 'Total timedout requests for dnsblog'
      @data_without_valid_rcpt = prometheus.counter :data_without_valid_rcpt_total, docstring: 'A counter of DATA without valid RCPT events'
    end

    def collect entry
      #STDERR.puts "postscreen: #{entry.message}"
      case entry.message
      when /\ACONNECT from / then @connect_from.increment
      when /\AWHITELISTED / then @whitelisted.increment
      when /\APASS OLD / then @pass_old.increment
      when /\APASS NEW / then @pass_new.increment
      when /\ADISCONNECT / then @disconnect.increment
      when /\APREGREET / then @pregreet.increment
      when /\ABDAT / then @bdat.increment
      when /\AHANGUP / then @hangup.increment
      when /\ADNSBL rank / then @dnsbl.increment
      when /\ABARE NEWLINE / then @bare_newline.increment
      when /\ACOMMAND PIPELINING / then @command_pipelining.increment
      when /\ACOMMAND TIME LIMIT / then @command_time_limit.increment
      when /\ADATA without valid RCPT / then @data_without_valid_rcpt.increment
      when /\Acache / then @cache.collect entry
      when /\ANOQUEUE: (.*)/
        # The "service currently unavailable" rejection gets its own counter;
        # every other NOQUEUE line goes to the shared Noqueue collector.
        case $1
        when /\Areject: RCPT from [^ ]+: 450 4.3.2 Service currently unavailable; /
          @curr_unavailable.increment
        else
          @noqueue.collect entry
        end
      when /\Awarning: (.*)/
        @warnings.increment
        case $1
        when /\Apsc_cache_update: ([^ ]+) update average delay is ([^ ]+)/
          @psc_cache_update_delay.observe $2.to_f, labels: {file: $1}
        when /\Adnsblog reply timeout /
          @dnsblog_reply_timeout.increment
        when /\Agetpeername: Transport endpoint is not connected -- dropping this connection/
          # Recognised on purpose: counted in warnings_total above, not echoed to STDERR.
        else
          STDERR.puts "# postscreen warnings: #{entry.message}"
        end
      else
        STDERR.puts "# postscreen: #{entry.message}"
        @unknown.increment
      end
    end
  end

  # Yields every (trust, tls, cipher) combination used to pre-create the TLS
  # counter series with value 0. Returns an Enumerator when no block is given.
  def self.tls_posibilities
    return enum_for(:tls_posibilities) unless block_given?
    %w[Trusted Untrusted Anonymous].each do |trust|
      %w[TLSv1.2 TLSv1.3].each do |tls|
        %w[TLS_AES_128_GCM_SHA256 TLS_AES_256_GCM_SHA384 ECDHE-RSA-AES128-GCM-SHA256 ECDHE-RSA-AES256-GCM-SHA384].each do |cipher|
          yield trust, tls, cipher
        end
      end
    end
  end

  # Counts smtp client events: connect failures, TLS sessions and delivery status.
  class Smtp
    def initialize store, prometheus
      @store = store
      @connection_refused = prometheus.counter :connection_refused_total, docstring: 'A counter of refused connections on smtp'
      @connection_timed_out = prometheus.counter :connection_timed_out_total, docstring: 'A counter of timed out connections on smtp'
      @tls = prometheus.counter :tls_total, docstring: 'A counter of TLS connections on smtp with TLS-version and cipher', labels: %i[trust tls cipher]
      Collector::Postfix.tls_posibilities {|t, s, c| @tls.increment by: 0, labels: {trust: t, tls: s, cipher: c} }
      @status = prometheus.histogram :status, docstring: 'A histogram of message status by status', labels: %i[status]
      @sent = prometheus.counter :sent_total, docstring: 'A counter of sent messages by smtp'
      @deferred = prometheus.counter :deferred_total, docstring: 'A counter of deferred messages by smtp'
      @bounced = prometheus.counter :bounced_total, docstring: 'A counter of bounced messages by smtp'
      @deliverable = prometheus.counter :deliverable_total, docstring: 'A counter of deliverable messages by smtp'
      @undeliverable = prometheus.counter :undeliverable_total, docstring: 'A counter of undeliverable messages by smtp'
      @status_unknown = prometheus.counter :status_unknown_total, docstring: 'A counter of unknown status by smtp'
      @unknown = prometheus.counter :unknown_total, docstring: 'A counter of unknown loglines by smtp'
    end

    def collect entry
      #STDERR.puts "smtp: #{entry.message}"
      case entry.message
      when /\Aconnect to /
        case entry.message
        when / Connection refused\z/ then @connection_refused.increment
        when / Connection timed out\z/ then @connection_timed_out.increment
        else
          STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} connect to: #{entry.message}"
        end
      when /\A([^ ]+) TLS connection established to .* ([^ ]+) with cipher ([^ ]+)/
        @tls.increment labels: {trust: $1, tls: $2, cipher: $3}
      when /\A\w+: .* delay=([0-9.]+),.*status=([^ ]+)/
        # postfix@-.service: postfix/smtp/smtp 4KZ0KY5Wx4z4Mn: to=, relay=mail.psi.co.at[81.223.32.197]:25, delay=0.76, delays=0.2/0/0.35/0.21, dsn=2.0.0, status=sent (250 2.0.0 Ok: queued as 710F7A0263)
        delay, status = $1.to_f, $2.downcase
        # The delay is observed in the histogram for every status; the per-status counters follow.
        @status.observe delay, labels: {status: status}
        case status
        when 'sent' then @sent.increment
        when 'deferred' then @deferred.increment
        when 'bounced' then @bounced.increment
        when 'deliverable' then @deliverable.increment
        when 'undeliverable' then @undeliverable.increment
        else
          STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} status: #{entry.message}"
          @status_unknown.increment
        end
      else
        STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
        @unknown.increment
      end
    end
  end

  # Counts smtpd server events. Instantiated twice by Collector::Postfix
  # (for smtpd and for submission), each with its own metric prefix.
  class Smtpd
    def initialize store, prometheus
      @store = store
      @noqueue = Noqueue.new store, prometheus.prefix_proxy(:noqueue)
      @connect_from = prometheus.counter :connect_from_total, docstring: 'A counter of connections to smtpd', labels: %i[from_unknown]
      %w[0 1].each {|x| @connect_from.increment by: 0, labels: {from_unknown: x} }
      @tls = prometheus.counter :tls_total, docstring: 'A counter of TLS connections to smtpd with TLS-version and cipher', labels: %i[trust tls cipher]
      Collector::Postfix.tls_posibilities {|t, s, c| @tls.increment by: 0, labels: {trust: t, tls: s, cipher: c} }
      @disconnect_from = prometheus.counter :disconnect_from_total, docstring: 'A counter of "disconnect from" to smtpd'
      @disconnect_from_events = prometheus.counter :disconnect_from_event_total, docstring: 'A counter of events while connection-lifetime (auth, starttls, mail, rcpt, data, commands, ...) from disconnections', labels: %i[event]
      %w[auth mail rcpt data commands starttls ehlo quit].each {|e| @disconnect_from_events.increment by: 0, labels: {event: e} }
      @concurrenty_limit_exceeded = prometheus.counter :concurrenty_limit_exceeded_total, docstring: 'A counter of concurrenty limit exceeded connections to smtpd'
      @timeout = prometheus.counter :timeout_connection_total, docstring: 'A counter of timedout connections to smtpd', labels: %i[after]
      @lost_connection = prometheus.counter :lost_connection_total, docstring: 'A counter of lost connections to smtpd', labels: %i[after]
      @accepted = prometheus.counter :accepted_total, docstring: 'A counter of accepted messages to smtpd'
      @unknown = prometheus.counter :unknown_total, docstring: 'A counter of unknown loglines by smtpd'
      @sasl_auth_failed = prometheus.counter :sasl_auth_failed_total, docstring: 'A counter of failed SASL authentication by method', labels: %i[method]
      @non_smtp_command = prometheus.counter :non_smtp_command_total, docstring: 'A counter of Non-SMTP-commands (ex. was a HTTP GET / HTTP/1.1)'
      @tls_lib_problem = prometheus.counter :tls_lib_problem_total, docstring: 'A counter of TLS libreary problems (ex. unsupported protocol'
      @ssl_error = prometheus.counter :ssl_error_total, docstring: 'A counter of any SSL_accept errors by error', labels: %i[error]
      @warnings = prometheus.counter :warnings_total, docstring: 'A counter of any warnings'
      # The list of pre-created SASL methods is empty, so this currently creates no series.
      %w[].each {|m| @sasl_auth_failed.increment by: 0, labels: {method: m} }
      @hostname_not_resolved_to_address = prometheus.counter :hostname_not_resolved_to_address_total, docstring: 'A counter of hostnames, which cannot be resolved to there IP'
    end

    def collect entry
      #STDERR.puts "smtpd: #{entry.message}"
      case entry.message
      when /\Aconnect from unknown/
        @connect_from.increment labels: {from_unknown: 1}
      when /\Aconnect from /
        @connect_from.increment labels: {from_unknown: 0}
      when /\A([^ ]+) TLS connection established from .*: ([^ ]+) with cipher ([^ ]+) /
        @tls.increment labels: {trust: $1, tls: $2, cipher: $3}
      when /\ANOQUEUE: /
        @noqueue.collect entry
      when /\Adisconnect from ([^ ]+) (.*)/
        @disconnect_from.increment
        # ehlo=2 starttls=1 auth=1 mail=1 rcpt=1 data=1 commands=8
        $2.split(' ').each do |x|
          case x
          when /(\w+)=(\d+)/
            @disconnect_from_events.increment by: $2.to_f, labels: {event: $1}
          else
            STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} disconnect from: [#{x}] #{entry.message}"
          end
        end
      when /\Awarning: (.*)/
        @warnings.increment
        case $1
        when /\AConnection concurrency limit exceeded: /
          @concurrenty_limit_exceeded.increment
        when /\Ahostname ([^ ]+) does not resolve to address /
          @hostname_not_resolved_to_address.increment
        when /\A[^ ]+\[([^ ]+)\]: SASL ([^ ]+) authentication failed/
          # $1 is the client address inside the brackets, $2 is the SASL method.
          # The metric is labelled by method, so $2 is the value to use.
          @sasl_auth_failed.increment labels: {method: $2.downcase}
        when /\Anon-SMTP command from /
          @non_smtp_command.increment
        when /\ATLS library problem /
          @tls_lib_problem.increment
        else
          STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier} wanring: #{entry.message}"
        end
      when /\ASSL_accept error from [^ ]+: (-?\d)/
        @ssl_error.increment labels: {error: $1.to_i}
      when /\Atimeout after ([^ ]+) from /
        @timeout.increment labels: {after: $1}
      when /\Alost connection after ([^ ]+) from /
        @lost_connection.increment labels: {after: $1}
      when /\A\w{8,15}: client=/
        # sasl_method=
        @accepted.increment
      else
        STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
        @unknown.increment
      end
    end
  end

  # Counts tlsproxy CONNECT / DISCONNECT lines and established TLS sessions.
  class Tlsproxy
    def initialize store, prometheus
      @store = store
      @connect = prometheus.counter :connect_total, docstring: 'A counter of COUNNECT from tlsproxy'
      @disconnect = prometheus.counter :disconnect_total, docstring: 'A counter of DISCOUNNECT from tlsproxy'
      @tls = prometheus.counter :tls_total, docstring: 'A counter of TLS connections to smtpd with TLS-version and cipher', labels: %i[trust tls cipher]
      Collector::Postfix.tls_posibilities {|t, s, c| @tls.increment by: 0, labels: {trust: t, tls: s, cipher: c} }
    end

    def collect entry
      case entry.message
      when /\ADISCONNECT / then @disconnect.increment
      when /\ACONNECT / then @connect.increment
      when /\A([^ ]+) TLS connection established from .*: ([^ ]+) with cipher ([^ ]+) /
        @tls.increment labels: {trust: $1, tls: $2, cipher: $3}
      else
        STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
      end
    end
  end

  # Counts "sender non-delivery notification" lines of the bounce service.
  class Bounce
    def initialize store, prometheus
      @store = store
      @non_delivery = prometheus.counter :sender_non_delivery_notification, docstring: 'A counter of notifications to sender because mail cannot deliveried'
    end

    def collect entry
      case entry.message
      when /\A([^ ]+): sender non-delivery notification: (.+)/
        # postfix@-.service: postfix/bounce/bounce 4L3cN05mX6zfB: sender non-delivery notification: 4L3cN06Qpkz4Fc
        @non_delivery.increment
      else
        STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
      end
    end
  end

  # Exposes the "statistics: ..." lines of the scache service.
  class SCache
    def initialize store, prometheus
      @store = store
      @start = prometheus.gauge :start, docstring: 'Start timestamp of SCache statistics'
      @domain_lookup_hits = prometheus.counter :domain_lookup_hits_total, docstring: 'Count of hits for domain lookups of SCache'
      @domain_lookup_miss = prometheus.counter :domain_lookup_miss_total, docstring: 'Count of misses for domain lookups of SCache'
      @domain_lookup_success_rate = prometheus.gauge :domain_lookup_success_rate, docstring: 'Success rate of domain lookups of SCache'
      @max_simultaneaus_domains = prometheus.gauge :max_simultaneaus_domains, docstring: 'Max simultaneaus connections Domains of SCache'
      @max_simultaneaus_addresses = prometheus.gauge :max_simultaneaus_addresses, docstring: 'Max simultaneaus connections Addresses of SCache'
      @max_simultaneaus_connections = prometheus.gauge :max_simultaneaus_connections, docstring: 'Max simultaneaus connections of SCache'
    end

    def collect entry
      case entry.message
      when /\Astatistics: start interval (.*)/
        # postfix@-.service: postfix/scache/scache statistics: start interval Apr 13 10:58:06
        @start.set Time.parse($1).to_f
      when /\Astatistics: domain lookup hits=(\d+) miss=(\d+) success=(\d+)%/
        # postfix@-.service: postfix/scache/scache statistics: domain lookup hits=2 miss=4 success=33%
        @domain_lookup_hits.increment by: $1.to_f
        @domain_lookup_miss.increment by: $2.to_f
        # Logged as a percentage; stored as a 0..1 ratio.
        @domain_lookup_success_rate.set($3.to_f / 100)
      when /\Astatistics: max simultaneous domains=(\d+) addresses=(\d+) connection=(\d+)/
        # postfix@-.service: postfix/scache/scache statistics: max simultaneous domains=1 addresses=1 connection=4
        @max_simultaneaus_domains.set $1.to_f
        @max_simultaneaus_addresses.set $2.to_f
        @max_simultaneaus_connections.set $3.to_f
      else
        STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
      end
    end
  end

  # Handles the verify service; only its cache cleanup lines are recognised.
  class Verify
    def initialize store, prometheus
      @store = store
      @cache = Cache.new store, prometheus.prefix_proxy(:cache)
    end

    def collect entry
      case entry.message
      when /\Acache / then @cache.collect entry
      else
        STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
      end
    end
  end

  # Builds one sub-collector per Postfix service (each on its own
  # +prefix_proxy+) and registers the metrics that are handled directly in #collect.
  def initialize store, prometheus
    @store = store
    @postscreen = Postscreen.new store, prometheus.prefix_proxy(:postscreen)
    @smtp = Smtp.new store, prometheus.prefix_proxy(:smtp)
    @smtpd = Smtpd.new store, prometheus.prefix_proxy(:smtpd)
    @submission = Smtpd.new store, prometheus.prefix_proxy(:submission)
    @tlsproxy = Tlsproxy.new store, prometheus.prefix_proxy(:tlsproxy)
    @scache = SCache.new store, prometheus.prefix_proxy(:scache)
    @bounce = Bounce.new store, prometheus.prefix_proxy(:bounce)
    @verify = Verify.new store, prometheus.prefix_proxy(:verify)
    @qmgr = prometheus.counter :qmgr_total, docstring: 'A counter of qmgr actions'
    @cleanup = prometheus.counter :cleanup_total, docstring: 'A counter of cleanup actions'
    @lmtp = prometheus.counter :lmtp_total, docstring: 'A counter of ltmp actions'
    @pickup = prometheus.counter :pickup_total, docstring: 'A counter of pickup actions'
    @dnsblog = prometheus.counter :dnsblog_total, docstring: 'A counter for DNS-Blacklisted IP by DNSBL', labels: %i[dnsbl]
    @spf_fail = prometheus.counter :spf_fail_total, docstring: 'A counter of policyd-SPF failed SPF'
    @spf = prometheus.counter :spf_total, docstring: 'A counter of prepended SPF header-lines with state', labels: %i[status]
    %w[pass none].each {|s| @spf.increment by: 0, labels: {status: s} }
    @anvil_max_connection_rate_per_minute = prometheus.gauge :anvil_max_connection_per_minute_rate, docstring: "Rate of max connections per minute to listener", labels: %i{listener}
    @anvil_max_connection_count = prometheus.gauge :anvil_max_connection_count, docstring: "Count of max connections to listener", labels: %i{listener}
    @anvil_max_cache_size = prometheus.gauge :anvil_max_cache_size, docstring: "Current max cache size"
  end

  # Routes one log entry by its +syslog_identifier+: either to the matching
  # sub-collector, or to the counters/gauges handled inline below. Entries with
  # an identifier not listed here are written to STDERR.
  def collect entry
    #STDERR.puts "postfix: #{entry.syslog_identifier}: #{entry.message}"
    case entry.syslog_identifier
    when 'postfix/tlsproxy', 'postfix/tlsproxy/tlsproxy' then @tlsproxy.collect entry
    when 'postfix/postscreen', 'postfix/smtp/postscreen' then @postscreen.collect entry
    when 'postfix/smtp', 'postfix/smtp/smtp' then @smtp.collect entry
    when 'postfix/smtpd', 'postfix/smtpd/smtpd' then @smtpd.collect entry
    when 'postfix/submission/smtpd' then @submission.collect entry
    when 'postfix/bounce', 'postfix/bounce/bounce' then @bounce.collect entry
    # Was misspelled 'postifx/verify', which no entry could ever match.
    when 'postfix/verify', 'postfix/verify/verify' then @verify.collect entry
    when 'postfix/cleanup', 'postfix/cleanup/cleanup' then @cleanup.increment
    when 'postfix/qmgr', 'postfix/qmgr/qmgr' then @qmgr.increment
    when 'postfix/lmtp', 'postfix/lmtp/lmtp' then @lmtp.increment
    when 'postfix/pickup', 'postfix/pickup/pickup' then @pickup.increment
    when 'postfix/scache', 'postfix/scache/scache' then @scache.collect entry
    when 'postfix/dnsblog', 'postfix/dnsblog/dnsblog'
      case entry.message
      when /\Aaddr [^ ]+ listed by domain ([^ ]+) as /
        @dnsblog.increment labels: {dnsbl: $1.downcase}
      else
        STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
      end
    when 'postfix/anvil', 'postfix/anvil/anvil'
      case entry.message
      when /\Astatistics: max connection rate (\d+)\/60s for \((.+)\) at (.*)/
        @anvil_max_connection_rate_per_minute.set $1.to_i, labels: {listener: $2}
      when /\Astatistics: max connection count (\d+) for \((.+)\) at (.*)/
        @anvil_max_connection_count.set $1.to_i, labels: {listener: $2}
      when /\Astatistics: max cache size (\d+) at (.*)/
        @anvil_max_cache_size.set $1.to_i
      else
        STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
      end
    when 'policyd-spf'
      case entry.message
      when /\Aprepend Received-SPF: (\w+) /
        @spf.increment labels: {status: $1.downcase}
      when /\A[^ ]+ [^ ]+ Message rejected due to: SPF fail /
        # postfix@-.service policyd-spf: 550 5.7.23 Message rejected due to: SPF fail - not authorized.
        @spf_fail.increment
      else
        STDERR.puts "# #{entry._systemd_unit} #{entry.syslog_identifier}: #{entry.message}"
      end
    else
      STDERR.puts "# #{entry._systemd_unit}: #{entry.syslog_identifier} #{entry.message}"
    end
  end
end