From b6b9a1a3645d6d23faaa4b3b667ab740961318de Mon Sep 17 00:00:00 2001
From: Hubert Kario
Date: Fri, 4 Apr 2014 20:12:50 +0200
Subject: [PATCH] Improve scanning performance and reduce false negatives

scan all the machines from top-1m.csv file, wait for completion of all
jobs
i=1 is an off-by-one-error

support top-1m.csv files with arbitrary number of sites

run scans for many hosts at a time, but don't run more than
specified amount

in case where default domain name doesn't resolve or doesn't have
port 443 open, retry with www. prefix
---
 top1m/testtop1m.sh | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/top1m/testtop1m.sh b/top1m/testtop1m.sh
index 52f504b..71b1b0f 100755
--- a/top1m/testtop1m.sh
+++ b/top1m/testtop1m.sh
@@ -1,14 +1,41 @@
 #!/usr/bin/env bash
 parallel=50
+max_bg=400
 [ ! -e "results" ] && mkdir results
-i=1
-while [ $i -lt 1000000 ]
+
+# Block until at most $1 background jobs are still running.
+function wait_for_jobs() {
+    local no_jobs
+    no_jobs=$(jobs -r | wc -l)
+
+    while [ "$no_jobs" -gt "$1" ]; do
+        sleep 1
+        no_jobs=$(jobs -r | wc -l)
+    done
+}
+
+# Scan $1 on port 443; if it is unreachable, retry once with a www. prefix.
+# Runs as a background job, so it returns instead of using loop control.
+function scan_host() {
+    local host
+    for host in "$1" "www.$1"; do
+        if tcping -u 10000000 "$host" 443; then
+            ../cipherscan -json "$host:443" > "results/$host"
+            return
+        fi
+    done
+}
+
+i=0
+count=$(wc -l top-1m.csv | awk '{print $1}')
+while [ "$i" -lt "$count" ]
 do
     echo processings sites $i to $((i + parallel))
-    for t in $(tail -$((1000000 - $i)) top-1m.csv | head -$parallel |cut -d ',' -f 2)
+    for t in $(tail -n "$((count - i))" top-1m.csv | head -n "$parallel" | cut -d ',' -f 2)
     do
-        (tcping -u 10000000 $t 443; if [ $? -gt 0 ];then continue;fi;../cipherscan $t:443 -json > results/$t )&
+        scan_host "$t" &
     done
-    sleep 7
     i=$(( i + parallel))
+    wait_for_jobs "$max_bg"
 done
+wait