[scripts] Add AggCheck (owlps-aggcheck.pl)

Add owlps-aggcheck.pl in the new scripts/ directory. As its name indicates, this script allows one to analyse an aggregation file in various ways: print human-readable information, extract signal strength distribution, or extract the number of packets per request and per capture point.
2013-07-25 21:50:30 -04:00 · 2013-07-25 21:50:30 -04:00 · 8ef5a55b07
parent a8cb529b2c
commit 8ef5a55b07
1 changed files with 328 additions and 0 deletions
--- a/scripts/owlps-aggcheck.pl
+++ b/scripts/owlps-aggcheck.pl
@ -0,0 +1,328 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+owlps-aggcheck - verifies an aggregation CSV file
+
+
+=head1 SYNOPSIS
+
+B<owlps-aggcheck> [ B<-h> | B<-V> ] [ B<-v> ] [ B<-d> ] [ B<-n> ]
+[ <I<aggregation_files>> ]
+
+
+=head1 DESCRIPTION
+
+B<owlps-aggcheck> parses an aggregation CSV file to help the user verify it
+visually. For each line, it displays some information about the request and
+counts the number of capture points (CPs) which received the request, along with
+the number of packets they got. With the B<-n> option, this information is
+printed in CSV format. With the B<-d> option, the global distribution of the
+signal strength values for each transmitter and each receiver in the input
+file(s) is printed.
+
+<I<aggregation_files>> is a list of one or more aggregation files to work on,
+which I<must> be placed after the options. If no file is provided, the standard
+input is read.
+
+
+=head1 OPTIONS
+
+=over 7
+
+=item B<-h>, B<--help>
+
+Print version and help message and exit.
+
+=item B<-V>, B<--version>
+
+Print version message and exit.
+
+=item B<-d>
+
+Print signal strength distribution in CSV format.
+
+=item B<-n>
+
+Print per-request statistics in CSV format.
+
+=item B<-v>
+
+Print human readable information about the lines read (this is the default
+behaviour unless B<-d> or B<-n> is used).
+
+=back
+
+
+=head1 COPYING
+
+This script and its documentation are part of the Owl Positioning System (OwlPS)
+project. They are subject to the copyright notice and license terms in the
+COPYRIGHT.t2t file found in the top-level directory of the OwlPS distribution
+and at http://code.lm7.fr/p/owlps/source/tree/master/COPYRIGHT.t2t
+
+
+=head1 SEE ALSO
+
+owlps(7), owlps-aggregatord(1)
+
+=cut
+
+
+use strict;
+use Getopt::Std;
+use Pod::Usage;
+
+
+## Constants ##
+
+# CSV format version handled by this program
+use constant FORMAT_VERSION => 1;
+
+# Number of fields before the captured packets
+use constant PREFIX_FIELDS => 9;
+
+
+## Global variables ##
+
+# Line number
+my $line_nb = 0;
+
+# Structure that holds the global count of packets
+# key = timestamp
+# value = dictionary {key = CP's MAC address
+#                     value = number of packets}
+my %global_stats;
+
+# List of present CPs in the whole file
+# key = MAC address
+# value = 1
+my %global_present_cps;
+
+# Structure that holds the global distribution of the signal strength, per
+# transmitting device
+# key = transmitter's MAC address
+# value = dictionary {key = receiver's MAC address
+#                     value = dictionary {key = signal strength level
+#                                         value = number of occurrences}}
+my %distribution;
+
+
+## Functions ##
+
+sub VERSION_MESSAGE {
+    # Write the program banner to the given handle (STDOUT by default)
+    my $out = $_[0] || *STDOUT;
+    print $out
+      "This is OwlPS AggCheck, part of the Owl Positioning System project.\n",
+      "CSV aggregation format version handled: ", FORMAT_VERSION, "\n";
+}
+
+
+sub HELP_MESSAGE {
+    # Usage text goes to the handle given as first argument, else to STDOUT
+    my ($out) = @_;
+    pod2usage(-exitval => "NOEXIT", -output => $out || *STDOUT);
+}
+
+
+sub error_nb_fields() {
+    die("Wrong number of fields on line #" . $line_nb . "!");
+}
+
+
+## Option parsing ##
+
+$Getopt::Std::STANDARD_HELP_VERSION = 1;
+use constant OPTIONS => 'dhnvV';
+my %options;
+unless (getopts(OPTIONS, \%options)) {
+    HELP_MESSAGE(*STDERR);    # Option parsing failed: usage goes to stderr
+    exit 1;
+}
+
+# -h prints the version banner and the usage text, -V the banner only
+if ($options{h}) {
+    VERSION_MESSAGE();
+    HELP_MESSAGE();
+    exit 0;
+}
+
+if ($options{V}) {
+    VERSION_MESSAGE();
+    exit 0;
+}
+
+my ($print_distribution, $print_stats) = @options{qw(d n)};
+
+# Verbose output is the default and can be forced explicitly with -v; using -d
+# or -n without -v turns it off
+my $verbose = $options{v} || (!$print_distribution && !$print_stats);
+
+
+## Main loop: read input lines ##
+
+# Parse each request line and update the global structures declared above
+while (<>) {
+    $line_nb++;
+    chomp;
+
+    # Skip empty lines
+    next if ($_ eq "");
+
+    # Present CPs for this line
+    # key = MAC address, value = number of packets
+    my %present_cps;
+
+    # Split the line and check the number of fields
+    my @fields = split(';');
+    error_nb_fields() if (@fields < PREFIX_FIELDS);
+
+    my $field_nb = 0;    # Number of the current field
+
+    # Print the first fields
+    if ($verbose) {
+        print "Line #$line_nb: $fields[$field_nb++]";
+        while ($field_nb < PREFIX_FIELDS) { print ";$fields[$field_nb++]" }
+        print "\n";
+    }
+    else { $field_nb = PREFIX_FIELDS }
+
+    # Check the CSV format version
+    my $csv_format_version = $fields[0];
+    if ($csv_format_version != FORMAT_VERSION) {
+        die "CSV format version \"$csv_format_version\" is not supported!\n";
+    }
+
+    # Extract important information
+    my $mac_trx   = $fields[1];
+    my $timestamp = $fields[4];
+
+    # Print some request information (human-readable)
+    if ($verbose) {
+        my $request_type     = $fields[2];
+        my $nb_pkts_expected = $fields[3];
+        if    ($request_type == 0)  { print "Positioning" }
+        elsif ($request_type == 1)  { print "Calibration" }
+        elsif ($request_type == 2)  { print "Autocalibration" }
+        elsif ($request_type == 10) { print "Implicit" }
+        else                        { print "Strange" }
+        print " request transmitted by $mac_trx at $timestamp"
+          . " ($nb_pkts_expected packets expected).\n";
+    }
+
+    # Read the captured requests and count the packets
+    while ($field_nb < @fields) {
+        my $mac_cp = $fields[$field_nb];
+        $global_present_cps{$mac_cp} = 1;    # Save the CP's name
+
+        # Increment the number of packets
+        $present_cps{$mac_cp}++;
+
+        # Skip the "packet number" field
+        $field_nb += 2;
+        error_nb_fields() if ($field_nb >= @fields);
+
+        # Count the signal strength level
+        my $ss = $fields[$field_nb];
+        $distribution{$mac_trx}{$mac_cp}{$ss}++;
+
+        $field_nb++;    # Jump to the next field
+    }
+
+    # Save the values for the stats (one entry per timestamp, last line wins)
+    $global_stats{$timestamp} = {%present_cps};
+
+    # Print the number of packets for each CP, sorted by MAC address so that
+    # the output order is reproducible from one run to the next
+    if ($verbose) {
+        foreach my $mac_cp (sort keys %present_cps) {
+            print "$mac_cp -> $present_cps{$mac_cp}\n";
+        }
+
+        # Print the number of CPs for this request (every CP stored in
+        # %present_cps got at least one packet)
+        my $nb_cps = keys %present_cps;
+        print "$nb_cps different CPs.\n------------\n";
+    }
+}
+
+
+## Print statistics ##
+
+if ($print_stats) {
+    # The header line names one column per CP seen anywhere in the input
+    my @cps_names = sort keys %global_present_cps;
+    print join(';', "Timestamp", @cps_names), "\n";
+
+    # One row per request, in the (string) sort order of the timestamps
+    foreach my $timestamp (sort keys %global_stats) {
+        my $pkts_per_cp = $global_stats{$timestamp};
+
+        # Follow the column order of the header; a CP that did not capture
+        # this request is reported with 0 packet
+        my @row = ($timestamp);
+        foreach my $mac (@cps_names) {
+            push @row, ($pkts_per_cp->{$mac} || 0);
+        }
+        print join(';', @row), "\n";
+    }
+}
+
+
+## Print distribution ##
+
+if ($print_distribution) {
+    foreach my $mac_trx (sort keys %distribution) {
+        foreach my $mac_cp (sort keys %{ $distribution{$mac_trx} }) {
+            # Shortcut (hash reference) to the distribution of this link
+            my $distrib = $distribution{$mac_trx}{$mac_cp};
+
+            # Print the header line
+            print "Link (Trx/Rx);$mac_trx;$mac_cp\n";
+
+            # Make a list of SSs to print: all the SSs in the interval, not
+            # just the SSs we found (keys() needs an explicit dereference)
+            my @present_ss_list = sort { $a <=> $b } keys %$distrib;
+            my @ss_list = $present_ss_list[0] .. $present_ss_list[-1];
+
+            # Print all the SSs
+            print "SS (dBm);";
+            foreach my $ss (@ss_list) { print "$ss;" }
+            print "Sum\n";
+
+            # Print the occurrences
+            print "Occurrences;";
+            my $total_nb_pkts = 0;
+            foreach my $ss (@ss_list) {
+                my $occurrences = 0;
+                if (defined($distrib->{$ss})) {
+                    $occurrences = $distrib->{$ss};
+                    $total_nb_pkts += $occurrences;
+                }
+                print "$occurrences;";
+            }
+            print "$total_nb_pkts\n";
+
+            # Print the probabilities
+            print "Probability;";
+            my $total_proba = 0;
+            foreach my $ss (@ss_list) {
+                my $proba = 0;
+                if (defined($distrib->{$ss})) {
+                    my $occurrences = $distrib->{$ss};
+                    $proba = $occurrences / $total_nb_pkts;
+                    $total_proba += $proba;
+                }
+                print "$proba;";
+            }
+            print "$total_proba\n";
+
+            # Separate two links with an empty line
+            print "\n";
+        }
+    }
+}
+
+
+# vim: tabstop=4:shiftwidth=4:expandtab:textwidth=80