owlps/scripts/owlps-aggcheck.pl

#!/usr/bin/perl -w

=head1 NAME

owlps-aggcheck - verifies an aggregation CSV file


=head1 SYNOPSIS

B<owlps-aggcheck> [ B<-h> | B<-V> ] [ B<-v> ] [ B<-d> ] [ B<-n> ]
[ I<aggregation_files> ]


=head1 DESCRIPTION

B<owlps-aggcheck> parses an aggregation CSV file to help the user verify it
visually. For each line, it displays some information about the request and
counts the number of capture points (CPs) which received the request, along with
the number of packets they got. With the B<-n> option, this information is
printed in CSV format. With the B<-d> option, the global distribution of the
signal strength values for each transmitter and each receiver in the input
file(s) is printed.

I<aggregation_files> is a list of one or more aggregation files to work on,
which I<must> be placed after the options. If no file is provided, the standard
input is read.


=head1 OPTIONS

=over 7

=item B<-h>, B<--help>

Print version and help message and exit.

=item B<-V>, B<--version>

Print version message and exit.

=item B<-d>

Print signal strength distribution in CSV format.

=item B<-n>

Print per-request statistics in CSV format, as well as the total number of
packets received by each capture point. Please note that this display is ordered
by timestamp (unlike the default display which respects the order of the lines
in the input file); this might not correspond to the actual order of
transmission if the devices' clocks are not synchronised.

=item B<-v>

Print human readable information about the lines read (this is the default
behaviour unless B<-d> or B<-n> is used).

=back


=head1 BUGS

When using B<-n> or B<-d>, each timestamp must be unique in the input file. This
is due to the fact that this script uses the timestamp alone as the identifier
of a request (instead of using both the timestamp and the transmitter's MAC
address).  This should seldom be a problem in practice, but if you encounter
this case, a workaround is to manually add a decimal to one of the duplicated
timestamps.


=head1 COPYING

This script and its documentation are part of the Owl Positioning System (OwlPS)
project. They are subject to the copyright notice and license terms in the
COPYRIGHT.t2t file found in the top-level directory of the OwlPS distribution
and at https://code.lm7.fr/mcy/owlps/src/master/COPYRIGHT.t2t


=head1 SEE ALSO

owlps(7), owlps-aggregatord(1)

=cut


use strict;
use Getopt::Std;
use Pod::Usage;


## Constants ##

# CSV format version handled by this program
use constant FORMAT_VERSION => 1;

# Number of fields before the captured packets
use constant PREFIX_FIELDS => 9;


## Global variables ##

# Line number
my $line_nb = 0;

# Association between timestamp and transmitter of each request
# key = timestamp
# value = transmitter's MAC address
my %transmitters;

# Structure that holds the global count of packets
# key = timestamp
# value = dictionary {key = CP's MAC address
#                     value = number of packets}
my %global_stats;

# List of present CPs in the whole file
# key = MAC address
# value = 1
my %global_present_cps;

# Structure that holds the global distribution of the signal strength, per
# transmitting device
# key = transmitter's MAC address
# value = dictionary {key = receiver's MAC address
#                     value = dictionary {key = signal strength level
#                                         value = number of occurrences}
my %distribution;


## Functions ##

# Prints the name of the program and the CSV aggregation format version it
# handles.
# Parameter: the file handle to print to (optional, the standard output is
# used when it is omitted or false).
sub VERSION_MESSAGE {
    my ($out) = @_;
    $out ||= *STDOUT;

    my $banner =
      "This is OwlPS AggCheck, part of the Owl Positioning System project.\n";
    $banner .= "CSV aggregation format version handled: " . FORMAT_VERSION;
    $banner .= "\n";

    print {$out} $banner;
}


# Prints the usage message extracted from this file's POD.
# Parameter: the file handle to print to (optional, the standard output is
# used when it is omitted or false).
# The "NOEXIT" exit value asks pod2usage() to return to the caller instead of
# terminating the program, so that the caller chooses the exit status.
sub HELP_MESSAGE {
    my ($out) = @_;
    $out ||= *STDOUT;

    my %usage_args = (
        -exitval => "NOEXIT",
        -output  => $out,
    );
    pod2usage(%usage_args);
}


# Aborts the program because the current input line (global $line_nb) does not
# contain the expected number of fields. The message has no trailing newline,
# so Perl appends the location of the error to it.
sub error_nb_fields() {
    my $message = "Wrong number of fields on line #" . $line_nb . "!";
    die $message;
}


## Option parsing ##

# Select the standard-conforming handling of --help and --version by
# Getopt::Std (see its documentation)
$Getopt::Std::STANDARD_HELP_VERSION = 1;
use constant OPTIONS => 'dhnvV';
my %options;
unless (getopts(OPTIONS, \%options)) {
    # Invalid command line: print the usage on the standard error and fail
    HELP_MESSAGE(*STDERR);
    exit 1;
}

# -V prints the version message only, -h prints the version message followed
# by the help message; both options terminate the program successfully
if ($options{'h'} || $options{'V'}) {
    VERSION_MESSAGE();
    HELP_MESSAGE() if $options{'h'};
    exit 0;
}

# -d: print the signal strength distribution; -n: print the statistics
my ($print_distribution, $print_stats) = @options{qw(d n)};

# The verbose mode is enabled when -v was explicitly used, or by default when
# neither -d nor -n was used
my $verbose = $options{'v'} || (!$print_distribution && !$print_stats);


## Main loop: read input lines ##

# Parse every input line (from the files named on the command line, or from
# the standard input if there is none).
#
# Layout of a line, as used below (fields are separated by semicolons):
#   - field 0: CSV format version (must be equal to FORMAT_VERSION);
#   - field 1: transmitter's MAC address;
#   - field 2: request type;
#   - field 3: number of packets expected;
#   - field 4: timestamp of the request;
#   - the remaining prefix fields (up to PREFIX_FIELDS) are only displayed;
#   - then one group of three fields per captured packet: the CP's MAC
#     address, the packet number (ignored) and the signal strength.
#
# For each line, the loop fills %transmitters, %global_stats,
# %global_present_cps and %distribution, and prints a human-readable summary
# in verbose mode. It dies on a truncated line, on an unsupported format
# version and, with -n or -d, on a timestamp that was already seen.
while (my $line = <>) {
    $line_nb++;
    chomp $line;

    # Skip empty lines
    next if $line eq "";

    # Split the line and make sure the whole request prefix is present
    my $nb_prefix_fields = PREFIX_FIELDS;
    my @fields           = split /;/, $line;
    error_nb_fields() if @fields < $nb_prefix_fields;

    # Print the prefix fields as they are
    if ($verbose) {
        my @prefix = @fields[0 .. $nb_prefix_fields - 1];
        print "Line #$line_nb: ", join(';', @prefix), "\n";
    }

    # Check the CSV format version
    my $csv_format_version = $fields[0];
    if ($csv_format_version != FORMAT_VERSION) {
        die "CSV format version \"$csv_format_version\" is not supported!\n";
    }

    # Extract important information
    my $mac_trx   = $fields[1];
    my $timestamp = $fields[4];

    # Print some request information (human-readable)
    if ($verbose) {
        my $request_type     = $fields[2];
        my $nb_pkts_expected = $fields[3];

        my $type_name = "Strange";
        if    ($request_type == 0)  { $type_name = "Positioning" }
        elsif ($request_type == 1)  { $type_name = "Calibration" }
        elsif ($request_type == 2)  { $type_name = "Autocalibration" }
        elsif ($request_type == 10) { $type_name = "Implicit" }

        print "$type_name request transmitted by $mac_trx at $timestamp"
          . " ($nb_pkts_expected packets expected).\n";
    }

    # Make sure we don't have a request with the same timestamp already, because
    # it will be a problem if we want to print the statistics or the
    # distribution. The test uses exists() rather than the truth of the stored
    # value, so that a request whose transmitter field is empty or "0" is
    # detected as well instead of being silently overwritten.
    if (($print_stats or $print_distribution)
        and exists $transmitters{$timestamp})
    {
        die "A request with timestamp \"$timestamp\" was already stored;"
          . " this program doesn't work properly with non-unique timestamps"
          . " (see section \"BUGS\" in the manual page).";
    }

    # Present CPs for this line
    # key = MAC address, value = number of packets
    my %present_cps;

    # Read the captured packets, three fields at a time
    for (my $i = $nb_prefix_fields; $i < @fields; $i += 3) {
        my $mac_cp = $fields[$i];
        $global_present_cps{$mac_cp} = 1;    # Save the CP's name
        $present_cps{$mac_cp}++;             # One more packet for this CP

        # The signal strength is two fields after the MAC address (the
        # packet number in between is not used); a group that stops before
        # it means the line is truncated
        error_nb_fields() if ($i + 2 >= @fields);

        # Count the signal strength level
        my $ss = $fields[$i + 2];
        $distribution{$mac_trx}{$mac_cp}{$ss}++;
    }

    # Save the values for the stats
    $transmitters{$timestamp} = $mac_trx;
    $global_stats{$timestamp} = {%present_cps};

    # Print the number of packets for each CP and the number of CPs; the CPs
    # are sorted by MAC address so that the display is the same on every run
    if ($verbose) {
        foreach my $mac_cp (sort keys %present_cps) {
            print "$mac_cp -> $present_cps{$mac_cp}\n";
        }

        my $nb_cps = keys %present_cps;
        print "$nb_cps different CPs.\n------------\n";
    }
}


## Print statistics ##

# Print the per-request statistics in CSV format (-n option): a header line
# with the names of all the CPs seen in the input, one line per request with
# the number of packets each CP received, and a final line with the total
# number of packets received by each CP.
if ($print_stats) {
    # Will contain the total number of packets received by each CP
    my %total_nb_pkts;

    # Print the header line
    my @cps_names = sort keys %global_present_cps;
    print join(';', 'Transmitter', 'Timestamp', @cps_names), "\n";

    # Order the requests by timestamp. The comparison is numeric first, so
    # that timestamps whose integer parts have different lengths are ordered
    # correctly (a plain string sort would put "10.5" before "9.5"); the
    # string comparison then breaks the ties between timestamps that only
    # differ beyond the precision of a floating-point number. Numeric
    # warnings are silenced locally because the script runs under -w and a
    # malformed timestamp would otherwise trigger one warning per comparison.
    my @timestamps = do {
        no warnings 'numeric';
        sort { $a <=> $b || $a cmp $b } keys %global_stats;
    };

    # Print the requests
    foreach my $timestamp (@timestamps) {
        my $present_cps = $global_stats{$timestamp};
        my @row         = ($transmitters{$timestamp}, $timestamp);

        # Add the CPs in the same order as in the header and count the totals
        foreach my $mac (@cps_names) {
            my $nb_pkts = $present_cps->{$mac} || 0;
            $total_nb_pkts{$mac} += $nb_pkts;
            push @row, $nb_pkts;
        }

        print join(';', @row), "\n";
    }

    # Print the totals ("Total" is in the transmitter's column, the
    # timestamp's column is left empty)
    print join(';', 'Total', '', @total_nb_pkts{@cps_names}), "\n";
}


## Print distribution ##

# Print the distribution of the signal strength values in CSV format (-d
# option). For each (transmitter, receiver) link, four lines are printed: the
# link's header, the signal strength values, the number of occurrences of each
# value and the corresponding probabilities; the last column of the three
# latter lines is respectively the "Sum" label, the total number of packets and
# the sum of the probabilities.
if ($print_distribution) {
    foreach my $trx (sort keys %distribution) {
        my $links = $distribution{$trx};

        foreach my $rx (sort keys %{$links}) {
            # Occurrences of each signal strength value for this link
            my $histogram = $links->{$rx};

            # Header line
            print "Link (Trx/Rx);$trx;$rx\n";

            # Every value between the lowest and the highest signal strength
            # found gets a column, including the values that never occurred
            my @found_levels = sort { $a <=> $b } keys %{$histogram};
            my ($lowest, $highest) = @found_levels[0, -1];
            my @levels = $lowest .. $highest;

            # Signal strength values
            print join(';', 'SS (dBm)', @levels, 'Sum'), "\n";

            # Occurrences (0 for the values that were not found)
            my @counts =
              map { defined($histogram->{$_}) ? $histogram->{$_} : 0 } @levels;
            my $total_nb_pkts = 0;
            $total_nb_pkts += $_ foreach (@counts);
            print join(';', 'Occurrences', @counts, $total_nb_pkts), "\n";

            # Probabilities; the division is only done for the values that
            # were found, which guarantees a non-zero total
            my @probas;
            my $total_proba = 0;
            foreach my $i (0 .. $#levels) {
                my $proba = 0;
                if (defined($histogram->{$levels[$i]})) {
                    $proba = $counts[$i] / $total_nb_pkts;
                    $total_proba += $proba;
                }
                push @probas, $proba;
            }
            print join(';', 'Probability', @probas, $total_proba), "\n";

            # Separate two links with an empty line
            print "\n";
        }
    }
}


# vim: tabstop=4:shiftwidth=4:expandtab:textwidth=80