owlps/scripts/owlps-aggcheck.pl

368 lines
10 KiB
Perl
Raw Normal View History

#!/usr/bin/perl -w
=head1 NAME
owlps-aggcheck - verifies an aggregation CSV file
=head1 SYNOPSIS
B<owlps-aggcheck> [ B<-h> | B<-V> ] [ B<-v> ] [ B<-d> ] [ B<-n> ]
[ I<aggregation_files> ]
=head1 DESCRIPTION
B<owlps-aggcheck> parses an aggregation CSV file to help the user verify it
visually. For each line, it displays some information about the request and
counts the number of capture points (CPs) which received the request, along with
the number of packets they got. With the B<-n> option, this information is
printed in CSV format. With the B<-d> option, the global distribution of the
signal strength values for each transmitter and each receiver in the input
file(s) is printed.
I<aggregation_files> is a list of one or more aggregation files to work on,
which I<must> be placed after the options. If no file is provided, the standard
input is read.
=head1 OPTIONS
=over 7
=item B<-h>, B<--help>
Print version and help message and exit.
=item B<-V>, B<--version>
Print version message and exit.
=item B<-d>
Print signal strength distribution in CSV format.
=item B<-n>
Print per-request statistics in CSV format, as well as the total number of
packets received by each capture point. Please note that this display is ordered
by timestamp (unlike the default display which respects the order of the lines
in the input file); this might not correspond to the actual order of
transmission if the devices' clocks are not synchronised.
=item B<-v>
Print human readable information about the lines read (this is the default
behaviour unless B<-d> or B<-n> is used).
=back
=head1 BUGS
When using B<-n> or B<-d>, each timestamp must be unique in the input file. This
is due to the fact that this script uses the timestamp alone as the identifier
of a request (instead of using both the timestamp and the transmitter's MAC
address). This should seldom be a problem in practice, but if you encounter
this case, a workaround is to manually add a decimal to one of the duplicated
timestamps.
=head1 COPYING
This script and its documentation are part of the Owl Positioning System (OwlPS)
project. They are subject to the copyright notice and license terms in the
COPYRIGHT.t2t file found in the top-level directory of the OwlPS distribution
and at https://code.lm7.fr/mcy/owlps/src/master/COPYRIGHT.t2t
=head1 SEE ALSO
owlps(7), owlps-aggregatord(1)
=cut
use strict;
use Getopt::Std;
use Pod::Usage;
## Constants ##
# CSV format version handled by this program; the main loop aborts on any
# input line whose first field differs from this value
use constant FORMAT_VERSION => 1;
# Number of fields before the captured packets; the main loop rejects any
# line containing fewer fields than this
use constant PREFIX_FIELDS => 9;
## Global variables ##
# Line number: incremented for every line read (empty lines included) and
# used in the messages printed by the script
my $line_nb = 0;
# Association between timestamp and transmitter of each request
#   key   = timestamp
#   value = transmitter's MAC address
my %transmitters;
# Structure that holds the global count of packets
#   key   = timestamp
#   value = dictionary {
#             key   = CP's MAC address
#             value = number of packets
#           }
my %global_stats;
# List of present CPs in the whole file
#   key   = MAC address
#   value = 1
my %global_present_cps;
# Structure that holds the global distribution of the signal strength, per
# transmitting device
#   key   = transmitter's MAC address
#   value = dictionary {
#             key   = receiver's MAC address
#             value = dictionary {
#                       key   = signal strength level
#                       value = number of occurrences
#                     }
#           }
my %distribution;
## Functions ##
# Prints the program banner along with the CSV aggregation format version
# handled (FORMAT_VERSION).
# Parameter: the file handle to print to (optional; the standard output is
# used when it is missing or false).
sub VERSION_MESSAGE {
    my ($out) = @_;
    $out ||= *STDOUT;
    my @banner = (
        "This is OwlPS AggCheck, part of the Owl Positioning System project.\n",
        "CSV aggregation format version handled: ",
        FORMAT_VERSION,
        "\n",
    );
    print {$out} join('', @banner);
}
# Prints the usage message through Pod::Usage's pod2usage(), asking it not
# to exit ("NOEXIT") so that the caller chooses the exit status itself.
# Parameter: the file handle to print to (optional; the standard output is
# used when it is missing or false).
sub HELP_MESSAGE {
    my ($out) = @_;
    $out ||= *STDOUT;
    my %usage_args = (
        -output  => $out,
        -exitval => "NOEXIT",
    );
    pod2usage(%usage_args);
}
# Aborts the program, reporting that the line being parsed (global $line_nb)
# does not contain the expected number of fields. Takes no argument and
# never returns.
sub error_nb_fields() {
    die "Wrong number of fields on line #" . $line_nb . "!";
}
## Option parsing ##
# Ask Getopt::Std for its "standard" --help/--version behaviour (see the
# Getopt::Std documentation)
$Getopt::Std::STANDARD_HELP_VERSION = 1;
use constant OPTIONS => 'dhnvV';
my %options;
# Unknown option: print the usage on the standard error and fail
unless (getopts(OPTIONS, \%options)) {
    HELP_MESSAGE(*STDERR);
    exit 1;
}
# -h prints the version then the help, -V prints the version only; both
# terminate the program successfully
if ($options{'h'} or $options{'V'}) {
    VERSION_MESSAGE();
    HELP_MESSAGE() if ($options{'h'});
    exit 0;
}
my ($print_distribution, $print_stats) = @options{'d', 'n'};
# Verbose mode is enabled if -v was explicitly used, or by default when
# neither -d nor -n was requested
my $verbose = $options{'v'} || !$print_distribution && !$print_stats;
## Main loop: read input lines ##
# Each non-empty input line describes one request: PREFIX_FIELDS leading
# fields (of which this loop uses #0 = CSV format version, #1 = transmitter's
# MAC address, #2 = request type, #3 = number of packets expected and
# #4 = timestamp), followed by three fields per captured packet (CP's MAC
# address, packet number, signal strength).
# The loop fills %transmitters, %global_stats, %global_present_cps and
# %distribution, and prints a human-readable report when $verbose is set.
# It dies on a line with a wrong number of fields, on an unsupported CSV
# format version, and (with -n or -d) on a duplicated timestamp.
LINE: while (my $line = <>) {
    $line_nb++;
    chomp $line;
    # Tolerate CRLF line endings: a leftover "\r" would otherwise be glued
    # to the last signal strength and prevent empty lines from being skipped
    $line =~ s/\r\z//;

    # Skip empty lines
    next LINE if ($line eq "");

    # Present CPs for this line
    # key = MAC address, value = number of packets
    my %present_cps;

    # Split the line and check the number of fields
    my @fields = split(/;/, $line);
    error_nb_fields() if (@fields < PREFIX_FIELDS);

    # Print the first fields
    if ($verbose) {
        print "Line #$line_nb: ",
            join(';', @fields[0 .. PREFIX_FIELDS - 1]), "\n";
    }

    # Check the CSV format version
    my $csv_format_version = $fields[0];
    if ($csv_format_version != FORMAT_VERSION) {
        die "CSV format version \"$csv_format_version\" is not supported!\n";
    }

    # Extract important information
    my $mac_trx = $fields[1];
    my $timestamp = $fields[4];

    # Print some request information (human-readable)
    if ($verbose) {
        my $request_type = $fields[2];
        my $nb_pkts_expected = $fields[3];
        if ($request_type == 0) { print "Positioning" }
        elsif ($request_type == 1) { print "Calibration" }
        elsif ($request_type == 2) { print "Autocalibration" }
        elsif ($request_type == 10) { print "Implicit" }
        else { print "Strange" }
        print " request transmitted by $mac_trx at $timestamp"
            . " ($nb_pkts_expected packets expected).\n";
    }

    # Make sure we don't have a request with the same timestamp already,
    # because it will be a problem if we want to print the statistics or the
    # distribution. The key's existence is tested (rather than the truth of
    # the stored MAC address) so that a transmitter field such as "0" or an
    # empty string cannot hide a duplicate.
    if (($print_stats or $print_distribution)
        and exists $transmitters{$timestamp}) {
        die "A request with timestamp \"$timestamp\" was already stored;"
            . " this program doesn't work properly with non-unique timestamps"
            . " (see section \"BUGS\" in the manual page).";
    }

    # Read the captured requests and count the packets
    my $field_nb = PREFIX_FIELDS; # Number of the current field
    while ($field_nb < @fields) {
        my $mac_cp = $fields[$field_nb];
        $global_present_cps{$mac_cp} = 1; # Save the CP's name
        # Increment the number of packets
        $present_cps{$mac_cp}++;
        # Skip the "packet number" field
        $field_nb += 2;
        error_nb_fields() if ($field_nb >= @fields);
        # Count the signal strength level
        my $ss = $fields[$field_nb];
        $distribution{$mac_trx}{$mac_cp}{$ss}++;
        $field_nb++; # Jump to the next field
    }

    # Save the values for the stats (a copy of the per-line hash is stored)
    $transmitters{$timestamp} = $mac_trx;
    $global_stats{$timestamp} = {%present_cps};

    # Print the number of packets for each CP and the number of CPs. The CPs
    # are sorted so that the output is the same from one run to the next
    # (the iteration order of a hash is not).
    if ($verbose) {
        foreach my $mac_cp (sort keys %present_cps) {
            print "$mac_cp -> $present_cps{$mac_cp}\n";
        }
        my $nb_cps = keys %present_cps;
        # Print the number of CPs for this request
        print "$nb_cps different CPs.\n------------\n";
    }
}
## Print statistics ##
# With -n, prints a CSV table: a header line, one line per request (ordered
# by timestamp) giving the number of packets received by each CP, and a final
# line with the total number of packets received by each CP.
if ($print_stats) {
    # The columns follow the sorted list of all the CPs seen in the input
    my @cps_names = sort keys %global_present_cps;

    # Will contain the total number of packets received by each CP
    my %total_nb_pkts = map { $_ => 0 } @cps_names;

    # Print the header line
    print join(';', "Transmitter", "Timestamp", @cps_names), "\n";

    # Order the requests by timestamp. The comparison is numeric, so that
    # timestamps with a different number of digits are ordered by value
    # rather than character by character; the string comparison is only a
    # tie-break for timestamps that are numerically equal.
    my @timestamps = sort { $a <=> $b or $a cmp $b } keys %global_stats;

    # Print the requests
    foreach my $timestamp (@timestamps) {
        my $present_cps = $global_stats{$timestamp};
        my @row = ($transmitters{$timestamp}, $timestamp);
        # Add the CPs in the same order as in the header and count the totals
        foreach my $mac (@cps_names) {
            my $nb_pkts = ($present_cps->{$mac} or 0);
            push @row, $nb_pkts;
            $total_nb_pkts{$mac} += $nb_pkts;
        }
        print join(';', @row), "\n";
    }

    # Print the totals ("Total" stands in the transmitter column, and the
    # timestamp column is left empty)
    print join(';', "Total", "", map { $total_nb_pkts{$_} } @cps_names), "\n";
}
## Print distribution ##
# With -d, prints for each transmitter/receiver link a CSV block made of a
# header line, the signal strength levels, their number of occurrences and
# their probability, each of the last three lines ending with a sum.
if ($print_distribution) {
    for my $trx (sort keys %distribution) {
        my $receivers = $distribution{$trx};
        for my $rx (sort keys %{$receivers}) {
            # Occurrences of each signal strength level on this link
            my $histogram = $receivers->{$rx};

            # Header line
            print "Link (Trx/Rx);$trx;$rx\n";

            # Every level between the lowest and the highest level found is
            # printed, not just the levels actually present
            my @seen_levels = sort { $a <=> $b } keys %{$histogram};
            my ($lowest, $highest) = @seen_levels[0, -1];
            my @levels = $lowest .. $highest;

            # Signal strength levels
            print "SS (dBm);", (map { "$_;" } @levels), "Sum\n";

            # Occurrences (0 for the levels which were not found)
            my $packets_sum = 0;
            my @occurrence_cells;
            for my $level (@levels) {
                my $count =
                    defined($histogram->{$level}) ? $histogram->{$level} : 0;
                $packets_sum += $count;
                push @occurrence_cells, "$count;";
            }
            print "Occurrences;", @occurrence_cells, "$packets_sum\n";

            # Probabilities (the division is only done for the levels which
            # were found)
            my $proba_sum = 0;
            my @proba_cells;
            for my $level (@levels) {
                my $proba = 0;
                if (defined($histogram->{$level})) {
                    $proba = $histogram->{$level} / $packets_sum;
                    $proba_sum += $proba;
                }
                push @proba_cells, "$proba;";
            }
            print "Probability;", @proba_cells, "$proba_sum\n";

            # Separate two links with an empty line
            print "\n";
        }
    }
}
# vim: tabstop=4:shiftwidth=4:expandtab:textwidth=80