#!/usr/bin/perl -w

=head1 NAME

owlps-aggcheck - verifies an aggregation CSV file

=head1 SYNOPSIS

B<owlps-aggcheck> [ B<-h> | B<-V> ] [ B<-v> ] [ B<-d> ] [ B<-n> ] [ I<files> ]

=head1 DESCRIPTION

B<owlps-aggcheck> parses an aggregation CSV file to help the user verify it
visually. For each line, it displays some information about the request and
counts the number of capture points (CPs) which received the request, along
with the number of packets they got. With the B<-n> option, this information
is printed in CSV format. With the B<-d> option, the global distribution of
the signal strength values for each transmitter and each receiver in the
input file(s) is printed.

I<files> is a list of one or more aggregation files to work on, which I<must>
be placed after the options. If no file is provided, the standard input is
read.

=head1 OPTIONS

=over 7

=item B<-h>, B<--help>

Print version and help message and exit.

=item B<-V>, B<--version>

Print version message and exit.

=item B<-d>

Print signal strength distribution in CSV format.

=item B<-n>

Print per-request statistics in CSV format, as well as the total number of
packets received by each capture point. Please note that this display is
ordered by timestamp (unlike the default display which respects the order of
the lines in the input file); this might not correspond to the actual order
of transmission if the devices' clocks are not synchronised.

=item B<-v>

Print human readable information about the lines read (this is the default
behaviour unless B<-d> or B<-n> is used).

=back

=head1 BUGS

When using B<-n> or B<-d>, each timestamp must be unique in the input file.
This is due to the fact that this script uses the timestamp alone as the
identifier of a request (instead of using both the timestamp and the
transmitter's MAC address). This should seldom be a problem in practice, but
if you encounter this case, a workaround is to manually add a decimal to one
of the duplicated timestamps.

=head1 COPYING

This script and its documentation are part of the Owl Positioning System
(OwlPS) project. They are subject to the copyright notice and license terms
in the COPYRIGHT.t2t file found in the top-level directory of the OwlPS
distribution and at
https://code.lm7.fr/mcy/owlps/src/master/COPYRIGHT.t2t

=head1 SEE ALSO

owlps(7), owlps-aggregatord(1)

=cut

use strict;
use warnings;
use Getopt::Std;
use Pod::Usage;


## Constants ##

# CSV format version handled by this program
use constant FORMAT_VERSION => 1;

# Number of fields before the captured packets
use constant PREFIX_FIELDS => 9;


## Global variables ##

# Number of the input line being processed (counted across all input files,
# empty lines included)
my $line_nb = 0;

# Association between timestamp and transmitter of each request
# key = timestamp
# value = transmitter's MAC address
my %transmitters;

# Structure that holds the global count of packets
# key = timestamp
# value = dictionary {key = CP's MAC address
#                     value = number of packets}
my %global_stats;

# List of present CPs in the whole file
# key = MAC address
# value = 1
my %global_present_cps;

# Structure that holds the global distribution of the signal strength, per
# transmitting device
# key = transmitter's MAC address
# value = dictionary {key = receiver's MAC address
#                     value = dictionary {key = signal strength level
#                                         value = number of occurrences}}
my %distribution;


## Functions ##

# Prints the program's identification and the handled CSV format version on
# the file handle given as first argument (standard output by default).
# Getopt::Std calls this function by name when --version or --help is used.
sub VERSION_MESSAGE {
    my $handle = $_[0] || *STDOUT;
    print {$handle}
        "This is OwlPS AggCheck, part of the Owl Positioning System project.\n"
        . "CSV aggregation format version handled: " . FORMAT_VERSION . "\n";
    return;
}

# Prints the usage message extracted from the POD on the file handle given
# as first argument (standard output by default), without exiting.
# Getopt::Std calls this function by name when --help is used.
sub HELP_MESSAGE {
    my $handle = $_[0] || *STDOUT;
    pod2usage(-output => $handle, -exitval => "NOEXIT");
    return;
}

# Aborts the program, reporting that the current input line does not have
# the expected number of fields.
sub error_nb_fields {
    die "Wrong number of fields on line #$line_nb!";
}


## Option parsing ##

# Let Getopt::Std handle --help and --version through HELP_MESSAGE() and
# VERSION_MESSAGE()
$Getopt::Std::STANDARD_HELP_VERSION = 1;

use constant OPTIONS => 'dhnvV';
my %options;
if (!getopts(OPTIONS, \%options)) {
    HELP_MESSAGE(*STDERR);
    exit 1;
}

if ($options{'h'}) {
    VERSION_MESSAGE();
    HELP_MESSAGE();
    exit 0;
}

if ($options{'V'}) {
    VERSION_MESSAGE();
    exit 0;
}

my $print_distribution = $options{'d'};
my $print_stats = $options{'n'};

# Enable verbose mode by default or if -v was explicitly used, disable it if
# -d or -n was used (but not -v)
my $verbose = $options{'v'} || !($print_distribution || $print_stats);


## Main loop: read input lines ##

while (my $line = <>) {
    $line_nb++;
    chomp $line;

    # Skip empty lines
    next if $line eq "";

    # Present CPs for this line
    # key = MAC address, value = number of packets
    my %present_cps;

    # Split the line and check the number of fields
    my @fields = split /;/, $line;
    error_nb_fields() if @fields < PREFIX_FIELDS;

    # Print the first fields (the request's prefix)
    if ($verbose) {
        print "Line #$line_nb: ",
            join(';', @fields[0 .. PREFIX_FIELDS - 1]), "\n";
    }

    # Number of the current field: the captured packets start right after
    # the prefix
    my $field_nb = PREFIX_FIELDS;

    # Check the CSV format version
    my $csv_format_version = $fields[0];
    if ($csv_format_version != FORMAT_VERSION) {
        die "CSV format version \"$csv_format_version\" is not supported!\n";
    }

    # Extract important information
    my $mac_trx = $fields[1];
    my $timestamp = $fields[4];

    # Print some request information (human-readable)
    if ($verbose) {
        my $request_type = $fields[2];
        my $nb_pkts_expected = $fields[3];

        if    ($request_type == 0)  { print "Positioning" }
        elsif ($request_type == 1)  { print "Calibration" }
        elsif ($request_type == 2)  { print "Autocalibration" }
        elsif ($request_type == 10) { print "Implicit" }
        else                        { print "Strange" }

        print " request transmitted by $mac_trx at $timestamp"
            . " ($nb_pkts_expected packets expected).\n";
    }

    # Make sure we don't have a request with the same timestamp already,
    # because it will be a problem if we want to print the statistics or the
    # distribution. exists() is used rather than a truth test so that a
    # stored transmitter whose value is false (e.g. "0") is still detected.
    if (($print_stats or $print_distribution)
        and exists $transmitters{$timestamp}) {
        die "A request with timestamp \"$timestamp\" was already stored;"
            . " this program doesn't work properly with non-unique timestamps"
            . " (see section \"BUGS\" in the manual page).";
    }

    # Read the captured requests and count the packets; each packet is read
    # as three consecutive fields: the CP's MAC address, a field that is
    # skipped (the "packet number"), and the signal strength
    while ($field_nb < @fields) {
        my $mac_cp = $fields[$field_nb];
        $global_present_cps{$mac_cp} = 1; # Save the CP's name

        # Increment the number of packets
        $present_cps{$mac_cp}++;

        # Skip the "packet number" field
        $field_nb += 2;
        error_nb_fields() if $field_nb >= @fields;

        # Count the signal strength level
        my $ss = $fields[$field_nb];
        $distribution{$mac_trx}{$mac_cp}{$ss}++;

        $field_nb++; # Jump to the next field
    }

    # Save the values for the stats
    $transmitters{$timestamp} = $mac_trx;
    $global_stats{$timestamp} = {%present_cps};

    # Print the number of packets for each CP and count the number of CPs
    if ($verbose) {
        my $nb_cps = 0;
        # Sort the CPs so that the output is reproducible from one run to
        # another (hash iteration order is not)
        foreach my $mac_cp (sort keys %present_cps) {
            my $nb_pkts = $present_cps{$mac_cp};
            if ($nb_pkts > 0) {
                print "$mac_cp -> $nb_pkts\n";
                $nb_cps++;
            }
        }

        # Print the number of CPs for this request
        print "$nb_cps different CPs.\n------------\n";
    }
}


## Print statistics ##

if ($print_stats) {
    # Will contain the total number of packets received by each CP
    my %total_nb_pkts;

    # Print the header line
    my @cps_names = sort keys %global_present_cps;
    print join(';', 'Transmitter', 'Timestamp', @cps_names), "\n";

    # Print the requests ordered by timestamp; timestamps are compared
    # numerically (a plain sort would compare them as strings and misplace
    # values whose integer parts have different lengths), falling back on
    # string comparison to keep a stable order for values that are
    # numerically indistinguishable
    my @timestamps = sort { $a <=> $b || $a cmp $b } keys %global_stats;
    foreach my $timestamp (@timestamps) {
        my $present_cps = $global_stats{$timestamp};

        # Take the CPs in the same order as in the header and count the
        # totals
        my @nb_pkts_list;
        foreach my $mac (@cps_names) {
            my $nb_pkts = ($present_cps->{$mac} or 0);
            push @nb_pkts_list, $nb_pkts;
            $total_nb_pkts{$mac} += $nb_pkts;
        }

        print join(';', $transmitters{$timestamp}, $timestamp, @nb_pkts_list),
            "\n";
    }

    # Print the totals; the empty second column keeps the totals aligned
    # with the CP columns of the header (the "Timestamp" column is empty)
    print "Total;";
    print ";$total_nb_pkts{$_}" foreach (@cps_names);
    print "\n";
}


## Print distribution ##

if ($print_distribution) {
    foreach my $mac_trx (sort keys %distribution) {
        foreach my $mac_cp (sort keys %{$distribution{$mac_trx}}) {
            # Let's pick a shortcut
            my $distrib = $distribution{$mac_trx}{$mac_cp};

            # Print the header line
            print "Link (Trx/Rx);$mac_trx;$mac_cp\n";

            # Make a list of SSs to print (we want to print all the SSs in
            # the interval, not just the SSs we found)
            my @present_ss_list = sort { $a <=> $b } keys %{$distrib};
            my @ss_list = $present_ss_list[0] .. $present_ss_list[-1];

            # Print all the SSs
            print "SS (dBm);";
            print "$_;" foreach (@ss_list);
            print "Sum\n";

            # Print the occurrences
            print "Occurrences;";
            my $total_nb_pkts = 0;
            foreach my $ss (@ss_list) {
                my $occurrences = 0;
                if (defined($distrib->{$ss})) {
                    $occurrences = $distrib->{$ss};
                    $total_nb_pkts += $occurrences;
                }
                print "$occurrences;";
            }
            print "$total_nb_pkts\n";

            # Print the probabilities; $total_nb_pkts cannot be zero here
            # since a link only exists once a packet was counted for it
            print "Probability;";
            my $total_proba = 0;
            foreach my $ss (@ss_list) {
                my $proba = 0;
                if (defined($distrib->{$ss})) {
                    $proba = $distrib->{$ss} / $total_nb_pkts;
                    $total_proba += $proba;
                }
                print "$proba;";
            }
            print "$total_proba\n";

            # Separate two links with an empty line
            print "\n";
        }
    }
}

# vim: tabstop=4:shiftwidth=4:expandtab:textwidth=80