Fix the reverse DNS host problem.
# Translate a reverse-resolved host name in $host back to an IP address by
# looking it up in the AWStats DNS "last update" cache file of the site.
#
# Reads from the enclosing scope: $SiteDomain (used to build the cache file
# name) and $host (the name to look up). On a match $host is overwritten with
# field 1 of the matching cache line; otherwise it is left untouched.
# Cache lines are tab-separated; presumably "timestamp, IP, host name" --
# verify against the cache files produced by your AWStats version.
my $dns_cache_file =
  '/var/lib/awstats/dnscachelastupdate.' . $SiteDomain . '.txt';

# Three-argument open with a lexical handle: the file name contains
# $SiteDomain, so it must never be interpreted as an open mode or a pipe.
# A missing or unreadable cache is deliberately ignored (best effort).
if ( open( my $dns_cache_fh, '<', $dns_cache_file ) ) {

    # Host names are case-insensitive: normalise the requested name once
    # instead of comparing a lower-cased cache entry with a raw value.
    my $wanted_name = lc $host;

    while ( my $line = <$dns_cache_fh> ) {
        chomp $line;
        my @fields = split /\t/, $line;

        # Skip short or malformed lines that carry no host name field.
        next unless defined $fields[2];

        if ( lc( $fields[2] ) eq $wanted_name ) {
            $host = $fields[1];

            # The first matching entry wins; nothing further can match
            # usefully once $host has been replaced, so stop scanning.
            last;
        }
    }
    close $dns_cache_fh;
}
TODO:
21593800 92.103.0.242 reverse.completel.net 21593800 213.30.130.42 reverse.completel.net 21593800 92.103.7.242 reverse.completel.net 21593800 92.103.143.70 reverse.completel.net 21593800 212.99.78.124 reverse.completel.net 21593800 92.103.167.46 reverse.completel.net
Ideally this should be fixed in awstats itself, which could detect that the resolution was not complete, or detect that different IPs resolved to the same name and ignore the resolution, or maintain a list of non-unique names. Alternatively, I could change the fix to resolve every IP in the log file, which would take much longer than back-resolving the name once; or we could get a list of IPs, but they are still different visitors (here from all over France). So we should simply detect when several IPs resolve to the same name, put that name in a file, and then either ignore this name or build a compound name with the IP.
I wrote a new plugin, based on the Following Me plugin, that shows all hosts that visited a given page (whereas Following Me shows all pages that a given host visited).
TODO:
It would be perfect if the functionality of these two plugins were integrated directly in awstats, with a link on the URL or hostname instead of an additional column, and available everywhere so that we can navigate continuously between page and host stats. More importantly, it should use a database to avoid parsing the whole log every time.
This is necessary for the previous Follow Me patch to work correctly with resolved hosts.
--- awstats_orig.pl 2011-01-25 13:37:31.640784078 +0100 +++ awstats.pl 2011-01-25 13:19:07.409420053 +0100 @@ -42,7 +42,7 @@ ; # Benchmark info are printing every NBOFLINESFORBENCHMARK lines (Must be a power of 2) $FRAMEWIDTH = 240; # Width of left frame when UseFramesWhenCGI is on $NBOFLASTUPDATELOOKUPTOSAVE = - 500; # Nb of records to save in DNS last update cache file + 50000; # Nb of records to save in DNS last update cache file $LIMITFLUSH = 5000; # Nb of records in data arrays after how we need to flush data on disk $NEWDAYVISITTIMEOUT = 764041; # Delay between 01-23:59:59 and 02-00:00:00
Apparently they modified the code to use the same scale for the page count and the hit count. The problem is that if you have far fewer pages than hits, the page bars are not readable, even though pages are a better indicator of traffic than hits. The difference between pages and hits is more a constant of your website design than interesting information about traffic.
Here is how to restore an independent scale for pages:
TODO: not complete
--- awstats_orig.pl 2011-01-25 13:37:31.640784078 +0100 +++ awstats.pl 2011-01-25 13:19:07.409420053 +0100 @@ -14226,6 +14226,10 @@ print "<th> </th>"; print "</tr>\n"; $total_u = $total_v = $total_p = $total_h = $total_k = 0; + $max_p = 1; + foreach ( values %_domener_p ) { + if ( $_ > $max_p ) { $max_p = $_; } + } $max_h = 1; foreach ( values %_domener_h ) { if ( $_ > $max_h ) { $max_h = $_; } @@ -14242,9 +14246,9 @@ my $bredde_p = 0; my $bredde_h = 0; my $bredde_k = 0; - if ( $max_h > 0 ) { + if ( $max_p > 0 ) { $bredde_p = - int( $BarWidth * $_domener_p{$key} / $max_h ) + 1; + int( $BarWidth * $_domener_p{$key} / $max_p ) + 1; } # use max_h to enable to compare pages with hits if ( $_domener_p{$key} && $bredde_p == 1 ) { $bredde_p = 2; } if ( $max_h > 0 ) { @@ -16224,6 +16228,10 @@ } #if (($MonthPages{$YearRequired.$monthix}||0) > $max_p) { $max_p=$MonthPages{$YearRequired.$monthix}; } + if ( ( $MonthPages{ $YearRequired . $monthix } || 0 ) > $max_p ) + { + $max_p = $MonthPages{ $YearRequired . $monthix }; + } if ( ( $MonthHits{ $YearRequired . $monthix } || 0 ) > $max_h ) { $max_h = $MonthHits{ $YearRequired . $monthix }; @@ -16253,7 +16261,7 @@ my @valcolor = ( "$color_u", "$color_v", "$color_p", "$color_h", "$color_k" ); - my @valmax = ( $max_v, $max_v, $max_h, $max_h, $max_k ); + my @valmax = ( $max_v, $max_v, $max_p, $max_h, $max_k ); my @valtotal = ( $total_u, $total_v, $total_p, $total_h, $total_k ); my @valaverage = (); @@ -16305,11 +16313,11 @@ ( $MonthVisits{ $YearRequired . $monthix } || 0 ) / $max_v * $BarHeight ) + 1; } - if ( $max_h > 0 ) { + if ( $max_p > 0 ) { $bredde_p = int( ( $MonthPages{ $YearRequired . 
$monthix } || 0 ) / - $max_h * $BarHeight ) + 1; + $max_p * $BarHeight ) + 1; } if ( $max_h > 0 ) { $bredde_h = @@ -16565,7 +16573,7 @@ $total_u = $total_v = $total_p = $total_h = $total_k = 0; # Define total and max - $max_v = $max_h = $max_k = + $max_v = $max_p = $max_h = $max_k = 0; # Start from 0 because can be lower than 1 foreach my $daycursor ( $firstdaytoshowtime .. $lastdaytoshowtime ) { @@ -16584,7 +16592,7 @@ $max_v = $DayVisits{ $year . $month . $day }; } -#if (($DayPages{$year.$month.$day}||0) > $max_p) { $max_p=$DayPages{$year.$month.$day}; } +if (($DayPages{$year.$month.$day}||0) > $max_p) { $max_p=$DayPages{$year.$month.$day}; } if ( ( $DayHits{ $year . $month . $day } || 0 ) > $max_h ) { $max_h = $DayHits{ $year . $month . $day }; } @@ -16617,7 +16625,7 @@ $average_k = $average_k / $average_nb; if ( $average_v > $max_v ) { $max_v = $average_v; } - #if ($average_p > $max_p) { $max_p=$average_p; } + if ($average_p > $max_p) { $max_p=$average_p; } if ( $average_h > $max_h ) { $max_h = $average_h; } if ( $average_k > $max_k ) { $max_k = $average_k; } } @@ -16656,7 +16664,7 @@ ); my @valcolor = ( "$color_v", "$color_p", "$color_h", "$color_k" ); - my @valmax = ( $max_v, $max_h, $max_h, $max_k ); + my @valmax = ( $max_v, $max_p, $max_h, $max_k ); my @valtotal = ( $total_v, $total_p, $total_h, $total_k ); $average_v = sprintf( "%.2f", $average_v ); $average_p = sprintf( "%.2f", $average_p ); @@ -16717,10 +16725,10 @@ int( ( $DayVisits{ $year . $month . $day } || 0 ) / $max_v * $BarHeight ) + 1; } - if ( $max_h > 0 ) { + if ( $max_p > 0 ) { $bredde_p = int( ( $DayPages{ $year . $month . 
$day } || 0 ) / - $max_h * $BarHeight ) + 1; + $max_p * $BarHeight ) + 1; } if ( $max_h > 0 ) { $bredde_h = @@ -16779,8 +16787,8 @@ if ( $max_v > 0 ) { $bredde_v = int( $average_v / $max_v * $BarHeight ) + 1; } - if ( $max_h > 0 ) { - $bredde_p = int( $average_p / $max_h * $BarHeight ) + 1; + if ( $max_p > 0 ) { + $bredde_p = int( $average_p / $max_p * $BarHeight ) + 1; } if ( $max_h > 0 ) { $bredde_h = int( $average_h / $max_h * $BarHeight ) + 1; @@ -17066,7 +17074,7 @@ my @vallabel = ( "$Message[56]", "$Message[57]", "$Message[75]" ); my @valcolor = ( "$color_p", "$color_h", "$color_k" ); - my @valmax = ( int($max_h), int($max_h), int($max_k) ); + my @valmax = ( int($max_p), int($max_h), int($max_k) ); my @valtotal = ( $total_p, $total_h, $total_k ); $average_p = sprintf( "%.2f", $average_p ); $average_h = sprintf( "%.2f", $average_h ); @@ -17115,13 +17123,13 @@ my $bredde_p = 0; my $bredde_h = 0; my $bredde_k = 0; - if ( $max_h > 0 ) { + if ( $max_p > 0 ) { $bredde_p = int( ( $avg_dayofweek_p[$_] ne '?' ? 
$avg_dayofweek_p[$_] : 0 - ) / $max_h * $BarHeight + ) / $max_p * $BarHeight ) + 1; } if ( $max_h > 0 ) { @@ -17298,7 +17306,7 @@ $max_h = $max_k = 1; for ( my $ix = 0 ; $ix <= 23 ; $ix++ ) { - #if ($_time_p[$ix]>$max_p) { $max_p=$_time_p[$ix]; } + if ($_time_p[$ix]>$max_p) { $max_p=$_time_p[$ix]; } if ( $_time_h[$ix] > $max_h ) { $max_h = $_time_h[$ix]; } if ( $_time_k[$ix] > $max_k ) { $max_k = $_time_k[$ix]; } } @@ -17309,7 +17317,7 @@ my @vallabel = ( "$Message[56]", "$Message[57]", "$Message[75]" ); my @valcolor = ( "$color_p", "$color_h", "$color_k" ); - my @valmax = ( int($max_h), int($max_h), int($max_k) ); + my @valmax = ( int($max_p), int($max_h), int($max_k) ); my @valtotal = ( $total_p, $total_h, $total_k ); my @valaverage = ( $average_p, $average_h, $average_k ); my @valdata = (); @@ -17334,9 +17342,9 @@ my $bredde_p = 0; my $bredde_h = 0; my $bredde_k = 0; - if ( $max_h > 0 ) { + if ( $max_p > 0 ) { $bredde_p = - int( $BarHeight * $_time_p[$ix] / $max_h ) + 1; + int( $BarHeight * $_time_p[$ix] / $max_p ) + 1; } if ( $max_h > 0 ) { $bredde_h =