#!/usr/bin/php
<?php
 /*   
    Copyright (c) 2012, Paul G Talaga
    All rights reserved.
    
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
    
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL Paul G Talaga BE LIABLE FOR ANY
    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
    
/*
    analyse.php - Command-line PHP script to analyse MemcacheTACH log output.
    Usage (typical):    php analyse.php log.txt             - Analyze log.txt
          (CSV export): php analyse.php log.txt out.csv     - Analyze log.txt and export result (append) to out.csv
 
    For correct rack analysis be sure to fill in below.
*/


// Rack layout used by the rack latency analysis.
/* $rack maps an address to a rack number.  Webservers are keyed by plain IP and carry the
   positive rack number; memcache servers are keyed by "IP:port" and carry the NEGATIVE of
   the rack they sit in.  Any integer may be used as a rack number.
*/
$pre = '128.230.109.';  // webserver address prefix
$pre2 = '192.168.1.';   // memcache server address prefix

// rack number => array(last octets of the rack's webservers, last octet of its memcache server)
$rack_layout = array(
    1 => array(array(49, 50, 51, 52), 52),
    2 => array(array(70, 53, 54, 55, 56), 56),
    3 => array(array(69, 57, 58, 59, 60), 60),
    4 => array(array(61, 62, 63, 64), 64),
    5 => array(array(65, 66, 67, 68), 68),
    6 => array(array(), 71),
);

$rack = array();
foreach($rack_layout as $rack_no => $members){
    list($web_octets, $cache_octet) = $members;
    foreach($web_octets as $octet){
        $rack[$pre . $octet] = $rack_no;
    }
    // memcache entries include the port and use the negated rack number
    $rack[$pre2 . $cache_octet . ':11211'] = -$rack_no;
}
    
// *****************************************
    // Argument 1 (required): path of the MemcacheTACH log file to analyse.
    if(isset($argv[1])){
        $log_file = $argv[1];
        echo "Analyzing $log_file ...\n\n";
    }else{
        echo "Enter log file to analyze as command line argument.\n";
        exit(1); // usage error: report failure to the calling shell
    }

    // Argument 2 (optional): CSV file that receives the one-row summary.
    if(isset($argv[2])){
        $csv_file = $argv[2];
        echo "Using $csv_file for row output.\n";
    }else{
        $csv_file = FALSE;
    }
    // No simulation file is wired in; the sim-related code further down stays disabled.
    $sim_file = FALSE;


// ---- Tunables ----
// Warn when memcache reports a hit for a key this log never saw being stored.
$wnf = FALSE;
// Log line numbers to ignore, e.g. array(0,421177, 421267); empty means skip nothing.
$fudge_skip = array();

// ---- State shared by the analysis passes ----
// Time window of the log: first timestamp, last timestamp, and the previous record's timestamp.
$tspan = array('start' => 0, 'end' => 0, 'last' => 0);

// Time-bucketed counters (reads/writes, simulated hits/misses) and per-conversation totals.
$plots = array();
$conv = array();

// Per "webserver->memcache" path accumulators for the latency/size regression.
$xmean = $ymean = $num_samples = $sum_top = $sum_bottom = array();

// Last known stored size of each key, used to estimate the bytes returned by later gets.
$size = array();

// Scalar counters.
$i = 0;                       // current log line number
$changecnt = 0;               // number of mutating requests issued
$num_samples_multiget = 0;    // multiget does not track the target server, so it is summed separately
$sum_objects = $num_object_transfer = 0;
$mymiss = $mysetmiss = $singlecount = 0;
$writebytes = $readbytes = $lastnet = 0;
/* Least-squares fit of latency (y) against bytes transferred (x); xb and yb are the means:
b1 = Σ [ (xi - xb)(yi - yb) ] / Σ [ (xi - xb)^2 ]
b1 = r * (sy / sx)
b0 = yb - b1 * xb
 three passes: 1: means, 2: regression, 3: residual calculation
 also do hit and miss calculation
*/
// ---------------------------------------------------------------------------------------
// Pass 1: read every log record once and accumulate
//   - per "webserver->memcache" path: estimated bytes on the wire ($xmean), latency ($ymean)
//     and request count ($num_samples); the sums are turned into means after this loop,
//   - per conversation: request count, total latency, hits and misses,
//   - globally: object bytes transferred, number of mutating requests, read/write bytes.
// The file is assumed to be in time order (cat the per-server logs together, then sort).
// NOTE(review): records are decoded with unserialize(); only feed this script trusted logs.
// ---------------------------------------------------------------------------------------
$fh = fopen($log_file,'r');
if($fh === FALSE){
    echo "Could not open $log_file for reading.\n";
    exit(1);
}

// Protocol overhead in bytes added to a storage request, as array(when $found, when not $found).
$store_overhead = array(
    'set'     => array(21, 21),  // 13 + 8, where 8 is the response
    'add'     => array(21, 25),
    'replace' => array(25, 29),
    'append'  => array(24, 28),
    'prepend' => array(25, 29),
);

while(($line = fgets($fh)) !== FALSE){
    $i++;
    if(strlen($line) < 10 || in_array($i,$fudge_skip)){continue;}
    $data = unserialize($line);
    if($data === FALSE){echo "Unserialize error on line $i - ",strlen($line),"!!!\n\n$data\n\n";continue;}
    list($time,$cookie,$sv,$msvo,$cmd,$key,$out_len,$found,$params,$delay,$conversation) = $data;

    // Track the time window covered by the log and check that time goes forward.
    if($tspan['start'] == 0 || $tspan['start'] > $time){$tspan['start'] = $time;}
    if($tspan['end'] < $time){$tspan['end'] = $time;}   // without this the reported duration is negative
    if($tspan['last'] > $time){echo "Log error! Not sorted! Continuing anyway.\n"; }
    $tspan['last'] = $time;

    // Conversation tracking
    if(!isset($conv[$conversation])){
        $conv[$conversation] = array('num_actions' => 0, 'tot_time' => 0, 'end_time' => 0, 'hits' => 0, 'misses' => 0,'tot_sim_time' => 0);
    }
    $conv[$conversation]['num_actions']++;
    $conv[$conversation]['tot_time'] += $delay;
    $conv[$conversation]['end_time'] = $time; // last will stick

    // $out_len alone is not the network size: protocol framing is added per command, and a
    // get of a known key transfers the stored object, so sizes are remembered in $size.
    $msv = $sv . '->' . $msvo; // track each webserver to each memcache server
    if(!isset($xmean[$msv])){$xmean[$msv] = 0; $ymean[$msv] = 0; $num_samples[$msv] = 0; $sum_top[$msv] = 0; $sum_bottom[$msv] = 0;}
    $lastnet = $xmean[$msv]; // snapshot, so this request's bytes can be derived below

    if(isset($store_overhead[$cmd])){
        $xmean[$msv] += $out_len + strlen($key) + strlen($out_len) + $store_overhead[$cmd][$found ? 0 : 1];
        if($cmd == 'append' || $cmd == 'prepend'){
            // These grow the stored object rather than replace it (and are not counted
            // as whole-object transfers).
            $size[$key] = (isset($size[$key]) ? $size[$key] : 0) + $out_len;
        }else{
            $size[$key] = $out_len;
            $sum_objects += $out_len;
            $num_object_transfer += 1;
        }
        $changecnt += 1; // count the number of 'sets' issued
        if($cmd == 'set'){
            if($found == FALSE){
                echo "Set bad: $key $mysetmiss $sv $msvo Len:$out_len\n";
                $mysetmiss++;
            }else if($out_len == 0){
                echo "Zero length!!\n";
            }
        }
    }else if($cmd == 'get'){
        if($found && isset($size[$key])){
            // request (key) + response header (key, length digits) + object; 30 includes a 4 digit cas
            $xmean[$msv] += strlen($key) + $size[$key] + strlen($key) + strlen($size[$key]) + 30;
            $sum_objects += $size[$key];
            $num_object_transfer += 1;
        }else{
            // Miss, or a hit on a key whose size this log never recorded.
            if($found && $wnf){echo "$key not defined - get\n";}
            $xmean[$msv] += strlen($key) + 12;
        }
    }else if($cmd == 'multiget'){
        // For a multiget the key, length and found fields are themselves serialized arrays.
        $keys = unserialize($key);
        $lengths = unserialize($out_len);
        $founds = unserialize($found);
        $num_object_transfer += 1;
        // A dedicated index variable is required: reusing $i would clobber the line counter.
        foreach($keys as $idx => $k){
            if($founds[$idx] && isset($size[$k])){
                $xmean[$msv] += strlen($size[$k]) + $size[$k] + $lengths[$idx];
                $sum_objects += $size[$k];
            }else{
                if($founds[$idx] && $wnf){echo "$k not defined - multiget\n";}
                $xmean[$msv] += 4 + strlen($k) + 2 + 5;
            }
            if($founds[$idx]){$conv[$conversation]['hits']++;}else{$conv[$conversation]['misses']++;}
        }
        //$num_samples_multiget++;
    }else if($cmd == 'delete'){
        $xmean[$msv] += strlen($key) + ($found ? 18 : 20);
        // $size[$key] is deliberately kept so object statistics can be computed later.
        $changecnt += 1;
    }else if($cmd == 'incr' || $cmd == 'decr'){
        $xmean[$msv] += strlen($key) + $out_len + ($found ? 6 : 17);
        $changecnt += 1;
    }else if($cmd == 'flush'){
        $xmean[$msv] += 15;
        $changecnt += 1;
    }else{
        echo "Did not understand $cmd!! on line $i\n";
        exit(1);
    }

    // Single-key commands score one hit or miss per request; multiget scored per key above
    // and flush has no hit/miss notion.
    if($cmd != 'multiget' && $cmd != 'flush'){
        if($found){$conv[$conversation]['hits']++;}else{$conv[$conversation]['misses']++;}
    }

    $ymean[$msv] += $delay;
    $num_samples[$msv]++;

    // Read/write byte counting: everything that is not a (multi)get is treated as a write.
    if($cmd == 'get' || $cmd == 'multiget'){
        $readbytes += ($xmean[$msv] - $lastnet);
    }else{
        $writebytes += ($xmean[$msv] - $lastnet);
    }
}

// Turn the per-path running totals gathered in pass 1 into means
// (bytes per request in $xmean, latency per request in $ymean).
foreach(array_keys($xmean) as $k){
    $samples = $num_samples[$k];
    $xmean[$k] /= $samples;
    $ymean[$k] /= $samples;
}


// ---------------------------------------------------------------------------------------
// Pass 2: accumulate the regression numerator/denominator per path ($sum_top/$sum_bottom)
// and bucket reads/writes over time into $plots.
// NOTE(review): the deviations below use the raw $out_len while $xmean holds the mean of
// the estimated on-wire size computed in pass 1 - confirm this mix is intended.
// ---------------------------------------------------------------------------------------
rewind($fh);
$i = 0;
// Highest plot bucket index.  Nothing in this file sets it, so fall back to a default.
if(!isset($pimax)){$pimax = 100;}
$plot_span = $tspan['end'] - $tspan['start'];
while(($line = fgets($fh)) !== FALSE){
    $i++;
    if(strlen($line) < 10 || in_array($i,$fudge_skip)){continue;}
    $data = unserialize($line);
    if($data === FALSE){echo "Unserialize error on line $i!!!\n";continue;}
    list($time,$cookie,$sv,$msvo,$cmd,$key,$out_len,$found,$params,$delay,$conversation) = $data;
    $msv = $sv . '->' . $msvo; // track each webserver to each memcache server

    // A multiget stores a serialized array in $out_len; treat any non-numeric length as 0
    // instead of feeding a string into the arithmetic (a TypeError on PHP 8).
    $x = is_numeric($out_len) ? $out_len : 0;
    $sum_top[$msv] += ($x - $xmean[$msv])*($delay - $ymean[$msv]);
    $sum_bottom[$msv] += pow($x - $xmean[$msv],2);

    // Map the timestamp onto a bucket; an empty or unknown time span collapses to bucket 0.
    $pi = ($plot_span > 0) ? (int)round(($time - $tspan['start'])/$plot_span * $pimax) : 0;
    if(!isset($plots[$pi])){
        $plots[$pi] = array('writes' => 0, 'reads' => 0, 'hit_sim' => 0, 'miss_sim' => 0);
    }
    if($cmd == 'set' || $cmd == 'add'){
        $plots[$pi]['writes']++; // deal with delete
    }else if($cmd == 'get'){
        $plots[$pi]['reads']++;
    }else if($cmd == 'multiget'){
        // one read per requested key
        $multi_keys = unserialize($key);
        if(is_array($multi_keys)){$plots[$pi]['reads'] += count($multi_keys);}
    }

    // Sim stuff (only active when a simulation file has been loaded into $sim)
    if($sim_file){
        $sim_id = substr(md5($line),0,10);
        if(isset($sim[$sim_id])){
            $s = $sim[$sim_id];
            if($cmd == 'get'){
                if($s['h']){$plots[$pi]['hit_sim']++;}else{$plots[$pi]['miss_sim']++;}
            }
        }else{
            // Do not score this record with the previous record's simulation result.
            echo "sim not found!\n";
        }
    }
}

// ---------------------------------------------------------------------------------------
// Per-path report: fit latency = intercept + slope * bytes for every "webserver->memcache"
// path, print bandwidth (from the slope) and base latency (the intercept), and fold the
// sample-weighted intercepts into per-rack intra/inter-rack totals.
// ---------------------------------------------------------------------------------------
echo "Time start (x10): ",$tspan['start']," end: ",$tspan['end']," total seconds: ",($tspan['end'] - $tspan['start'])/10,"\n\n";
echo "Network analysis for all paths based on latency and packet size\n";

// First CSV column; make sure the row buffer exists before appending to it.
if(!isset($csv_line)){$csv_line = '';}
$csv_line .= ($tspan['end'] - $tspan['start'])/10 . ",";

$latency_predict = array(); // used later
$server_memserver_num_requests = array();
$latency_rack = array();
// prefill in zeros for rack analysis (memcache servers carry the negated rack number)
foreach($rack as $rn){
    if($rn < 0){$rn = -$rn;}
    $latency_rack[$rn] = array('intersum' => 0, 'intersum2' => 0, 'intercnt' => 0,  'intrasum' => 0, 'intrasum2' => 0, 'intracnt' => 0);
}
foreach($xmean as $k => $d){
    // With a single sample, or when every request had the same size, the denominator is
    // zero and no slope can be fitted; use 0 so the intercept equals the mean latency.
    $slope = ($sum_bottom[$k] != 0) ? $sum_top[$k]/$sum_bottom[$k] : 0;
    $int = $ymean[$k] - $slope * $xmean[$k];
    if($num_samples[$k] == 1 || $slope == 0){// Can't do regression on a single point!
        echo "$k slope: --(single sample) mbps latency: ", round($int * 1000000) / 1000," (ms) \n    size_mean: ",round($xmean[$k] / 1024 * 1000)/1000,"(kB) time_mean: ",round($ymean[$k] * 1000000) / 1000," (ms) num_requests*: ",$num_samples[$k],"\n\n";
    }else{
        echo "$k slope: ",round(1/$slope /1024 / 1024 * 8 * 10) / 10 ," mbps latency: ", round($int * 1000000) / 1000," (ms) \n    size_mean: ",round($xmean[$k] / 1024 * 1000)/1000,"(kB) time_mean: ",round($ymean[$k] * 1000000) / 1000," (ms) num_requests*: ",$num_samples[$k],"\n\n";
    }
    // analyze racks: weight with # samples, notice square fix below
    // $int is predicted based on slope (0 crossing), $ymean[$k] is average of all latencies
    $use_this_latency = $int * 1000 * $num_samples[$k];
    list($from,$to) = explode('->',$k);
    // handle from and to counting
    if(!array_key_exists($from,$server_memserver_num_requests)){$server_memserver_num_requests[$from] = 0;}
    if(!array_key_exists($to,$server_memserver_num_requests)){$server_memserver_num_requests[$to] = 0;}
    $server_memserver_num_requests[$from] += $num_samples[$k];
    $server_memserver_num_requests[$to] += $num_samples[$k];
    // update rack counts; paths with an endpoint missing from $rack are reported and skipped
    if(!array_key_exists($from,$rack) || !array_key_exists($to,$rack)){echo "What rack is $from or $to?!?\n"; continue;}
    if($rack[$from] == -$rack[$to]){// same rack
        $latency_rack[$rack[$from]]['intrasum'] += $use_this_latency;
        $latency_rack[$rack[$from]]['intrasum2'] += $use_this_latency * $use_this_latency / $num_samples[$k]; // Used for variance calculation
        $latency_rack[$rack[$from]]['intracnt'] += $num_samples[$k];
    }else{
        $latency_rack[$rack[$from]]['intersum'] += $use_this_latency;
        $latency_rack[$rack[$from]]['intersum2'] += $use_this_latency * $use_this_latency / $num_samples[$k];
        $latency_rack[$rack[$from]]['intercnt'] += $num_samples[$k];
    }
}

// Print, sorted by address, how many messages each webserver / memcache server took part in.
echo "\nMemcache Client & Server messages passed\n";
ksort($server_memserver_num_requests);
foreach(array_keys($server_memserver_num_requests) as $endpoint){
    $message_count = $server_memserver_num_requests[$endpoint];
    echo "     ", $endpoint, "   ", $message_count, "   messages (sent/received)\n";
}

// ---------------------------------------------------------------------------------------
// Per-rack latency: mean and variance of the sample-weighted path latencies for traffic
// staying inside a rack ("within") and traffic leaving it, followed by the count-weighted
// average over all racks.  Two CSV columns are appended (within, leaving).
// ---------------------------------------------------------------------------------------
echo "\n\nRack Latency Stats, based on accumulated source/dests\n";
$ltotals = array('intercnt' => 0, 'intersum' =>0, 'intracnt' => 0, 'intrasum' =>0);
$intercnt = 0;
$intracnt = 0;
foreach($latency_rack as $r => $d){
    // Divide by 1 when a rack has no samples, but keep the real count for reporting and
    // for the overall totals so empty racks do not skew the weighted average.
    $intradiv = ($d['intracnt'] == 0) ? 1 : $d['intracnt'];
    $interdiv = ($d['intercnt'] == 0) ? 1 : $d['intercnt'];
    $intramean = $d['intrasum'] / $intradiv;
    $intermean = $d['intersum'] / $interdiv;
    echo "Rack $r, within ", round(1000 * $intramean)/1000, ' (ms) var ', round(1000 * ($d['intrasum2']/$intradiv - $intramean * $intramean))/1000,"\n";
    echo "       leaving ", round(1000 * $intermean)/1000, ' (ms) var ', round(1000 * ($d['intersum2']/$interdiv - $intermean * $intermean))/1000,"\n";
    echo "       intracnt: ", $d['intracnt']," intercnt: ", $d['intercnt'],"\n\n";
    $ltotals['intercnt'] += $d['intercnt'];
    $ltotals['intracnt'] += $d['intracnt'];
    $ltotals['intersum'] += $d['intersum'];
    $ltotals['intrasum'] += $d['intrasum'];
}
// Overall averages; 0 when no traffic of that kind was seen at all.
$within_all = ($ltotals['intracnt'] > 0) ? $ltotals['intrasum'] / $ltotals['intracnt'] : 0;
$leaving_all = ($ltotals['intercnt'] > 0) ? $ltotals['intersum'] / $ltotals['intercnt'] : 0;
echo "Averaged over all racks (count weighted), within is ",round(1000 * $within_all)/1000, ', leaving is ',round(1000 * $leaving_all)/1000,"\n\n";

$csv_line .= $within_all . ',';
$csv_line .= $leaving_all . ',';

// ---------------------------------------------------------------------------------------
// Same intra/inter-rack comparison, but computed from every individual message's measured
// delay (scaled by 1000) instead of from the fitted per-path latencies.
// Appends six CSV columns: mean, variance, count for intra-rack, then for inter-rack.
// ---------------------------------------------------------------------------------------
$intra_inter_rack = array('intersum' => 0, 'intersum2' => 0, 'intercnt' => 0,  'intrasum' => 0, 'intrasum2' => 0, 'intracnt' => 0);
rewind($fh);
$i = 0;

while(($line = fgets($fh)) !== FALSE){
    $i++;
    if(strlen($line) < 10 || in_array($i,$fudge_skip)){continue;}
    $data = unserialize($line);
    if($data === FALSE){echo "Unserialize error on line $i!!!\n";continue;}
    list($time,$cookie,$sv,$msvo,$cmd,$key,$out_len,$found,$params,$delay,$conversation) = $data;
    $from = $sv;
    $to = $msvo;
    if(!array_key_exists($from,$rack) || !array_key_exists($to,$rack)){echo "What rack is $from or $to?!?\n"; continue;}
    $delay = $delay * 1000;
    // A memcache server carries the negated rack number of the rack it sits in.
    $kind = ($rack[$from] == -$rack[$to]) ? 'intra' : 'inter';
    $intra_inter_rack[$kind . 'sum'] += $delay;
    $intra_inter_rack[$kind . 'sum2'] += $delay * $delay; // Used for variance calculation
    $intra_inter_rack[$kind . 'cnt'] += 1;
}

// Guard BOTH divisors (a log may contain only intra-rack or only inter-rack traffic) while
// still reporting the real sample counts.
$intradiv = ($intra_inter_rack['intracnt'] == 0) ? 1 : $intra_inter_rack['intracnt'];
$interdiv = ($intra_inter_rack['intercnt'] == 0) ? 1 : $intra_inter_rack['intercnt'];
$intramean = $intra_inter_rack['intrasum'] / $intradiv;
$intermean = $intra_inter_rack['intersum'] / $interdiv;
$intravar = $intra_inter_rack['intrasum2'] / $intradiv - $intramean * $intramean;
$intervar = $intra_inter_rack['intersum2'] / $interdiv - $intermean * $intermean;
echo "Averaged over all messages, within a rack is ",round(1000* $intramean)/1000, ' (',$intravar,' var, ',$intra_inter_rack['intracnt'],' samples), leaving is ',round(1000* $intermean)/1000,' (',$intervar, " var, ",$intra_inter_rack['intercnt']," samples)\n\n";

$csv_line .= $intramean . ',';
$csv_line .= $intravar . ',';
$csv_line .= $intra_inter_rack['intracnt'] . ',';
$csv_line .= $intermean . ',';
$csv_line .= $intervar . ',';
$csv_line .= $intra_inter_rack['intercnt'] . ',';

        

// Request analysis: average number of memcache requests and average total memcache
// latency per conversation.  Appends three CSV columns (count, avg requests, avg delay).
$num_actions = 0;
$delays = 0;
foreach($conv as $c){
    $num_actions += $c['num_actions'];
    $delays += $c['tot_time'];
}
$i = count($conv); // number of conversations seen

// An empty log has no conversations; leave both averages at 0 instead of dividing by zero.
if($i > 0){
    $num_actions = $num_actions / $i;
    $delays = $delays / $i;
}
echo "$i conversations, with $num_actions memcache requests average, taking ",round($delays * 1000000) / 1000," (ms) per conversation.\n\n";

$csv_line .= $i . ',';
$csv_line .= $num_actions . ',';
$csv_line .= $delays * 1000 . ',';

// Key tracking: for every key in the log, collect the set of webservers that mentioned it,
// then report how many keys were touched by exactly 1, 2, ... servers.
$key_track = array();   // key => array(webserver => 1)
rewind($fh);
$i = 0;
while($line = fgets($fh)){
    $i++;
    $skip = strlen($line) < 10 || in_array($i,$fudge_skip);
    if(!$skip){
        $data = unserialize($line);
        if($data === FALSE){
            echo "Unserialize error on line $i!!!\n";
        }else{
            list($time,$cookie,$sv,$msvo,$cmd,$key,$out_len,$found,$params,$delay,$conversation) = $data;
            // Creates the per-key set on first sight, otherwise just marks this server.
            $key_track[$key][$sv] = 1;
        }
    }
}

// Histogram: number of distinct servers => number of keys with that many servers.
$key_counts = array_count_values(array_map('count', $key_track));

echo "Of ",count($key_track)," keys mentioned, \n";
$csv_line .= count($key_track) . ',';
foreach($key_counts as $server_total => $key_total){
    echo "       $key_total were used by $server_total server(s)\n";
}

// More key tracking: of all keys mentioned during a conversation, what fraction is used by
// only one webserver?  The per-conversation fractions are averaged into the "ps" value and
// appended to the CSV row.
$conv_track = array();  // conversation => array('single' => n, 'all' => n)
rewind($fh);
$i = 0;
while(($line = fgets($fh)) !== FALSE){
    $i++;
    if(strlen($line) < 10 || in_array($i,$fudge_skip)){continue;}
    $data = unserialize($line);
    if($data === FALSE){echo "Unserialize error on line $i!!!\n";continue;}
    list($time,$cookie,$sv,$msvo,$cmd,$key,$out_len,$found,$params,$delay,$conversation) = $data;
    if(!isset($conv_track[$conversation])){ // first record of this conversation
        $conv_track[$conversation] = array('single' => 0, 'all' => 0);
    }
    if(count($key_track[$key]) == 1){$conv_track[$conversation]['single'] ++;}
    $conv_track[$conversation]['all'] ++;
}
$num_conv = 0;
$sum_s_conv = 0;
foreach($conv_track as $on){
    // 'all' is at least 1 for every tracked conversation
    $sum_s_conv += $on['single'] / $on['all'];
    $num_conv++;
}

// An empty log has no conversations; report 0 rather than dividing by zero.
$ps = ($num_conv > 0) ? $sum_s_conv / $num_conv : 0;

echo "\nGiving a ps value of ",round($ps * 100),"%. (Counts all keys, even a get miss)\n";
echo "        (Per Session ps, so of all keys mentioned for a page request\n          what's the percentage that are only used on one client)\n\n";

$csv_line .= $ps . ',';

// Object size calculation: total/average network bytes per request, read share, and the
// total and average size of the objects recorded in $size.  Every ratio falls back to 0
// when its denominator is zero (e.g. a log without any store has count($size) == 0).
$total_object_bytes = 0;
$total_requests = 0;
foreach($xmean as $k => $d){
    // $d is a mean, so mean * samples restores the total bytes for that path
    $total_object_bytes += $d * $num_samples[$k];
    $total_requests += $num_samples[$k];
}
$avg_request_bytes = ($total_requests > 0) ? $total_object_bytes / $total_requests : 0;
$read_percent = ($total_requests > 0) ? round(($total_requests - $changecnt)/$total_requests * 100) : 0;
echo "$total_requests requests sent, with an average network size of ", $avg_request_bytes, " bytes per request, and ", $changecnt," writes,\n";
echo "or ", $read_percent, "% reads\n";
echo "Total network bytes sent: $total_object_bytes\n";

echo "\n\n Readbytes: $readbytes Writebytes: $writebytes\n";

$total_rw_bytes = $readbytes + $writebytes;
$csv_line .= (($total_rw_bytes > 0) ? $readbytes / $total_rw_bytes : 0) . ',';
$csv_line .= $avg_request_bytes . ',';
$csv_line .= $changecnt . ',';
$csv_line .= $total_object_bytes . ',';

$sum_object_sizes = array_sum($size);
$num_objects = count($size);
echo "In cache, $sum_object_sizes bytes stored, ",$num_objects," objects, avg size ",($num_objects > 0) ? $sum_object_sizes / $num_objects : 0, " bytes\n\n";

echo "Counting only requests with objects transfer ($num_object_transfer), $sum_objects bytes sent on network, or ", ($num_object_transfer > 0) ? $sum_objects / $num_object_transfer : 0," bytes (object) per request. \n";
echo "\n";
$csv_line .= $sum_object_sizes . ',';
$csv_line .= $num_objects . ',';

// ---------------------------------------------------------------------------------------
// Command usage: count each command split by hit/miss into fixed slots of $cmd_usage,
// print every slot with its share of all requests, and append the request count plus all
// 20 slots to the CSV row.  A multiget counts as ONE get, hit if most of its keys hit.
// ---------------------------------------------------------------------------------------
rewind($fh);
$cmd_usage = array_fill(0, 20, 0); // We'll be lazy and just do indexes

// command => array(slot when $found, slot when not $found)
$cmd_slots = array(
    'set'     => array(0, 19),
    'add'     => array(1, 2),
    'replace' => array(3, 4),
    'delete'  => array(8, 9),
    'incr'    => array(10, 11),
    'decr'    => array(12, 13),
    'get'     => array(15, 16),
    'append'  => array(17, 18),  // append/prepend share slots since they have the same
    'prepend' => array(17, 18),  // behavior from our point of view
);

$i = 0;
$reqs = 0;
while(($line = fgets($fh)) !== FALSE){
    // Count the line BEFORE the skip test so $fudge_skip refers to real line numbers, as in
    // the other passes.
    $i++;
    if(strlen($line) < 10 || in_array($i,$fudge_skip)){continue;}
    $data = unserialize($line);
    if($data === FALSE){echo "Unserialize error on line $i!!!\n";continue;}
    $reqs++; // only successfully parsed records are requests
    list($time,$cookie,$sv,$msvo,$cmd,$key,$out_len,$found,$params,$delay,$conversation) = $data;
    if($cmd == 'multiget'){   // we treat a multiget as a single get
        $founds = unserialize($found);
        if(!is_array($founds)){$founds = array();}
        $hit = 0;
        foreach($founds as $f){
            if($f){$hit++;}
        }
        if($hit > count($founds)/2){$cmd_usage[15]++;}else{$cmd_usage[16]++;}
    }else if($cmd == 'flush'){
        $cmd_usage[14]++;
    }else if($cmd == 'caaaaaaaas'){
        // todo: fix!!  (slots 5-7 are reserved for this and stay 0)
    }else if(isset($cmd_slots[$cmd])){
        $cmd_usage[$cmd_slots[$cmd][$found ? 0 : 1]]++;
    }else{
        echo $cmd, " not known!!!!!!\n";
    }
}

// slot => label, in print order
$cmd_labels = array(
    0  => "Set Hit    (0): ",
    19 => "Set Miss  (19): ",
    1  => "Add Hit    (1): ",
    2  => "Add Miss   (2): ",
    3  => "Replace Hit(3): ",
    4  => "ReplaceMiss(4): ",
    5  => "CAAAA1     (5): ",
    6  => "CAAAA2     (6): ",
    7  => "CAAAA3     (7): ",
    8  => "Delete Hit (8): ",
    9  => "Delete Miss(9): ",
    10 => "Inc Hit   (10): ",
    11 => "Inc Miss  (11): ",
    12 => "Dec Hit   (12): ",
    13 => "Dec Miss  (13): ",
    14 => "Flush     (14): ",
    15 => "Get Hit   (15): ",
    16 => "Get Miss  (16): ",
    17 => "Ap/Pre Hit(17): ",
    18 => "Ap/PreMiss(18): ",
);

echo "Command Usage out of $reqs requests (multiget treated as single get, with majority hit/miss):\n";
$csv_line .= $reqs . ',';
foreach($cmd_labels as $slot => $label){
    // with no requests at all every share is reported as 0%
    $percent = ($reqs > 0) ? round($cmd_usage[$slot] / $reqs * 100) : 0;
    echo $label,$cmd_usage[$slot]," or ",$percent,"%\n";
}
for($i = 0;$i<=19;$i++){$csv_line .= $cmd_usage[$i] . ',';}

// CSV export: APPEND the collected summary row to the CSV file, as documented in the usage
// notes at the top of this script, so several analysed logs can share one file.
if($csv_file){
    $f = fopen($csv_file,'a');
    if($f === FALSE){
        echo "\n\nCould not open $csv_file for writing, csv row NOT written\n";
    }else{
        $csv_line .="0\n"; // closing column and end of row
        fwrite( $f,$csv_line);
        fclose($f);
        echo "\n\n$csv_file csv file written\n";
    }
}

// All passes are done; release the log file handle.
if(isset($fh) && is_resource($fh)){
    fclose($fh);
}



