/*********************************************************************
Spade, a Snort preprocessor plugin to report unusual packets
Author: James Hoagland, Silicon Defense (hoagland@SiliconDefense.com)
copyright (c) 2000 by Silicon Defense (http://www.silicondefense.com/)

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.  

Spade description:

SPADE, the Statistical Packet Anomaly Detection Engine, is a Snort
preprocessor plugin to report packets that are unusual for your network. 
Port scans and probes tend to be unusual, so this will tend to report them
(as well as some benign packets that are simply uncommon).

Spade's home page: http://www.silicondefense.com/spice/

Please send complaints, kudos, and especially improvements and bugfixes to
hoagland@SiliconDefense.com. This is a research project and would love to
have your feedback.  It is still under active development and may change at
any time.

This file (anomsensor_plug.c) is part of Spade v092200.1.  It contains all
the Snort- and sensor-specific code in Spade.
*********************************************************************/

/* Internal version control: $Id: */

/*#define LOG10 2.30258509299 */
#define LOG2 0.69314718056
 
#include "spp_anomsensor.h"
#include "rules.h"
#include "log.h"
#include <string.h>

/* the threshold at which anomalous events are reported */
double report_anom_thres;

char *outfile; // the name of the output log file
char *statefile; // the name of the file to checkpoint to and recover from
int checkpoint_freq; // the frequency (in recorded packet counts) with which
                     // to checkpoint
int prob_mode; // the probability calculation mode

int as_debug= 0; // the bigger the number, the more debugging statements
                 // that are active
int parts=0,part=11; // if parts is 1, the part indicates which part section
                     // in record_packet should be run, overriding the
                     // probability mode; don't try to calculate anomaly
                     // scores in this case

int adapting=0; // is there an adaptation module active on this run?
int need_anom= 0; // does some module need the anomaly score calculated?
double last_anom_score; // the anomaly score for this packet
int skip_packet;  // is this packet being skipped (not added to the tree)?
time_t last_pkt_time=(time_t)0; // the time of the last packet added

int tot_packets=0; // the total number of packets added to the tree
                   // on this run
int recent_packets= 0; // the number of packets added since the count was
                       // last reset
int alert_count= 0; // the count of alerts sent about packets
int recent_alert_count= 0; // the count of alerts sent since the count was
                           // last reset

int pp_active= 0; // this is a count of how many modules have added
                  // themselves to the preprocessor list and will be calling
                  // record_maybe_skip()
int pp_run_on_pkt= 0; // this is how many have called record_maybe_skip() so
                      // far on this packet


/* globals used in the tree and memory management */
const char *featurename[NUM_FEATURES]={"sip","dip","sport","dport"};
/*const char *featurename[NUM_FEATURES]={"sip","dip","sport","dport","ttl","win"};*/

mindex TNULL;
dmindex DMINDEXMASK;

treeroot **ROOT_M;
intnode **INT_M;
leafnode **LEAF_M;

mindex root_freelist;
mindex int_freelist;
mindex leaf_freelist;

unsigned char ROOT_BLOCK_BITS;
unsigned char INT_BLOCK_BITS;
unsigned char LEAF_BLOCK_BITS;
unsigned int MAX_ROOT_BLOCKS;
unsigned int MAX_INT_BLOCKS;
unsigned int MAX_LEAF_BLOCKS;
mindex T[NUM_FEATURES];
/*************/

/* The most basic role of Spade is to add packets to a tree in a certain way
which will allow probabilities of various features to be calculated.  The
next most basic thing it does is to calculate anomaly scores based on this. 
Typically, when a certain score threshold is exceeded, snort alerts are
generated.  There are additional modules within the sensor (turned on by
config file lines) which do additional things like adapting the reporting
threshold and generating statistics. */
 
/* Register each Spade module keyword together with its init function, so
   that the init function is run when the keyword is specified on a
   preprocessor line.  A call to this function needs to be added to
   plugbase.c somehow. */
void SetupSpade()
{
    /* link the preprocessor keyword list to the init functions in 
       the preproc list to arrange for modules to run when specified */
    RegisterPreprocessor("spade", SpadeInit);
    RegisterPreprocessor("spade-homenet", SpadeHomenetInit);
    RegisterPreprocessor("spade-stats", SpadeStatInit);
    RegisterPreprocessor("spade-threshlearn", SpadeThreshlearnInit);
    RegisterPreprocessor("spade-adapt", SpadeAdaptInit);
    RegisterPreprocessor("spade-adapt2", SpadeAdapt2Init);
    RegisterPreprocessor("spade-adapt3", SpadeAdapt3Init);
    RegisterPreprocessor("spade-survey", SpadeSurveyInit);

	/* only reported when debugging output is switched on */
	if (as_debug) printf("Preprocessor: Spade is setup...\n");
}



/*========================================================================*/
/*========================= Spade core routines ==========================*/
/*========================================================================*/

/* snort config file line:
	preprocessor spade: [ <anom-report-thresh> [ <state-file> [ <log-file> [ <prob-mode> [ <checkpoint-freq> ]]]]]
	where:
	  <anom-report-thresh> is the (initial) reporting threshold for
anomalous events, or a negative number to not report (default -1)
	  <state-file> is the name of the checkpoint and recovery file to record
to and startup from, or 0 not to checkpoint or recover (default spade.rcv)
	  <log-file> is the name of the file to log to, or '-' for stdout
(default '-')
	  <prob-mode> is the probability mode to run in (0 for bayes net with 4
features, 1 for full joint prob with 4 features, 2 for full joint with 3
features, or 3 for full joint with 2 features) (default 3)
	  <checkpoint-freq> is the frequency of checkpointing, in terms of tree
addition counts (default 50000)
*/

/* Spade core init function:
     set up the anomaly sensor, register the signal handler,
     register the preprocessor function */
void SpadeInit(u_char *args)
{
	/* one more module that will be calling record_maybe_skip() */
	pp_active += 1;

	/* take the settings from the config line; anomaly scores are only
	   needed by the core if a non-negative reporting threshold was given */
	ParseSpadeArgs(args);
	if (report_anom_thres >= 0) {
		need_anom = 1;
	}

	/* have the core per-packet routine run on each packet */
	AddFuncToPreprocList(PreprocSpade);

	/* Build fresh memory and a fresh tree unless a state file is in use
	   (the name "0" means none) and recover() returns nonzero for it. */
	if (strcmp(statefile, "0") == 0 || !recover(statefile)) {
		init_mem();
		tree_init();
	} else if (as_debug) {
		printf("Recovered from file %s\n", statefile);
	}

#ifdef OLD_SNORT
	/* fallback for when the registration calls below won't compile */
	signal(SIGUSR1, CleanUpSpade);
	signal(SIGQUIT, CleanUpSpade);
	signal(SIGHUP, CleanUpSpade);
#else
	/* requires snort 1.6.1-beta3 or later */
	AddFuncToCleanExitList(SpadeCatchSig, NULL);
	AddFuncToRestartList(SpadeCatchSig, NULL);
#endif

	if (as_debug) {
		printf("Preprocessor: Spade Initialized\n");
	}
}

/* Spade 'spade' argument parsing function  */
void ParseSpadeArgs(char *args)
{
	char **fields;
	int nfields;

	/* the config line is a space-separated list of up to five optional,
	   positional fields; any field left off takes its default */
	fields = mSplit(args, " ", 20, &nfields, '\\');

	/* field 0: reporting threshold (default -1) */
	if (nfields <= 0) {
		report_anom_thres = -1;
	} else {
		report_anom_thres = atof(fields[0]);
	}
	if (as_debug) printf("anomaly reporting threshold is %f\n", report_anom_thres);

	/* field 1: checkpoint/recovery file name (default spade.rcv) */
	statefile = (nfields > 1) ? fields[1] : "spade.rcv";
	if (as_debug) printf("state file is %s\n", statefile);

	/* field 2: log file name (default "-") */
	outfile = (nfields > 2) ? fields[2] : "-";
	if (as_debug) printf("output file is %s\n", outfile);

	/* field 3: probability mode, which must lie in 0..3 (default 3) */
	prob_mode = 3;
	if (nfields > 3) {
		prob_mode = atoi(fields[3]);
		if (prob_mode < 0 || prob_mode > 3) {
			ErrorMessage("Warning: spp_anomsensor probabity mode #%d undefined, using #3 instead",prob_mode);
			prob_mode = 3;
		}
	}
	if (as_debug) printf("probability mode is %d\n", prob_mode);

	/* field 4: checkpoint frequency (default 50000) */
	checkpoint_freq = (nfields > 4) ? atoi(fields[4]) : 50000;
	if (as_debug) printf("checkpoint frequency is %d\n", checkpoint_freq);
}

/* Spade core routine that is called with each packet */
void PreprocSpade(Packet *p)
{
	/* record_maybe_skip() returns nonzero for packets that are skipped */
	if (record_maybe_skip(p)) return;
	/* accepted packets only past here; anom score is last_anom_score */
	
	/* a negative threshold means reporting is turned off */
	if (report_anom_thres >= 0.0 && last_anom_score >= report_anom_thres) {
		char logMessage[65];
		alert_count++;
		recent_alert_count++;
		/* The fixed text takes 44 of the 65 bytes, so a score with many
		   digits would overrun the buffer with sprintf(); bound the write
		   to the buffer size (the message is truncated in that case). */
		snprintf(logMessage,sizeof(logMessage),"spp_anomsensor: Anomaly threshold exceeded: %.4f",last_anom_score);
		(*AlertFunc)(p, logMessage);
	}
}	



/*========================================================================*/
/*========================= SpadeHomenet module ==========================*/
/*========================================================================*/

/* This module makes only packets to certain networks be considered for the
anomaly sensor; list your most common networks first for increased
efficiency */

/* snort config file line:
	preprocessor spade-homenet: {<network>}
	where <network> is a network in CIDR notation (address/numbits)
	                   or an IP address */
														
ll_net *homelist= NULL;  // the only networks we should be looking at packets going to

/* Spade homenet init function:
     set up the homenet list */
void SpadeHomenetInit(u_char *args)
{
	char **toks;
	int numToks;

	if (as_debug) printf("Preprocessor: SpadeHomenet Initialized\n");

	/* parse the argument list from the rules file */
	toks = mSplit(args, " ", 200, &numToks, '\\');

	/* Drop the last token if it is just whitespace.  This is only checked
	   when there is at least one token; with none, toks[numToks-1] would
	   index before the start of the array. */
	if (numToks > 0) {
		char *last = toks[numToks-1];
		if (strspn(last," \t") == strlen(last)) numToks--;
	}

	/* with no tokens left, create_netlist() returns an empty (NULL) list */
	homelist= create_netlist(toks,numToks);

	if (as_debug) {
		ll_net *n;
		struct in_addr net;
		printf("SpadeHomenet nets are:\n");
		for (n=homelist; n != NULL; n=n->next) {
			net.s_addr= n->netaddr;
			printf("\t%s with mask %lx\n",inet_ntoa(net),(u_long)ntohl(n->netmask));
		}
	}
}

// create a linked list of network specifications (address and netmask) from
//  a array of strings representing an CIDR network spec or an IP address
ll_net *create_netlist(char *nets[],int count) {
	ll_net *prev=NULL,*head=NULL,*cur=NULL;
	int i;
    char **toks;
    int num_toks;
    int nmask;
    struct in_addr net;
	
	for (i=0; i < count; i++) {
		cur= (ll_net *)malloc(sizeof(ll_net));
		cur->next= NULL;
		if (i > 0) {
			prev->next= cur;
		} else {
			head= cur;
		}
		
		// this code based strongly on GenHomenet in snort.c
		/* break out the CIDR notation from the IP address */
	    toks = mSplit(nets[i],"/",2,&num_toks,0);

        /* convert the CIDR notation into a real live netmask */
	    if (num_toks < 2) {
	    	nmask= 32;
	    } else { 
	    	nmask = atoi(toks[1]);
	    }

        if ((nmask >= 0) && (nmask <= 32))
        {
            cur->netmask = netmasks[nmask];
        }
        else
        {
            FatalError("ERROR: Bad CIDR size [%d], 1 to 32 please!\n",
                       nmask);
        }

	    /* since PC's store things the "wrong" way, shuffle the bytes into 
	       the right order */
#ifndef WORDS_BIGENDIAN
	    cur->netmask = htonl(cur->netmask);
#endif

	    /* convert the IP addr into its 32-bit value */
	    if ((net.s_addr = inet_addr(toks[0])) ==-1)
	    {
	        FatalError("ERROR: network (%s) didn't translate with inet_addr, must be poorly formed\n",
	                   toks[0]);
	    }
	    else
	    {
	        cur->netaddr = ((u_long)net.s_addr & cur->netmask);
	    }

	    free(toks);
		
		prev= cur;
	}
	
	return head;
}

/*========================================================================*/
/*=========================== SpadeStat module ===========================*/
/*========================================================================*/

/* Whenever the CleanUpSpade is invoked, this module arranges for certain
   specified statistics to be written to the log file.  The available
   statistics depend on what is recorded in the tree, which depends on the
   probability measure used.  There is no good way to have more granularity
   at present.  You need to change the setting of the "parts" variable to 1
   and set the part variable to one of the parts in the record_packet
   routine (to which you might add a new part). */

/* snort config file line:
	preprocessor spade-stats: {<stat-option>}
	where <stat-option> is one of:
	  "entropy" (to display the known entropies and conditional entropies)
	  "uncondprob" (to display the known non-0 simple (joint) probabilities)
	  "condprob" (to display the known non-0 conditional (joint)
probabilities) */
														
/* vars to store what the stats module should report */
int print_entropy= 0;
int print_condprob= 0;
int print_uncondprob= 0;

/* Spade Stat module init function:
     set up the print_* variables */
void SpadeStatInit(u_char *args)
{
	char **opts;
	int nopts;
	int k;

	if (as_debug) printf("Preprocessor: SpadeStat Initialized\n");

	/* each space-separated word on the config line names one statistic */
	opts = mSplit(args, " ", 20, &nopts, '\\');

	k = 0;
	while (k < nopts) {
		char *opt = opts[k];
		k++;

		/* turn on the flag matching the word, or complain about it */
		if (strcmp(opt, "entropy") == 0) {
			print_entropy = 1;
			continue;
		}
		if (strcmp(opt, "condprob") == 0) {
			print_condprob = 1;
			continue;
		}
		if (strcmp(opt, "uncondprob") == 0) {
			print_uncondprob = 1;
			continue;
		}
		fprintf(stderr,"Anomaly Sensor Stat: option \"%s\" not recognized\n",opt);
	}
}



/*========================================================================*/
/*======================== SpadeThreshlearn module =======================*/
/*========================================================================*/

/* Given a packet count and a length of time, this module reports a reporting
   threshold that would have been effective in producing that number of alerts
   in that time interval.  The idea is that one might use this as a threshold
   for future runs.  The module quietly watches the network for the length of
   time, adding events to the tree and calculating anomaly scores.  When the
   time period is up, the module calls exit() after reporting the top anomaly
   scores seen to the log file. */
   
/* snort config file line:
	preprocessor spade-threshlearn: [ <num-scores> [ <obs-time> ]]
	where:
	  <num-scores> is the number of packets to report for (default 200)
	  <obs-time> is the number of hours to run for (default 24)
*/

/* variables used in the threshold learning module */
int tl_obs_size=0;  // the number of anomalous packets desired
time_t tl_obs_secs; // how long to observe for
ll_double *top_anom_list; // the start of the list of anomaly scores we
                          // maintain; the scores are the highest we've
                          // observed; this list can be up to tl_obs_size+1
                          // long and is ordered by increasing score; the
                          // list is initialized to a single 0 entry so
                          // that it is never empty, even if we never see
                          // enough packets
int top_anom_list_size; // the count of scores on the list; set to 1 for
                        // the initial 0 entry and incremented for each
                        // entry added after that
time_t obs_start_time=(time_t)0; // the start time of the observation, set
                                 // after the first packet we see


/* Spade threshold learning module init function:
     set up threshold learning module per args and
     register its preprocessor function */
void SpadeThreshlearnInit(u_char *args)
{
	pp_active++;
	need_anom= 1;

	/* parse the argument list from the rules file */
	ParseSpadeThreshlearnArgs(args);

	/* Set the preprocessor function into the function list */
	AddFuncToPreprocList(PreprocSpadeThreshlearn);

	/* init list to contain just 0; this is to let us assume the list is not
	   empty elsewhere */
	top_anom_list= (ll_double *)malloc(sizeof(ll_double));
	if (top_anom_list == NULL) {
		fprintf(stderr,"Anomaly sensor threshold learning: out of memory\n");
		exit(1);
	}
	top_anom_list->val= 0.0;
	/* malloc() does not clear memory, so the end of the list has to be
	   marked explicitly; the per-packet routine walks the list until it
	   finds a NULL next pointer */
	top_anom_list->next= NULL;
	top_anom_list_size= 1;

	if (as_debug) printf("Preprocessor: SpadeThreshlearn Initialized\n");
}

/* Spade 'spade-threshlearn' argument parsing function:
     sets tl_obs_size from the first field (default 200) and tl_obs_secs from
     the second field, which is given in hours (default 24) */
void ParseSpadeThreshlearnArgs(char *args)
{
	char **fields;
	int nfields;
	double hours;

	fields = mSplit(args, " ", 20, &nfields, '\\');

	/* how many scores to keep */
	tl_obs_size = (nfields > 0) ? atoi(fields[0]) : 200;
	if (as_debug) printf("observation size is %d\n",tl_obs_size);

	/* how long to watch for; kept in seconds */
	hours = (nfields > 1) ? atof(fields[1]) : 24;
	tl_obs_secs = hours*3600;
	if (as_debug) printf("seconds of observation is %d\n",(int)tl_obs_secs);
}

/* Spade threshold learning module routine that is called with each packet */
void PreprocSpadeThreshlearn(Packet *p)
{
	size_t packet_time= p->pkth->ts.tv_sec;
	double anom;
	ll_double *new,*prev,*l;
	static int alldone=0; /* set once the observation period has ended */

	if (alldone) return;

	if (obs_start_time == 0) { /* first packet */
		obs_start_time= packet_time;
	} else if (packet_time > (obs_start_time + tl_obs_secs)) {
		/* the observation period is over; presumably CleanUpSpade does
		   the reporting and exits, as the module description says */
		CleanUpSpade(SIGUSR1);
		alldone=1;
	}

	if (record_maybe_skip(p)) return;
	/* accepted packets only past here; anom score is last_anom_score */
	anom= last_anom_score;

	/* The list is kept in increasing order of score, so the head is the
	   lowest score being retained. */
	if (top_anom_list_size <= tl_obs_size) {
		/* the list is still growing: use a fresh node */
		new= (ll_double *)malloc(sizeof(ll_double));
		if (new == NULL) return; /* out of memory: leave this score off */
		top_anom_list_size++;
	} else if (anom > top_anom_list->val) {
		/* The list is full and this score beats the lowest one.  If the
		   head is the only node, or the score still belongs in front of
		   the second node, overwrite the head in place.  Without the
		   only-node case the head would be unlinked below, leaving an
		   empty list to be dereferenced by the search loop. */
		if (top_anom_list->next == NULL || anom < top_anom_list->next->val) {
			top_anom_list->val= anom; /* can just replace first */
			return;
		}
		/* otherwise reuse the head node for the new score */
		new= top_anom_list;
		top_anom_list= top_anom_list->next;
	} else {
		return; /* not high enough to be kept */
	}
	new->val= anom;
	/* find the first node (after the head) whose score is not lower */
	for (prev= top_anom_list, l=top_anom_list->next; l != NULL && anom > l->val; prev=l,l=l->next);
	/* add between prev and l */
	prev->next= new;
	new->next= l;
}	



/*========================================================================*/
/*=========================== SpadeAdapt module ==========================*/
/*========================================================================*/

/* Given a report count target and a length of time, this module tries to keep
   the reporting threshold at a level that would produce that number of alerts
   in that time interval based on what was observed in the last interval.  To
   support this, a list of the most anomalous scores seen in the current
   interval is maintained.  At the end of the interval, an ideal threshold is
   calculated based on the interval's scores.  This is combined linearly with
   the current threshold to produce the threshold for the next interval.  As a
   default option, the interval can be implemented in terms of a count of
   packets, where this count is the average number of packets seen during the
   specified time interval length; this tends to make the transitions more
   smooth and reliable since a more constant number of anomaly scores is used
   in finding the topmost anomalous ones. */

/* snort config file line:
	preprocessor spade-adapt: [ <target-count> [ <adapt-time> [ <new-weight> [ <interval-by-count> ]]]]
	where:
	  <target-count> is the number of packets to aim for (default 20)
	  <adapt-time> is the number of hours in the interval (default 2)
	  <new-weight> is the part of new threshold based on the observed ideal for
	    the previous interval (where the rest depends on the previous threshold)
	    (default 0.5)
	  <interval-by-count> is whether to measure intervals by count (0 to
	    measure strictly by time, 1 to do it by count) (default 1)
*/

/* global-scope variables used in the adapt module */
// the number of alerts that is ideal for the given length of time
int adapt_target=0;
// the length of time in which to ideally produce the given number of alerts;
//   also the interval at which to adjust the report threshold
time_t adapt_period;
// the weight to give to the new observation ideal cutoff in determining the
//   new weight
float new_obs_weight;
// adapt by count or by time only
int adapt_by_count;
// the head of the list of anomaly scores.  This list is like the one in the
//   threshold learning module above
ll_double *top_adapt_list;
// the current size of this list (0-based)
int top_adapt_list_size;


/* Spade adapt module init function:
     set up the adapt module per its args and register its preprocessor function */
void SpadeAdaptInit(u_char *args)
{
	/* only one adaptation module may be active on a run */
	if (adapting) {
		fprintf(stderr,"Anomoly sensor threshold adapting repeadly specified, ignoring later specification: %s\n",args);
		return;
	}
	adapting= 1;
	pp_active++;
	need_anom= 1;

	/* parse the argument list from the rules file */
	ParseSpadeAdaptArgs(args);

	/* Set the preprocessor function into the function list */
	AddFuncToPreprocList(PreprocSpadeAdapt);

	/* init list to contain 0 and 0; this is to let us assume the list has a
	   bottom and runner-up elsewhere */
	top_adapt_list= (ll_double *)malloc(sizeof(ll_double));
	if (top_adapt_list == NULL) {
		fprintf(stderr,"Anomaly sensor threshold adapting: out of memory\n");
		exit(1);
	}
	top_adapt_list->val= 0.0;
	top_adapt_list->next= (ll_double *)malloc(sizeof(ll_double));
	if (top_adapt_list->next == NULL) {
		fprintf(stderr,"Anomaly sensor threshold adapting: out of memory\n");
		exit(1);
	}
	top_adapt_list->next->val= 0.0;
	/* malloc() does not clear memory, so the end of the list has to be
	   marked explicitly; the list is walked until a NULL next pointer is
	   found when scores are inserted and when the list is reset */
	top_adapt_list->next->next= NULL;
	top_adapt_list_size= 1;

	if (as_debug) printf("Preprocessor: SpadeAdapt Initialized\n");
}

/* Spade 'spade-adapt' argument parsing function  */
void ParseSpadeAdaptArgs(char *args)
{
	char **fields;
	int nfields;
	double hours;

	/* up to four optional, positional, space-separated fields */
	fields = mSplit(args, " ", 20, &nfields, '\\');

	/* field 0: the alert count to aim for (default 20) */
	adapt_target = (nfields > 0) ? atoi(fields[0]) : 20;

	/* field 1: the interval length in hours (default 2), kept in seconds */
	hours = (nfields > 1) ? atof(fields[1]) : 2;
	adapt_period = hours*3600;

	if (as_debug) {
		printf("adapt target count is %d\n",adapt_target);
		printf("adapt target period is %d\n",(int)adapt_period);
	}

	/* field 2: weight given to the newly observed ideal (default 0.5) */
	if (nfields > 2) {
		new_obs_weight = atof(fields[2]);
	} else {
		new_obs_weight = 0.5;
	}

	/* field 3: whether to measure intervals by count (default 1) */
	adapt_by_count = (nfields > 3) ? atoi(fields[3]) : 1;
}

/* Spade adapt module routine that is called with each packet:
     first checks whether a time interval has ended and, if so, adapts the
     threshold and/or updates the average packet rate; then records the
     packet and, if it was accepted, possibly adapts by packet count and
     adds its anomaly score to the list of top scores for this interval */
void PreprocSpadeAdapt(Packet *p)
{
	/* see if time to adjust the rate and if so, do so, and reset */
	size_t packet_time= p->pkth->ts.tv_sec;
	ll_double *new,*prev,*l;
	// when the time interval is time-based, this is when the current interval
	//   started; otherwise this is the last time the average packets per
	//   interval was updated
	static time_t last_adapt_time=(time_t)0;
	// the time period #, starting with 1 for the first interval
	static int time_period_num= 1;
	// the average number of packets per time interval as most recently
	//   calculated
	static float average_pkt_rate;
	
	if (packet_time > (last_adapt_time + adapt_period)) {
		if (last_adapt_time == 0) { /* first packet */
			/* nothing to adapt yet; just start the first interval here */
			last_adapt_time= packet_time;
			time_period_num= 1;
		} else {
			if (!adapt_by_count || time_period_num <= 1) { /* adapt by time since not doing count or since this is first period */
				if (as_debug) {
					printf("%d alerts in last time period (of %d)\n",recent_alert_count,recent_packets);
				}
				do_adapt();
			}
			if (adapt_by_count) { /* collect packet rate stats */
				/* total packets so far divided by the number of time
				   periods so far */
				average_pkt_rate= tot_packets/(float)time_period_num;
				if (as_debug) {
					// alert_count as of the previous time this was printed
					static int last_repcount;
					printf("End of time period %d: ave pkt rate is now %.2f\n",time_period_num,average_pkt_rate);
					printf("  %d alerts in last time period; ave alert rate is %.2f\n",(alert_count-last_repcount),alert_count/(float)time_period_num);
					last_repcount= alert_count;
				}
				time_period_num++;
			}
			/* move on by exactly one period, whatever the packet time is */
			last_adapt_time+= adapt_period;
		}
	}
	
	/* a nonzero return means the packet is being skipped */
	if (record_maybe_skip(p)) return;
	/* accepted packets only past here; anom score is last_anom_score */

	if (adapt_by_count) { /* we are adapting by count */
		if (time_period_num > 1 && recent_packets > average_pkt_rate) { /* time to adapt; note that average_pkt_rate can be adjusted any time in our counting */
			if (as_debug) {
				printf("%d alerts in last packet period (of %d)\n",recent_alert_count,recent_packets);
			}
			do_adapt();
		}
	}
	
	/* add anomaly score to list if it is high enough */
	if (top_adapt_list_size <= adapt_target) {
		/* the list is still growing, so use a fresh node */
		new= (ll_double *)malloc(sizeof(ll_double));
		top_adapt_list_size++;
	} else if (last_anom_score > top_adapt_list->val) {
		/* the score beats the first (lowest kept) entry */
		if (last_anom_score < top_adapt_list->next->val) {
			top_adapt_list->val= last_anom_score; /* can just replace first */
			return;
		}
		/* take the first node off the list and reuse it for this score */
		new= top_adapt_list;
		top_adapt_list= top_adapt_list->next;
	} else {
		return;
	}
	new->val= last_anom_score;
	/* walk past every later entry with a lower score than this one */
	for (prev= top_adapt_list, l=top_adapt_list->next; l != NULL && last_anom_score > l->val; prev=l,l=l->next);
	/* add between prev and l */
	prev->next= new;
	new->next= l;
}	

/* Set a new reporting threshold from the scores collected during the
   interval that just ended, then reset the per-interval state. */
void do_adapt() {
	ll_double *node;
	double ideal;
	double next_thresh;

	/* the observed ideal lies midway between the first two list entries */
	ideal = (top_adapt_list->val + top_adapt_list->next->val)/2;
	if (as_debug) printf("observed recent ideal threshold is %.4f\n",ideal);

	/* with no reporting so far (negative threshold) take the ideal as is;
	   otherwise blend it with the current threshold using new_obs_weight */
	next_thresh = (report_anom_thres < 0.0)
		? ideal
		: (1-new_obs_weight)*report_anom_thres + new_obs_weight*ideal;
	set_new_threshold(next_thresh);

	if (as_debug) printf("new threshold is %.4f\n",report_anom_thres);	

	/* zero every score on the list and restart the recent counters */
	node = top_adapt_list;
	while (node != NULL) {
		node->val = 0.0;
		node = node->next;
	}
	recent_alert_count = 0;
	recent_packets = 0;
}



/*========================================================================*/
/*========================== SpadeAdapt2 module ==========================*/
/*========================================================================*/

/* Given an hourly alert target count (or target fraction) and a length of
   time, this module tries to keep the reporting threshold at a level that
   would produce that number of alerts (or fraction of total reports) in an
   hour based on what has been observed in the past.  When the report threshold
   is updated, it is based in equal parts on observations from the short term,
   middle term, and long term (at least for these that have been observed). 
   The user can specify the time period for observations, the number of those
   that make up the short term (NS), the number of short terms that make up the
   medium term (NM), and the number of medium terms that make up the long term
   (NL).  The short term component of the threshold is defined to be the
   average of the kth and (k+1)st highest anomaly scores in the last NS
   complete periods of observation, where k is the number of anomaly reports
   that should occur in the observation period assuming a uniform rate.  The
   middle term component is the average of the last NM special short term
   components.  The special short term components are the ones that are
   multiples of NS if labeled with the number of observation periods that had
   completed when it was calculated (i.e., #NS, #2NS, #3NS, etc.); these have
   the property that they are based entirely on distinct measurements.  The
   long term component is based on the last NL medium term components,
   including the current one.  For each of the components, if there have been
   fewer than the specified number of constituent parts (but there has been at
   least one complete one), what is observed thus far is used.  To accommodate
   the varying rates of packets fairly, the observation period is based on a
   count of packets.  This count is the product of the specified observation
   period and the average packet rate.
*/

/* snort config file line:
	preprocessor spade-adapt2: [ <target-spec> [ <obs-time> [ <NS> [ <NM> [ <NL> ]]]]]
	where:
	  <target-spec> if >= 1, is the number of alerts to aim for in an hour, and
	    if < 1, is the fraction of packets to aim for (default 0.01)
	  <obs-time> is the number of minutes in an observation period (default 15)
	  <NS> is the number of observation periods that make up the short term
	    (default 4)
	  <NM> is the number of short terms in the medium term (default 24)
	  <NL> is the number of medium terms in the long term (default 7)
*/

/* global-scope variables used in the adapt2 module */
// the first and second arguments from the config line
double adapt2_targetspec,obsper;
// the 3rd, 4th, and 5th args
int NS,NM,NL;
// the current target based on adapt2_targetspec
int adapt2_target;
// latest middle and long term components
double mid_anom_comp,long_anom_comp;
// representation of an array of observation lists, the heads and tails
dll_double **obslists_head,**obslists_tail;
// an array of the (0-based) size of these lists
int *obslists_size;
// the number of complete observation periods
int obsper_count;
// arrays of short and medium term components used for calculating other components
double *recScomps,*recMcomps;

/* Spade adapt2 module init function:
     set up the adapt2 module per its args and register its preprocessor function */
void SpadeAdapt2Init(u_char *args)
{
	int i;
	/* only one adaptation module may be active on a run */
	if (adapting) {
		fprintf(stderr,"Anomoly sensor threshold adapting repeadly specified, ignoring later specification: %s\n",args);
		return;
	}
	adapting= 1;
	pp_active++;
	need_anom= 1;

	/* parse the argument list from the rules file */
	ParseSpadeAdapt2Args(args);

	/* Set the preprocessor function into the function list */
	AddFuncToPreprocList(PreprocSpadeAdapt2);

	/* one observation list (head, tail, and size) for each of the NS
	   slots, plus room for NM short term and NL medium term components */
	obslists_head= (dll_double **)malloc(NS * sizeof(dll_double *));
	obslists_tail= (dll_double **)malloc(NS * sizeof(dll_double *));
	obslists_size= (int *)malloc(NS * sizeof(int));
	recScomps= (double *)malloc(NM * sizeof(double));
	recMcomps= (double *)malloc(NL * sizeof(double));
	/* the arrays are written to right away (below and per packet), so a
	   failed allocation has to be caught here */
	if (obslists_head == NULL || obslists_tail == NULL || obslists_size == NULL
	    || recScomps == NULL || recMcomps == NULL) {
		fprintf(stderr,"Anomaly sensor threshold adapting (adapt2): out of memory\n");
		exit(1);
	}

	/* each list starts out as two linked 0 entries, with its size set to 1 */
	for (i= 0; i < NS; i++) {
		obslists_head[i]= new_dlink(0.0);
		obslists_tail[i]= new_dlink(0.0);
		obslists_head[i]->next= obslists_tail[i];
		obslists_tail[i]->prev= obslists_head[i];
		obslists_size[i]= 1;
	}
	obsper_count= 0;

	if (as_debug) printf("Preprocessor: SpadeAdapt2 Initialized\n");
}

/* Spade 'spade-adapt2' argument parsing function  */
void ParseSpadeAdapt2Args(char *args)
{
	char **toks;
	int numToks;

	/* up to five optional, positional, space-separated fields */
	toks = mSplit(args, " ", 20, &numToks, '\\');

	if (numToks > 0) {
		adapt2_targetspec= atof(toks[0]); /* if >= 1, is an hourly count, else is a fraction of total packets */
	} else {
		adapt2_targetspec= 0.01;
	}
	if (numToks > 1) {
		obsper= atof(toks[1])*60.0; /* basic observation/adjust time in mins, converted to secs */
	} else {
		obsper= 15.0*60.0;
	}
	/* 10000 packets per hour is our pure guess as to the rate of packets.
	   Is there a better way to figure out how many packets to note for our
	   first interval when we want a percent of packets? */
	adapt2_target= floor(0.5+ (adapt2_targetspec >= 1 ? adapt2_targetspec*(obsper/3600.0) : ((10000/3600.0)*obsper)*adapt2_targetspec));
	if (adapt2_target==0) adapt2_target= 1; /* ensure at least 1 long */
	if (numToks > 2) {
		NS= atoi(toks[2]); /* how many of the previous go into the time observation of ideal wait and the recent portion of the adapted weight */
	} else {
		NS= 4;
	}
	if (numToks > 3) {
		NM= atoi(toks[3]); /* how many of the previous go into an average to determine the middle portion of the adapted weight */
	} else {
		NM= 24;
	}
	if (numToks > 4) {
		NL= atoi(toks[4]); /* how many of the previous go into an average to determine the long-term portion of the adapted weight */
	} else {
		NL= 7;
	}

	/* NS, NM, and NL are used as array sizes by the adapt2 init function,
	   and NS is also a modulo divisor in the per-packet routine, so a zero,
	   negative, or non-numeric value (atoi gives 0) cannot be used; warn
	   and fall back to the default instead */
	if (NS < 1) {
		fprintf(stderr,"Warning: spade-adapt2 NS value %d is not positive, using 4 instead\n",NS);
		NS= 4;
	}
	if (NM < 1) {
		fprintf(stderr,"Warning: spade-adapt2 NM value %d is not positive, using 24 instead\n",NM);
		NM= 24;
	}
	if (NL < 1) {
		fprintf(stderr,"Warning: spade-adapt2 NL value %d is not positive, using 7 instead\n",NL);
		NL= 7;
	}

	if (as_debug) printf("adapt2 target is %d\n",adapt2_target);
	if (as_debug) printf("%2f seconds in obs per1; %d of these in recent; %d 2's in middle; %d in long\n",obsper,NS,NM,NL);
}

/* Spade adapt2 module routine that is called with each packet.

   Time-based part: once a packet's timestamp is more than obsper seconds past
   obsper_start, the average number of recorded packets per time period (ppc)
   and adapt2_target are recomputed.  At the end of the first time period the
   initial threshold is also set from the scores gathered in slot 0.

   Packet-based part: for every packet accepted by record_maybe_skip(), the
   per-period packet counter is advanced; when it exceeds ppc a new threshold
   is taken from calc_new_thresh() and the next slot is reset for reuse.
   Finally last_anom_score is merged into the current slot's list, which is
   kept in ascending order (head = lowest retained score). */
void PreprocSpadeAdapt2(Packet *p)
{
	/* see if time to adjust the rate and if so, do so, and reset */
	size_t packet_time= p->pkth->ts.tv_sec;
	dll_double *new,*prev,*l;
	int i;
	// the start time of the current observation period
	static time_t obsper_start=(time_t)0;
	// the number of packets thus far in this observation
	static int obscount=0;
	// the last calculated average packet count per component;
	// used to figure out when to adjust the threshold;
	// set high initially to be sure to get a correct value before doing this
	static double ppc= 100000000.0;
	// obsper_count % NS, which obslist to add to
	static int obslist_new_slot= 0;
	
	if (packet_time > (obsper_start + obsper)) {
		// number of time periods completed so far
		static int rec_int_count;
		if (obsper_start == 0) { /* first packet */
			obsper_start= packet_time;
			rec_int_count= 0;
			recent_alert_count= 0;
		} else { /* time to update ppc */
			rec_int_count++;
			if (as_debug) {
				printf("%d alerts in time period %d (of %d packets)\n",recent_alert_count,rec_int_count,recent_packets);
			}
			ppc= tot_packets/(double)rec_int_count;
			/* advance by exactly one period, even if the packet is further ahead */
			obsper_start+= obsper;
			if (as_debug) {
				// alert_count as of the previous time period, for the per-period delta
				static int last_repcount;
				printf("End of time period %d: ppc is now %.2f\n",rec_int_count,ppc);
				printf("  %d alerts in last time period; ave alert rate is %.2f\n",(alert_count-last_repcount),(float)alert_count/(float)rec_int_count);
				last_repcount= alert_count;
			}
			
			/* same formula as in ParseSpadeAdapt2Args, but using the measured ppc
			   in place of the guessed packet rate */
			adapt2_target= floor(0.5+ (adapt2_targetspec >= 1 ? adapt2_targetspec*(obsper/3600.0) : adapt2_targetspec*ppc));
			if (adapt2_target==0) adapt2_target= 1; /* ensure at least 1 long */
			if (as_debug) printf("new target is %d\n",adapt2_target);
			
			if (obsper_count == 0) {
				/* end of the very first observation period: set the initial
				   threshold from what was collected in slot 0 */
				obsper_count++;
				obslist_new_slot= obsper_count % NS;
				if (obslists_size[0] > adapt2_target) { /* remove excess */
					/* walk (size - target) nodes in from the head; everything
					   before l is the lowest-scoring surplus */
					for (i= adapt2_target, l=obslists_head[0]; i < obslists_size[0]; i++,l=l->next);
					l->prev->next= NULL;
					l->prev= NULL;
					free_dlinks(obslists_head[0]);
					obslists_head[0]= l;
					/* NOTE(review): obslists_size[0] is not reduced here, so it
					   stays larger than the trimmed list until reset_obslist(0)
					   runs -- confirm this is intended */
				}
				/* midpoint of the two lowest retained scores */
				set_new_threshold((obslists_head[0]->val + obslists_head[0]->next->val)/2.0);
				if (as_debug) printf("-> initial adapted threshold is %.5f\n",report_anom_thres);
				obscount= 0;
				recent_packets= 0;
				recent_alert_count= 0;
			}
		}
	}
	
	if (record_maybe_skip(p)) return;
	/* accepted packets only past here; anom score is last_anom_score */
	obscount++;
	
	if (obscount > ppc) {
		/* a packet-count period has ended: adapt the threshold and move on
		   to the next slot */
		if (as_debug) {
			printf("%d alerts at end of packet period #%d (of %d)\n",recent_alert_count,obslist_new_slot,recent_packets);
		}
		
		set_new_threshold(calc_new_thresh());
		if (as_debug) printf("-> new threshold is %.5f\n",report_anom_thres);
		
		obsper_count++;
		obslist_new_slot= obsper_count % NS;
		reset_obslist(obslist_new_slot);
		obscount= 0;
		recent_packets= 0;
		recent_alert_count= 0;
	}

	if (obslists_size[obslist_new_slot] < adapt2_target) {
		/* list still growing: allocate a node for this score */
		new= new_dlink(last_anom_score);
		obslists_size[obslist_new_slot]++;
	} else if (last_anom_score > obslists_head[obslist_new_slot]->val) {
		/* list is full and the score beats the lowest retained one */
		if (last_anom_score < obslists_head[obslist_new_slot]->next->val) {
			obslists_head[obslist_new_slot]->val= last_anom_score; /* can just replace first in place*/
			return;
		}
		/* recycle the old head node for the new score and promote the
		   second node to head */
		new= obslists_head[obslist_new_slot];
		new->val= last_anom_score;
		obslists_head[obslist_new_slot]= obslists_head[obslist_new_slot]->next;
		new->next->prev= NULL;
	} else {
		/* score is no higher than the lowest retained score: nothing to do */
		return;
	}
	/* find the first node (after the head) whose value is >= the new score */
	for (l=obslists_head[obslist_new_slot]->next; l != NULL && last_anom_score > l->val; l=l->next);
	/* add between l->prev and l */
	prev= (l == NULL) ? obslists_tail[obslist_new_slot] : l->prev;
	prev->next= new;
	new->prev= prev;
	new->next= l;
	if (l == NULL) {
		/* appended at the end, so the node becomes the new tail */
		obslists_tail[obslist_new_slot]= new;
	} else {
		l->prev= new;
	}
}

double calc_new_thresh() {
	static int per2_count=0,per3_count=0; // the count of period 2 and 3 instances

	double rec_anom_comp= thresh_from_obslists();
	if (as_debug) printf("* New recent anom observation (#%d) is %.5f\n",obsper_count,rec_anom_comp);
	if (obsper_count < (NS-1)) {
		return rec_anom_comp; /* haven't observed mid or long yet */
	}
	if (((obsper_count+1) % NS) == 0) { /* time to add new mid */
		recScomps[per2_count % NM]= rec_anom_comp;
		if (as_debug) printf("recScomps[%d]:= %.5f\n",per2_count % NM,rec_anom_comp);
		per2_count++;
		mid_anom_comp= anom_ave(recScomps,((per2_count < NM)?per2_count:NM));
		if (as_debug) printf("** New mid anom component (#%d) is %.5f\n",per2_count-1,mid_anom_comp);
		if (per2_count < (NM-1)) {
			return (rec_anom_comp+mid_anom_comp)/2.0; /* haven't observed long yet */
		}
		if ((per2_count % NM) == 0) { /* time to add new long */
			recMcomps[per3_count % NL]= mid_anom_comp;
			if (as_debug) printf("recMcomps[%d]:= %.5f\n",per3_count % NL,mid_anom_comp);
			per3_count++;	
			long_anom_comp= anom_ave(recMcomps,((per3_count < NL)?per3_count:NL));
			if (as_debug) printf("*** New long anom component (#%d) is %.5f\n",per3_count-1,long_anom_comp);
		}
	}
	if (per2_count < NM) {
		return (rec_anom_comp+mid_anom_comp)/2.0; /* haven't observed long yet */
	}
	return (rec_anom_comp+mid_anom_comp+long_anom_comp)/3.0;
}

/* Determine the threshold implied by the scores currently held in the NS
   observation lists.

   A cursor is placed on the tail of every list and, on each round, the
   largest value under any cursor is consumed and that cursor is moved to the
   previous node.  After adapt2_target+1 rounds the result is the midpoint of
   the last two values consumed.  If the lists run out first, the last value
   consumed is returned (0.0 if none was).

   The cursor array is heap allocated per call and released on every return
   path; the process exits if the allocation fails. */
double thresh_from_obslists() {
	dll_double **pos;
 	int i,c,maxpos=-1;
	double max,last_score=0.0,before_last_score=0.0;

	pos= (dll_double **)malloc(NS * sizeof(dll_double *));
	if (pos == NULL) {
		fprintf(stderr,"spp_anomsensor: out of memory in thresh_from_obslists\n");
		exit(1);
	}
	if (as_debug > 1) {
		dll_double *l;
		printf("thresh_from_obslists: finding score that is #%d highest in:\n",adapt2_target);
		for (i= 0; i < NS; i++) {
			printf("  slot %d: %.5f",i,obslists_head[i]->val);
			for (l=obslists_head[i]->next; l != NULL; l=l->next) {
				printf(" -> %.5f",l->val);
			}
			printf("\n");
		}
	}
	/* start every cursor at the end of its list */
	for (i= 0; i < NS; i++) {
		pos[i]= obslists_tail[i];
	}
	for (c= 1; c <= adapt2_target+1; c++) {
		/* -1 doubles as the "nothing found" marker checked below */
		max= -1;
		for (i= 0; i < NS; i++) {
			if (pos[i] != NULL) {
				if (max < pos[i]->val) {
					max= pos[i]->val;
					maxpos= i;
				}
				
			}
		}
		if (max == -1) {
			/* should only happen if we don't have enough packets recorded */
			free(pos);
			return last_score;
		}
		pos[maxpos]= pos[maxpos]->prev;
		before_last_score= last_score;
		last_score= max; /* in case this is the last */
	}
	free(pos);
	return (before_last_score+last_score)/2.0;
}

/* Return the arithmetic mean of the first `size` entries of a[].
   When as_debug is set, the values being averaged are printed first. */
double anom_ave(double a[],int size) {
	double total= 0.0;
	int k;

	if (as_debug) {
		printf("anom_ave: taking average of (%.5f",a[0]);
		k= 1;
		while (k < size) {
			printf(",%.5f",a[k]);
			k++;
		}
		printf(")\n");
	}

	k= 0;
	while (k < size) {
		total+= a[k];
		k++;
	}
	return total/(double)size;
}

/* Return observation list `slot` to a two-node list holding 0.0 and 0.0.
   Any nodes beyond the first two are handed to free_dlinks(), and the
   slot's recorded size becomes 1. */
void reset_obslist(int slot) {
	dll_double *head= obslists_head[slot];
	dll_double *runner= head->next;
	dll_double *surplus= runner->next;

	if (surplus != NULL) {
		free_dlinks(surplus);
	}
	runner->next= NULL;
	head->val= 0.0;
	runner->val= 0.0;
	obslists_size[slot]= 1;
	obslists_tail[slot]= runner;
}



/*========================================================================*/
/*========================== SpadeAdapt3 module ==========================*/
/*========================================================================*/

/* Given an hourly alert target count (or target fraction) and a length of
   time, this module tries to keep the reporting threshold at a level that
   would produce that number of alerts (or fraction of total reports) in an
   hour based on what has been observed in the past.  ...
*/

/* snort config file line:
	preprocessor spade-adapt3: [ <target-spec> [ <obs-time> [ <num-obs>]]]
	where:
	  <target-spec> if >= 1, is the number of alerts to aim for in an hour, and
	    if < 1, is the fraction of packets to aim for (default 0.01)
	  <obs-time> is the number of minutes in an observation period (default 15)
	  <num-obs> is the number of observation periods to average over (default 168)
*/

/* global-scope variables used in the Adapt3 module */
// the first and second arguments from the config line: the target spec and
// the observation period (given in minutes, stored here in seconds)
double adapt3_targetspec,adapt3_obsper;
// the 3rd arg: the number of slots in adapt3hist
int NO;
// the current target based on adapt3_targetspec
int adapt3_target;
// an array of past observations (NO entries)
double *adapt3hist;
// a linked list of current anomaly scores
ll_double *adapt3anoms;
// (0-based) size of this list; it is initialised to 1 for the two-node starting list
int adapt3anoms_size;
// number of completed observation periods
int completed_obs_per;

/* Spade Adapt3 module init function:
     set up the Adapt3 module per its args and register its preprocessor function.

   If a threshold-adapting module has already been set up (adapting is set),
   this specification is ignored with a message on stderr.  Otherwise the
   arguments are parsed, the history array of NO entries is allocated, and the
   score list is seeded with two 0.0 nodes.  The second node's next pointer is
   explicitly set to NULL, because code elsewhere in this file walks the list
   until it reaches NULL.  The process exits if an allocation fails. */
void SpadeAdapt3Init(u_char *args)
{
	ll_double *runner_up;

	if (adapting) {
		fprintf(stderr,"Anomoly sensor threshold adapting repeadly specified, ignoring later specification: %s\n",args);
		return;
	}
	adapting= 1;
	pp_active++;
	need_anom= 1;

    /* parse the argument list from the rules file */
    ParseSpadeAdapt3Args(args);

    /* Set the preprocessor function into the function list */
    AddFuncToPreprocList(PreprocSpadeAdapt3);

	adapt3hist= (double *)malloc(sizeof(double)*NO);
	
	/* init list to contain 0 and 0; this is to let us assume the list
	   has a bottom and runner-up elsewhere */
	adapt3anoms= (ll_double *)malloc(sizeof(ll_double));
	runner_up= (ll_double *)malloc(sizeof(ll_double));
	if (adapt3hist == NULL || adapt3anoms == NULL || runner_up == NULL) {
		fprintf(stderr,"spp_anomsensor: out of memory initializing spade-adapt3\n");
		exit(1);
	}
	adapt3anoms->val= 0.0;
	adapt3anoms->next= runner_up;
	runner_up->val= 0.0;
	runner_up->next= NULL; /* terminate the list; malloc does not zero memory */
	adapt3anoms_size= 1;
	completed_obs_per= 0;
	
	if (as_debug) printf("Preprocessor: SpadeAdapt3 Initialized\n");
}

/* Spade 'spade-Adapt3' argument parsing function.

   Arguments (space separated, all optional, in this order):
     <target-spec>  stored in adapt3_targetspec (default 0.01)
     <obs-time>     minutes per observation period; stored in adapt3_obsper
                    as seconds (this function defaults it to 15 minutes)
     <num-obs>      stored in NO (default 168)
   NO is used elsewhere in this file as a modulo divisor and as the length of
   adapt3hist, so a value below 1 is replaced by the default (with a warning
   on stderr) instead of being allowed to cause a division by zero. */
void ParseSpadeAdapt3Args(char *args)
{
    char **toks;
    int numToks;

    toks = mSplit(args, " ", 20, &numToks, '\\');
   
	if (numToks > 0) {
		adapt3_targetspec= atof(toks[0]); /* if >= 1, is an hourly count, else is a fraction of total packets */
	} else {
		adapt3_targetspec= 0.01;
	}
	if (numToks > 1) {
		adapt3_obsper= atof(toks[1])*60.0; /* basic observation/adjust time in mins, converted to secs */
	} else {
		adapt3_obsper= 15.0*60.0;
	}
	/* 10000 packets per hour is our pure guess as to the rate of packets.
	   Is there a better way to figure out how many packets to note for our
	   first interval when we want a percent of packets? */
	adapt3_target= floor(0.5+ (adapt3_targetspec >= 1 ? adapt3_targetspec*(adapt3_obsper/3600.0) : ((10000/3600.0)*adapt3_obsper)*adapt3_targetspec));
	if (adapt3_target < 1) adapt3_target= 1; /* at least 1, even for a zero or negative spec */
	if (numToks > 2) {
		NO= atoi(toks[2]); /* how many past observation periods are kept in the history that is averaged */
	} else {
		NO= 168;
	}
	if (NO < 1) {
		fprintf(stderr,"spp_anomsensor: spade-adapt3: invalid observation count %d, using 168\n",NO);
		NO= 168;
	}
	if (as_debug) printf("Adapt3 target is %d\n",adapt3_target);
	/* "%.2f" (two decimals) rather than the original "%2f" (minimum width 2) */
	if (as_debug) printf("%.2f seconds in obs per; %d of these in history\n",adapt3_obsper,NO);
}

/* Spade Adapt3 module routine that is called with each packet.

   Time-based part: once a packet's timestamp is more than adapt3_obsper
   seconds past adapt3_obsper_start, the average number of recorded packets
   per time period (ppi) and adapt3_target are recomputed; if no observation
   period has been completed yet, the score list is trimmed to the new target
   and do_adapt3() is run.

   Packet-based part: for every packet accepted by record_maybe_skip(), the
   packet counter is advanced and do_adapt3() is run once it exceeds ppi.
   Finally last_anom_score is merged into adapt3anoms, which is kept in
   ascending order (head = lowest retained score). */
void PreprocSpadeAdapt3(Packet *p)
{
	size_t packet_time= p->pkth->ts.tv_sec;
	ll_double *prev,*newstart,*next,*new;
	int i;
	// the start time of the current observation period
	static time_t adapt3_obsper_start=(time_t)0;
	// the number of packets thus far in this observation
	static int obscount=0;
	// the last calculated average packet count per interval; used to figure out when to adjust the threshold; set high initially to be sure to get a correct value before doing this
	static double ppi= 100000000.0;
	
	/* see if time to adjust the rate and if so, do so, and reset */
	if (packet_time > (adapt3_obsper_start + adapt3_obsper)) {
		// number of time periods completed so far
		static int rec_int_count;
		if (adapt3_obsper_start == 0) { /* first packet */
			adapt3_obsper_start= packet_time;
			rec_int_count= 0;
			recent_alert_count= 0;
		} else { /* time to update ppi */
			rec_int_count++;
			ppi= tot_packets/(double)rec_int_count;
			/* advance by exactly one period, even if the packet is further ahead */
			adapt3_obsper_start+= adapt3_obsper;
			if (as_debug) printf("End of time period %d: ppi is now %.2f\n",rec_int_count,ppi);
			
			/* same formula as in ParseSpadeAdapt3Args, but using the measured ppi
			   in place of the guessed packet rate */
			adapt3_target= floor(0.5+ (adapt3_targetspec >= 1 ? adapt3_targetspec*(adapt3_obsper/3600.0) : adapt3_targetspec*ppi));
			if (adapt3_target==0) adapt3_target= 1;
			if (as_debug) printf("new target is %d\n",adapt3_target);
			
			if (completed_obs_per == 0) {
				if (adapt3anoms_size > adapt3_target) { /* remove excess */
					/* stop on the last node to discard; newstart is the first node kept */
					for (i= adapt3_target, prev=adapt3anoms; (i+1) < adapt3anoms_size; i++,prev=prev->next);
					newstart= prev->next;
					prev->next= NULL;
					free_links(adapt3anoms);
					adapt3anoms= newstart;
					/* NOTE(review): adapt3anoms_size is not reduced here, so it
					   no longer matches the trimmed list -- confirm this is intended */
				}
				do_adapt3();
				obscount= 0;
			}
		}
	}
	
	if (record_maybe_skip(p)) return;
	/* accepted packets only past here; anom score is last_anom_score */
	obscount++;
	
	if (obscount > ppi) {
		/* a packet-count period has ended: record the observation and adapt */
		if (as_debug) {
			printf("%d alerts at end of packet period #%d (of %d)\n",recent_alert_count,completed_obs_per+1,recent_packets);
		}
		do_adapt3();
		obscount= 0;
	}

	/* add anomaly score to list if it is high enough */
	/* NOTE(review): this grows the list while size <= target, whereas the
	   adapt2 module uses < for the equivalent test -- confirm which node
	   count is intended */
	if (adapt3anoms_size <= adapt3_target) {
		new= new_link(last_anom_score);
		adapt3anoms_size++;
	} else if (last_anom_score > adapt3anoms->val) {
		/* list is full and the score beats the lowest retained one */
		if (last_anom_score < adapt3anoms->next->val) {
			adapt3anoms->val= last_anom_score; /* can just replace first */
			return;
		}
		/* recycle the old head node for the new score and promote the
		   second node to head */
		new= adapt3anoms;
		new->val= last_anom_score;
		adapt3anoms= adapt3anoms->next;
	} else {
		/* score is no higher than the lowest retained score: nothing to do */
		return;
	}
	/* find the first node (after the head) whose value is >= the new score */
	for (prev= adapt3anoms, next=adapt3anoms->next; next != NULL && last_anom_score > next->val; prev=next,next=next->next);
	/* add between prev and next */
	prev->next= new;
	new->next= next;
}

/* Close out an Adapt3 observation.

   The observed threshold is the midpoint of the first two values in
   adapt3anoms.  It is stored in the circular history adapt3hist (replacing
   the oldest entry once NO observations have been made), the reporting
   threshold is set to the mean of the stored history, and then every score
   in adapt3anoms is zeroed and the recent alert/packet counters are cleared. */
void do_adapt3() {
	static double hist_total= 0; // the sum of all current elements in the array
	ll_double *node;
	int idx,filled;
	double observed= (adapt3anoms->val + adapt3anoms->next->val)/2;

	if (as_debug) printf("observed recent ideal threshold for adapt3 is %.4f\n",observed);

	idx= completed_obs_per % NO;
	completed_obs_per++;
	if (completed_obs_per > NO) {
		hist_total-= adapt3hist[idx]; /* kicking a score out */
	}
	adapt3hist[idx]= observed;
	hist_total+= observed;

	if (as_debug > 1) {
		int k= 1;
		printf("adapt3hist= [");
		printf("%.4f",adapt3hist[0]);
		while (k < NO && k < completed_obs_per) {
			printf(",%.4f",adapt3hist[k]);
			k++;
		}
		printf("]\n");
	}

	/* number of history entries that currently hold a value */
	filled= (completed_obs_per >= NO) ? NO : completed_obs_per;
	set_new_threshold(hist_total/filled);
	if (as_debug) printf("new threshold is %.4f\n",report_anom_thres);

	/* the counters are cleared only after set_new_threshold has reported them */
	node= adapt3anoms;
	while (node != NULL) {
		node->val= 0.0;
		node= node->next;
	}
	recent_alert_count= 0;
	recent_packets= 0;
}


/*========================================================================*/
/*========================== SpadeSurvey module ==========================*/
/*========================================================================*/

/* This module surveys the anomaly scores observed across periods of time
and reports this to a specified survey file.  The period #, the packet
count, the median score, the 90th percentile score, and the 99th percentile
score are recorded to the file in tab-delimited format.  Interpolation is
used between scores if there is no score at exactly the position implied by
the percentile. */

/* efficiency note:  This uses a linked list to represent the observed anomaly scores.  While it is necessary to maintain all these scores (the current worst score might end up being the 99th percentile), a different representation (order stat tree?) should be used if the packet count gets high.  */

/* snort config file line:
	preprocessor spade-survey: [ <survey-file> [ <observation-period> ]]
	where:
	  <survey-file> the file to write the survey results to (default is stdout)
	  <observation-period> the interval for the survey in minutes (default 60)
*/

/* global-scope variables used in the survey module */
// the survey log file handle (set by ParseSpadeSurveyArgs)
FILE *survey_log= NULL;
// the list of anomaly scores for the survey, kept in ascending order
ll_double *survey_list;
// the length of the list (1-based)
int survey_list_len;
// the number of seconds in the survey interval
float survey_interval;
// the survey period number (starts with 1)
int survey_period;

/* Spade survey module init function:
     parse the module's arguments, register its per-packet routine, start
     with an empty score list at period 1, and write the column header line
     to the survey log */
void SpadeSurveyInit(u_char *args)
{
	double interval_minutes;

	pp_active++;
	need_anom= 1;

	/* parse the argument list from the rules file */
	ParseSpadeSurveyArgs(args);

	/* Set the preprocessor function into the function list */
	AddFuncToPreprocList(PreprocSpadeSurvey);

	/* no scores collected yet */
	survey_list= NULL;
	survey_list_len= 0;
	survey_period= 1;

	interval_minutes= survey_interval/60.0;
	fprintf(survey_log,"%.2f minute interval #\tPacket Count\tMedian Anom\t90th Percentile Anom\t99th Percentile Anom\n",interval_minutes);

	if (as_debug) {
		printf("Preprocessor: SpadeSurvey Initialized\n");
	}
}

/* Spade 'spade-survey' argument parsing function.
   First argument: file to write the survey to (stdout when absent); the
   file is opened for writing and failure to open it is fatal.
   Second argument: survey interval in minutes, stored in survey_interval as
   seconds (one hour when absent). */
void ParseSpadeSurveyArgs(char *args)
{
	int ntoks;
	char **tok= mSplit(args, " ", 20, &ntoks, '\\');

	if (ntoks <= 0) {
		survey_log= stdout;
	} else {
		survey_log= fopen(tok[0],"w");
		if (survey_log == NULL) {
			FatalError("spp_anomsensor: unable to open %s to record survey",tok[0]);
		}
	}

	if (ntoks <= 1) {
		survey_interval= 60*60;
	} else {
		survey_interval = atof(tok[1])*60.0;
	}

	if (as_debug) {
		printf("seconds of survey interval is %d\n",(int)survey_interval);
	}
}

/* Spade survey module routine that is called with each packet.

   For every survey interval that has fully elapsed before this packet's
   timestamp, one line (period #, packet count, median, 90th and 99th
   percentile score) is written to survey_log and the score list is emptied.
   Then, if record_maybe_skip() accepts the packet, last_anom_score is
   inserted into survey_list, which is kept in ascending order. */
void PreprocSpadeSurvey(Packet *p)
{
	size_t packet_time= p->pkth->ts.tv_sec;
	double anom;
	ll_double *new,*prev,*next;
	// the start time for this survey interval
	static time_t survey_interval_start_time=(time_t)0;
	// the number of packets seen in this survey period so far
	static int survey_rec_count= 0;

	while (packet_time > (survey_interval_start_time + survey_interval)) {
		if (survey_interval_start_time == 0) { /* first packet */
			survey_interval_start_time= packet_time;
		} else {
			fprintf(survey_log,"%d\t%d\t%.6f\t%.6f\t%.6f\n",survey_period,survey_rec_count,survey_ostat(0.5),survey_ostat(0.9),survey_ostat(0.99));
			/* The list is NULL when no packet was recorded during the
			   interval (always the case on the second and later passes of
			   this loop); free_links() dereferences its argument, so only
			   call it when there is something to recycle. */
			if (survey_list != NULL) {
				free_links(survey_list);
			}
			survey_list= NULL;
			survey_list_len= 0;
			survey_rec_count=0;
			survey_period++;
			survey_interval_start_time+= survey_interval;
		}
	}

	if (record_maybe_skip(p)) return;
	/* accepted packets only past here; anom score is last_anom_score */
	survey_rec_count++;
	anom= last_anom_score;
	new= new_link(anom);
	
	if (survey_list == NULL) {
		survey_list= new;
		survey_list_len= 1;
	} else {
		if (anom < survey_list->val) { /* add at head */
			new->next= survey_list;
			survey_list= new;
		} else {
			/* stop with next on the first node whose value is >= anom */
			for (prev= survey_list, next=survey_list->next; next != NULL && anom > next->val; prev=next,next=next->next);
			/* add between prev and next */
			prev->next= new;
			new->next= next;	
		}
		survey_list_len++;
	}
}	

/* Return the order statistic at fraction `loc` (e.g. 0.5 for the median) of
   the ascending survey_list, interpolating linearly between neighbouring
   scores when the position falls between two nodes.  Returns 0.0 for an
   empty list.

   The 1-based fractional position is (survey_list_len-1)*loc + 1.  The walk
   stops ON the node at the integer part of that position; the previous code
   stopped one node later, which returned the next-higher score and walked
   off the end of a one-element list. */
double survey_ostat(double loc) {
	ll_double *pos;
	int p;
	double fromnext;
	double posnum;
	
	//printf("loc= %f\n",loc);
	if (survey_list_len == 0 || survey_list == NULL) return 0.0;
	posnum= loc*(double)survey_list_len + (1-loc);/* = (survey_list_len-1)*loc+1 */

	/* advance only while the next 1-based index is still <= posnum, and
	   never past the last node */
	for (p= 1, pos=survey_list; (p+1) <= posnum && pos->next != NULL; p++,pos=pos->next);
	fromnext= posnum-(double)p; /* fraction of the way from pos to pos->next */
	if (fromnext <= 0 || pos->next == NULL) { /* got it exactly */
		return pos->val;
	} else {
		return (pos->val*(1-fromnext))+(pos->next->val*fromnext);
	}
}

/*********************************************************************/
/*********************************************************************/

/* Shared per-packet front end for the Spade preprocessor routines.

   The first caller for a packet (pp_run_on_pkt == pp_active, or 0 before any
   packet was handled) does the real work: it scales/prunes the trees when
   SCALE_FREQ seconds have passed, decides whether the packet is to be skipped
   (only TCP packets whose flags are exactly SYN are kept, and, when homelist
   is non-empty, only those whose destination is in a listed network),
   records the packet, checkpoints when due, and computes last_anom_score if
   need_anom is set.  Later callers for the same packet only bump
   pp_run_on_pkt and reuse the stored skip_packet decision.

   Returns 1 if the caller should skip the packet, 0 if it was accepted. */
int record_maybe_skip(Packet *p) {
	valtype val[NUM_FEATURES];
	ll_net *home;
	/* Signed time_t, not size_t: with an unsigned type a timestamp earlier
	   than last_scale makes the difference below wrap to a huge value and
	   the scaling loop would run practically forever. */
	time_t packet_time= p->pkth->ts.tv_sec;
	static time_t last_scale=(time_t)0; // the last time the tree was scaled
	
	if (pp_run_on_pkt == pp_active || !pp_run_on_pkt) { /* first time this packet hit */
		while (packet_time - last_scale > SCALE_FREQ) {
			if (last_scale == (time_t)0) { /* this is the first packet */
				last_scale= packet_time;
			} else {
				/* packet_time is cast so the argument matches its conversion specifier */
				if (as_debug > 1) printf("scaling by %f at time %ld; discarding at %f\n",SCALE_FACTOR,(long)packet_time,MIN_NODE_SIZE);
				scale_and_prune_all_trees(SCALE_FACTOR,MIN_NODE_SIZE);
				last_scale+= SCALE_FREQ;  /* lets pretend we did this right on time */
				if (as_debug > 1) printf("done with scale/prune\n");
			}
		}
		
		skip_packet= p->iph == NULL || p->tcph == NULL || p->iph->ip_proto != IPPROTO_TCP || p->tcph->th_flags != 2;  /* is this a TCP SYN? */
		if (!skip_packet && homelist != NULL) {
			skip_packet= 1; /* skip unless is in a homenet */
			for (home= homelist; home != NULL; home=home->next) {
				if ((p->iph->ip_dst.s_addr & home->netmask) == home->netaddr) {
					skip_packet= 0;
					break;
				}
			}
		}
		/* note: pp_run_on_pkt is left untouched for a skipped packet, so each
		   later caller repeats this branch and reaches the same decision */
		if (skip_packet) return 1;

		record_packet(p,val);
		pp_run_on_pkt= 1;
		last_pkt_time= packet_time;
	
		if (as_debug && (tot_packets % 10000) == 0) {
			printf("packet # %d has been added\n",tot_packets);
		}
		if ((tot_packets % checkpoint_freq) == 0) {
			/* a statefile name of "0" means no checkpointing */
			if (strcmp(statefile,"0")) checkpoint(statefile);
		}

		if (need_anom) {
			last_anom_score= calc_anom(val);
			//printf("last_anom_score=%f\n",last_anom_score);
		}
	} else {
		pp_run_on_pkt++;
		if (skip_packet) return 1;
	}
	
	return 0;
}


/* Compute the anomaly score for the feature values in val[].

   The score is -log2(P), where P is the probability estimate selected by
   prob_mode:
     0: P(dport) * P(sip|dport,sport) * P(sport|dport) * P(dip|sport,sip)
     1: prob_Njoint over 4 features (DIP,DPORT,SIP,SPORT)
     2: prob_Njoint over the first 3 of those features
     3: prob_2joint of DIP and DPORT
   Any other prob_mode yields 9999999.0; when `parts` is set the function
   yields 999999.0 without consulting the probability tables.

   LOG2 is ln(2), so the conversion to base 2 is log(P)/LOG2.  Modes 1-3
   previously computed log(P/LOG2), which is not a base-2 logarithm and
   disagreed with mode 0. */
double calc_anom(valtype val[]) {
	double prob;
	if (!parts) {
		features fl[]= {DIP,DPORT,SIP,SPORT};
		/* NOTE(review): vl holds feature values but is declared with the
		   `features` type; left as is because prob_Njoint's prototype is
		   not visible here -- confirm */
		features vl[]= {val[DIP],val[DPORT],val[SIP],val[SPORT]};
		if (prob_mode == 0) {
			prob= prob_simple(DPORT,val[DPORT]) *  /* P(dport) */
				prob_cond2(SIP,val[SIP],DPORT,val[DPORT],SPORT,val[SPORT]) *  /* P(sip|dport,sport) */
				prob_cond1(SPORT,val[SPORT],DPORT,val[DPORT]) *  /* P(sport|dport) */
				prob_cond2(DIP,val[DIP],SPORT,val[SPORT],SIP,val[SIP]);  /* P(dip|sport,sip) */
			return -1*(log(prob)/LOG2);
		} else if (prob_mode == 1) {
			prob= (double)prob_Njoint(4,fl,vl);
			return -1.0*(log(prob)/LOG2);
		} else if (prob_mode == 2) {
			prob= (double)prob_Njoint(3,fl,vl);
			return -1.0*(log(prob)/LOG2);
		} else if (prob_mode == 3) {
			prob= (double)prob_2joint(DIP,val[DIP],DPORT,val[DPORT]);
			return -1.0*(log(prob)/LOG2);
		} 
		return 9999999.0;
	} else return 999999.0;
}

/* Record packet p in the probability tables.

   Increments recent_packets and tot_packets, fills val[] with the packet's
   source/destination address and port, and then increments the joint counts
   selected by `parts`/`part` (when parts is set) or by prob_mode (otherwise).

   val[] is an output parameter: the caller receives the extracted SIP, DIP,
   SPORT and DPORT values. */
void record_packet(Packet *p,valtype val[]) {
	recent_packets++;
	tot_packets++;
	val[SIP]= p->iph->ip_src.s_addr;
	val[DIP]= p->iph->ip_dst.s_addr;
	val[SPORT]= p->sp;
	val[DPORT]= p->dp;
	//val[TTL]= p->iph->ip_ttl;
	//val[WIN] = p->tcph->th_win;
	
	if (as_debug > 2) {
		/* inet_ntoa() returns a pointer to a static buffer that each call
		   overwrites, so the two addresses must be printed by separate
		   calls; in a single printf both %s would show the same address. */
		printf("adding %s, ",inet_ntoa(p->iph->ip_src));
		printf("%s, %d, %d\n",inet_ntoa(p->iph->ip_dst),val[SPORT],val[DPORT]);
	}
	if (parts) {
		if (part == 0) {
			/* full all at once */
			
			/* record needed conditional probabilities */
			increment_4joint_count(SIP,val[SIP],DIP,val[DIP],SPORT,val[SPORT],DPORT,val[DPORT],0);
			increment_4joint_count(DIP,val[DIP],SPORT,val[SPORT],DPORT,val[DPORT],SIP,val[SIP],0);
			increment_4joint_count(SIP,val[SIP],DIP,val[DIP],DPORT,val[DPORT],SPORT,val[SPORT],2);
			increment_4joint_count(SIP,val[SIP],SPORT,val[SPORT],DPORT,val[DPORT],DIP,val[DIP],1);
			
			/*increment_3joint_count(SIP,val[SIP],DIP,val[DIP],DPORT,val[DPORT],3);*/
			increment_3joint_count(SIP,val[SIP],DPORT,val[DPORT],DIP,val[DIP],1);
			increment_3joint_count(DIP,val[DIP],DPORT,val[DPORT],SIP,val[SIP],1);

			increment_3joint_count(SIP,val[SIP],SPORT,val[SPORT],DIP,val[DIP],2);
			/*increment_3joint_count(SIP,val[SIP],DIP,val[DIP],SPORT,val[SPORT],3);*/
			increment_3joint_count(DIP,val[DIP],SPORT,val[SPORT],SIP,val[SIP],2);

			/*increment_3joint_count(SIP,val[SIP],SPORT,val[SPORT],DPORT,val[DPORT],3);*/
			increment_3joint_count(SIP,val[SIP],DPORT,val[DPORT],SPORT,val[SPORT],2);
			increment_3joint_count(SPORT,val[SPORT],DPORT,val[DPORT],SIP,val[SIP],0);

			increment_2joint_count(DIP,val[DIP],SIP,val[SIP],1);
			increment_2joint_count(SPORT,val[SPORT],SIP,val[SIP],1);
			increment_2joint_count(DPORT,val[DPORT],SIP,val[SIP],0);
			/*increment_2joint_count(SIP,val[SIP],DIP,val[DIP],2);*/
			increment_2joint_count(SPORT,val[SPORT],DIP,val[DIP],1);
			increment_2joint_count(DPORT,val[DPORT],DIP,val[DIP],1);
			/*increment_2joint_count(SIP,val[SIP],SPORT,val[SPORT],2);*/
			/*increment_2joint_count(DIP,val[DIP],SPORT,val[SPORT],2);*/
			increment_2joint_count(DPORT,val[DPORT],SPORT,val[SPORT],1);
			/*increment_2joint_count(SIP,val[SIP],DPORT,val[DPORT],2);*/
			/*increment_2joint_count(DIP,val[DIP],DPORT,val[DPORT],2);*/
			/*increment_2joint_count(SPORT,val[SPORT],DPORT,val[DPORT],2);*/
		} else if (part == 1) {
			increment_4joint_count(SIP,val[SIP],DIP,val[DIP],SPORT,val[SPORT],DPORT,val[DPORT],0);
			increment_4joint_count(SIP,val[SIP],DIP,val[DIP],DPORT,val[DPORT],SPORT,val[SPORT],2);
		} else if (part == 2) {
			increment_4joint_count(SIP,val[SIP],SPORT,val[SPORT],DPORT,val[DPORT],DIP,val[DIP],0);
			increment_3joint_count(SIP,val[SIP],SPORT,val[SPORT],DIP,val[DIP],2);
		} else if (part == 3) {
			increment_3joint_count(SIP,val[SIP],DPORT,val[DPORT],DIP,val[DIP],0);
			increment_3joint_count(SIP,val[SIP],DPORT,val[DPORT],SPORT,val[SPORT],2);
		} else if (part == 4) {
			increment_2joint_count(DIP,val[DIP],SIP,val[SIP],0);
			increment_3joint_count(DIP,val[DIP],SPORT,val[SPORT],SIP,val[SIP],1);
			increment_4joint_count(DIP,val[DIP],SPORT,val[SPORT],DPORT,val[DPORT],SIP,val[SIP],2);
		} else if (part == 5) {
			increment_3joint_count(DIP,val[DIP],DPORT,val[DPORT],SIP,val[SIP],0);
			increment_3joint_count(DIP,val[DIP],DPORT,val[DPORT],SPORT,val[SPORT],2);
		} else if (part == 6) {
			increment_2joint_count(SPORT,val[SPORT],DIP,val[DIP],0);
		} else if (part == 7) {
			increment_3joint_count(SPORT,val[SPORT],DPORT,val[DPORT],SIP,val[SIP],0);
			increment_3joint_count(SPORT,val[SPORT],DPORT,val[DPORT],DIP,val[DIP],2);
		} else if (part == 8) {
			increment_3joint_count(DPORT,val[DPORT],SIP,val[SIP],DIP,val[DIP],0);
			increment_2joint_count(DPORT,val[DPORT],DIP,val[DIP],1);
			increment_2joint_count(DPORT,val[DPORT],SPORT,val[SPORT],1);
		} else if (part == 9) {
			increment_2joint_count(SIP,val[SIP],DIP,val[DIP],0);
			increment_2joint_count(SIP,val[SIP],SPORT,val[SPORT],1);
			increment_2joint_count(SIP,val[SIP],DPORT,val[DPORT],1);
			increment_2joint_count(DIP,val[DIP],SIP,val[SIP],0);
			increment_2joint_count(DIP,val[DIP],SPORT,val[SPORT],1);
			increment_2joint_count(DIP,val[DIP],DPORT,val[DPORT],1);
			increment_2joint_count(SPORT,val[SPORT],SIP,val[SIP],0);
			increment_2joint_count(SPORT,val[SPORT],DIP,val[DIP],1);
			increment_2joint_count(SPORT,val[SPORT],DPORT,val[DPORT],1);
			increment_2joint_count(DPORT,val[DPORT],SIP,val[SIP],0);
			increment_2joint_count(DPORT,val[DPORT],DIP,val[DIP],1);
			increment_2joint_count(DPORT,val[DPORT],SPORT,val[SPORT],1);
		} /*else if (part == 10) {
			increment_2joint_count(TTL,val[TTL],DIP,val[DIP],0);
			increment_2joint_count(TTL,val[TTL],SPORT,val[SPORT],1);
			increment_2joint_count(TTL,val[TTL],DPORT,val[DPORT],1);
			increment_2joint_count(TTL,val[TTL],SIP,val[SIP],1);
			increment_2joint_count(DIP,val[DIP],TTL,val[TTL],0);
			increment_2joint_count(SPORT,val[SPORT],TTL,val[TTL],0);
			increment_2joint_count(DPORT,val[DPORT],TTL,val[TTL],0);
			increment_2joint_count(SIP,val[SIP],TTL,val[TTL],0);
		} else if (part == 11) {
			increment_2joint_count(WIN,val[WIN],DIP,val[DIP],0);
			increment_2joint_count(WIN,val[WIN],SPORT,val[SPORT],1);
			increment_2joint_count(WIN,val[WIN],DPORT,val[DPORT],1);
			increment_2joint_count(WIN,val[WIN],SIP,val[SIP],1);
			increment_2joint_count(WIN,val[WIN],TTL,val[TTL],1);
			increment_2joint_count(DIP,val[DIP],WIN,val[WIN],0);
			increment_2joint_count(SPORT,val[SPORT],WIN,val[WIN],0);
			increment_2joint_count(DPORT,val[DPORT],WIN,val[WIN],0);
			increment_2joint_count(SIP,val[SIP],WIN,val[WIN],0);
			increment_2joint_count(TTL,val[TTL],WIN,val[WIN],0);
		}*/
	} else {
		/* record only the counts that calc_anom's formula for this
		   prob_mode is expected to consult */
		if (prob_mode == 0) {
			increment_3joint_count(SPORT,val[SPORT],SIP,val[SIP],DIP,val[DIP],0);
			increment_3joint_count(DPORT,val[DPORT],SPORT,val[SPORT],SIP,val[SIP],0);
		} else if (prob_mode == 1) {
			increment_4joint_count(DIP,val[DIP],DPORT,val[DPORT],SIP,val[SIP],SPORT,val[SPORT],0);
		} else if (prob_mode == 2) {
			increment_3joint_count(DIP,val[DIP],DPORT,val[DPORT],SIP,val[SIP],0);
		} else if (prob_mode == 3) {
			increment_2joint_count(DIP,val[DIP],DPORT,val[DPORT],0);
		} 
	}
}


/* Make t the new reporting threshold (report_anom_thres) and announce the
   change, together with the recent alert and packet counts, through
   AlertFunc.

   The message is built with snprintf so that an unusually wide threshold
   value is truncated instead of overrunning the buffer, as the previous
   sprintf into 85 bytes could. */
void set_new_threshold(double t) {
	char logMessage[128];
	
	report_anom_thres= t;
	snprintf(logMessage,sizeof(logMessage),"spp_anomsensor: Threshold adjusted to %.4f after %d alerts (of %d)",report_anom_thres,recent_alert_count,recent_packets);
	(*AlertFunc)(NULL, logMessage);
}

/**********************************************************
 * Called on signals: SIGQUIT, SIGHUP and SIGUSR1 trigger
 * CleanUpSpade(); every other signal is ignored here.
 * The arg parameter is not used.
 *****************************************************/
void SpadeCatchSig(int signal,void *arg) {
	switch (signal) {
		case SIGQUIT:
		case SIGHUP:
		case SIGUSR1:
			CleanUpSpade(signal);
			break;
		default:
			break;
	}
}

void CleanUpSpade(int signal) 
{
	featcomb H;
    FILE *file;
    
    if (!tot_packets) return;
    
    if (strcmp(statefile,"0")) checkpoint(statefile);
    
    if (!strcmp(outfile,"-")) {
    	file= stdout;
    } else {
	    file = fopen(outfile, "w");
    	if(!file) FatalError("spp_anomsensor: unable to open %s",outfile);
    }

	fprintf(file,"%d packets recorded\n",tot_packets);
	if (alert_count > 0) fprintf(file,"%d packets reported as alerts\n",alert_count);
	
	if (tl_obs_size && top_anom_list_size > 1 && last_pkt_time-obs_start_time>0) {
		ll_double *n;
		double obs_hours= (last_pkt_time-obs_start_time)/3600.0;
		fprintf(file,"Threshold learning results: top %d anomaly scores over %.5f hours\n",top_anom_list_size-1,obs_hours);
		fprintf(file,"  Suggested threshold based on observation: %.6f\n",(top_anom_list->val+top_anom_list->next->val)/2);
		fprintf(file,"  Top scores: %.5f",top_anom_list->next->val);
		for (n=top_anom_list->next->next; n != NULL; n=n->next) {
			fprintf(file,",%.5f",n->val);
		}
		fprintf(file,"\n  First runner up is %.5f, so use threshold between %.5f and %.5f for %.3f packets/hr\n",top_anom_list->val,top_anom_list->val,top_anom_list->next->val,(top_anom_list_size/obs_hours));
	}

	if (print_entropy) {
		H= calc_all_entropies();
		write_all_entropies(file,H);
	}
	if (print_uncondprob) write_all_uncond_probs(file);
	if (print_condprob) write_all_cond_probs(file);
	
	if (file != stdout) {
		fclose(file);
	}
	
	if (survey_log != NULL) {
		fflush(survey_log);
	}
}


/* creation and recycling routines for ll_double's */
/* head of the list of recycled ll_double nodes that new_link() reuses */
ll_double *free_link_list=NULL;

/* Return a node holding val with a NULL next pointer.  A node is taken from
   free_link_list when one is available; otherwise a new one is malloc'ed,
   and the process exits if that allocation fails (previously the NULL
   result would have been dereferenced). */
ll_double *new_link(double val) {
	ll_double *link;
	if (free_link_list != NULL) {
		link= free_link_list;
		free_link_list= link->next;
	} else {
		link= (ll_double *)malloc(sizeof(ll_double));
		if (link == NULL) {
			fprintf(stderr,"spp_anomsensor: out of memory allocating a list node\n");
			exit(1);
		}
	}
	link->val= val;
	link->next= NULL;
	return link;
}

/* Hand the whole chain beginning at `start` to the free list for reuse by
   new_link().  The chain is spliced in front of the current free list.
   A NULL `start` (an empty list) is accepted and is a no-op; previously it
   was dereferenced. */
void free_links(ll_double *start) {
	ll_double *end;
	if (start == NULL) return;
	/* find the last node of the chain */
	for (end= start; end->next != NULL; end=end->next);
	end->next= free_link_list;
	free_link_list= start;
}

/* creation and recycling routines for dll_double's */
/* head of the list of recycled dll_double nodes that new_dlink() reuses */
dll_double *free_dlink_list= NULL;

/* Return a node holding val with NULL prev and next pointers.  A node is
   taken from free_dlink_list when one is available; otherwise a new one is
   malloc'ed, and the process exits if that allocation fails (previously the
   NULL result would have been dereferenced). */
dll_double *new_dlink(double val) {
	dll_double *link;
	if (free_dlink_list != NULL) {
		link= free_dlink_list;
		free_dlink_list= link->next;
	} else {
		link= (dll_double *)malloc(sizeof(dll_double));
		if (link == NULL) {
			fprintf(stderr,"spp_anomsensor: out of memory allocating a list node\n");
			exit(1);
		}
	}
	link->val= val;
	link->prev= NULL;
	link->next= NULL;
	return link;
}

/* Hand the whole chain beginning at `start` (followed through the next
   pointers) to the dlink free list for reuse by new_dlink().  The chain is
   spliced in front of the current free list; prev pointers are left as is. */
void free_dlinks(dll_double *start) {
	dll_double *last= start;

	/* walk to the final node of the chain */
	while (last->next != NULL) {
		last= last->next;
	}
	last->next= free_dlink_list;
	free_dlink_list= start;
}
