Skip to content

Commit fae854f

Browse files
committed
added options to control random sampling
1 parent 56b737c commit fae854f

File tree

1 file changed

+56
-11
lines changed

1 file changed

+56
-11
lines changed

src/frag_find.cpp

Lines changed: 56 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ void usage()
236236
printf(" -X1 - do not use bloom filter or prefilter\n");
237237
printf(" -X2 - do not use prefilter\n");
238238
printf(" -xfname.xml - output digital forensics XML file to fname.xml\n");
239+
printf(" -R <frac>[:<pass>] - use <frac> as random sampling fraction (default is 1) and <pass> as number of passes (default is 1)\n");
239240
exit(1);
240241
}
241242

@@ -498,9 +499,44 @@ void masters_t::read_md5deep(const char *fn)
498499
}
499500
}
500501

502+
//RANDOM SAMPLING START
503+
/**
 * Split `s` on `delim` and append every field to `elems`.
 *
 * Fields are extracted with std::getline, so an empty input appends nothing
 * and a trailing delimiter does not produce a trailing empty field
 * ("a:" yields just "a"), while an interior empty field is kept
 * ("a::b" yields "a", "", "b").
 *
 * @param s      text to split
 * @param delim  single-character field separator
 * @param elems  output vector; existing contents are preserved
 * @return       reference to `elems`, to allow chaining
 */
std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::stringstream ss(s);
    std::string item;
    while (std::getline(ss, item, delim)) {
        elems.push_back(item);
    }
    return elems;
}

/**
 * Convenience overload: split `s` on `delim` and return the fields in a
 * freshly created vector.
 */
std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> elems;
    split(s, delim, elems);
    return elems;
}

/* Number of sampling passes requested with -R (default 1). */
int sampling_passes = 1;
/* Fraction of blocks to sample, 0 < f <= 1; 1 means "do not sample". */
double sampling_fraction = 1;

/**
 * Parse a "-R" argument of the form fraction[:passes] and store the result
 * in `sampling_fraction` and `sampling_passes`.
 *
 * The fraction must satisfy 0 < f <= 1 and passes, when given, must be an
 * integer >= 1. Both fields must be entirely numeric; trailing characters
 * are rejected. When passes is omitted, `sampling_passes` keeps its current
 * value. Any violation terminates the program through errx(1, ...), so the
 * globals are only updated when the whole argument is valid.
 *
 * @param p  the raw option text, e.g. "0.05" or "0.05:3"
 */
void set_sampling_parameters(const std::string &p){
    const std::vector<std::string> params = split(p,':');
    if(params.size()!=1 && params.size()!=2){
        errx(1,"error: sampling parameters must be fraction[:passes]");
    }

    /* Fraction: use strtod with an end pointer so that "0.5x" is rejected
     * instead of being silently read as 0.5. */
    const std::string &fraction_text = params.at(0);
    char *end = NULL;
    const double fraction = strtod(fraction_text.c_str(), &end);
    const bool fraction_is_number = (end != fraction_text.c_str() && *end == '\0');
    /* Written as a negated range test so that NaN is rejected as well.
     * f == 1 is valid: it is the documented default and disables sampling. */
    if(!fraction_is_number || !(fraction > 0.0 && fraction <= 1.0)){
        errx(1,"error: sampling fraction f must be 0<f<=1; you provided '%s'",fraction_text.c_str());
    }

    int passes = sampling_passes;
    if(params.size()==2){
        const std::string &passes_text = params.at(1);
        const long parsed = strtol(passes_text.c_str(), &end, 10);
        const bool passes_is_number = (end != passes_text.c_str() && *end == '\0');
        /* The round trip through int rejects values that do not fit. */
        const int narrowed = static_cast<int>(parsed);
        if(!passes_is_number || parsed < 1 || static_cast<long>(narrowed) != parsed){
            errx(1,"error: sampling passes must be >=1; you provided '%s'",passes_text.c_str());
        }
        passes = narrowed;
    }

    /* The block-selection loop in main() keeps drawing until
     * fraction*passes of the image's blocks have been selected. It stores
     * unique block numbers, so a product above 1 can never be satisfied. */
    if(fraction < 1.0 && fraction * passes > 1.0){
        errx(1,"error: sampling fraction times passes must be <=1; you provided '%s'",p.c_str());
    }

    sampling_fraction = fraction;
    sampling_passes = passes;
}

/** @return true when only a random subset of blocks should be examined. */
bool sampling(){return sampling_fraction<1.0;}
535+
501536
// Fallback for builds where HAVE_RANDOM is not defined: calls written as
// random() are redirected to rand() by this macro.
// NOTE(review): HAVE_RANDOM is presumably provided by the build
// configuration - confirm.
#ifndef HAVE_RANDOM
502537
#define random(x) rand(x)
503538
#endif
539+
//RANDOM SAMPLING END
504540

505541
int main(int argc,char **argv)
506542
{
@@ -513,6 +549,7 @@ int main(int argc,char **argv)
513549
uint64_t bloom_false_positives=0;
514550
class xml *x = 0;
515551
string command_line;
552+
std::string opt_sampling_params;
516553

517554
/* Make a copy of the command line */
518555
for(int i=0;i<argc;i++){
@@ -522,7 +559,7 @@ int main(int argc,char **argv)
522559

523560
prefilter_t prefilter; // bitset to hold first 3 bytes of block
524561

525-
while ((ch = getopt(argc,argv,"b:e:hM:m:Ss:rx:X:?")) != -1){
562+
while ((ch = getopt(argc,argv,"b:e:hM:m:Ss:rx:X:R:?")) != -1){
526563
switch(ch){
527564
case 's': opt_start = atoi64(optarg);break;
528565
case 'e': opt_end = atoi64(optarg);break;
@@ -531,6 +568,7 @@ int main(int argc,char **argv)
531568
case 'S': opt_stats++;break;
532569
case 'M': opt_M = atoi(optarg); break;
533570
case 'm': masters.read_md5deep(optarg); break;
571+
case 'R': opt_sampling_params = optarg; break;
534572
case 'X':
535573
switch(atoi(optarg)){
536574
case 1: use_bloom = 0;break;
@@ -596,19 +634,25 @@ int main(int argc,char **argv)
596634
timer.start();
597635
u_char *buf = (u_char *)malloc(blocksize);
598636

599-
//RANDOM SAMPLING
600-
bool r_s = true;
637+
//RANDOM SAMPLING START
638+
if(opt_sampling_params.size()>0) set_sampling_parameters(opt_sampling_params);
601639
/* Create a list of blocks to sample */
602640
srand(time(NULL));
603641
std::set<uint64_t> blocks_to_sample;
604-
double sampling_fraction = 0.05;
605642
uint64_t nblocks = imagefile.blocks;
606-
while(blocks_to_sample.size() < nblocks * sampling_fraction){
607-
uint64_t blk_high = ((uint64_t)random()) << 32;
608-
uint64_t blk_low = random();
609-
uint64_t blk = (blk_high | blk_low) % nblocks;
610-
blocks_to_sample.insert(blk); // will be added even if already present
643+
int at_pass = 0;
644+
if (sampling()) {
645+
while(at_pass < sampling_passes) {
646+
at_pass++;
647+
while(blocks_to_sample.size() < nblocks * sampling_fraction * at_pass){
648+
uint64_t blk_high = ((uint64_t)random()) << 32;
649+
uint64_t blk_low = random();
650+
uint64_t blk = (blk_high | blk_low) % nblocks;
651+
blocks_to_sample.insert(blk); // will be added even if already present
652+
}
653+
}
611654
}
655+
//RANDOM SAMPLING END
612656

613657
for(uint64_t blocknumber=opt_start;blocknumber < opt_end && blocknumber < imagefile.blocks; blocknumber++){
614658
/* If this is one of the 100,000 even blocks, print status info */
@@ -628,9 +672,10 @@ int main(int argc,char **argv)
628672
fflush(stdout);
629673
}
630674

631-
//RANDOM SAMPLING
675+
//RANDOM SAMPLING START
632676
/* Limit search to the random samples */
633-
if (r_s && blocks_to_sample.find(blocknumber) == blocks_to_sample.end()) continue;
677+
if (sampling() && blocks_to_sample.find(blocknumber) == blocks_to_sample.end()) continue;
678+
//RANDOM SAMPLING END
634679

635680
/* Scan through the input file block-by-block*/
636681
if(imagefile.getblock(blocknumber,buf)<0){

0 commit comments

Comments
 (0)