@@ -236,6 +236,7 @@ void usage()
236236 printf (" -X1 - do not use bloom filter or prefilter\n " );
237237 printf (" -X2 - do not use prefilter\n " );
238238 printf (" -xfname.xml - output digital forensics XML file to fname.xml\n " );
239+ printf (" -R <frac>[:<pass>] - use <frac> as random sampling fraction (default is 1) and <pass> as number of passes (default is 1)\n " );
239240 exit (1 );
240241}
241242
@@ -498,9 +499,44 @@ void masters_t::read_md5deep(const char *fn)
498499 }
499500}
500501
502+ // RANDOM SAMPLING START
/**
 * Split `s` at every occurrence of `delim` and append the pieces to `elems`.
 *
 * Follows std::getline semantics: an empty input appends nothing, adjacent
 * delimiters produce an empty piece, and a single trailing delimiter does
 * not produce a trailing empty piece.
 *
 * @param s      text to split
 * @param delim  separator character
 * @param elems  destination vector; existing contents are kept
 * @return reference to `elems`
 */
std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::stringstream fields(s);
    for (std::string piece; std::getline(fields, piece, delim); ) {
        elems.push_back(piece);
    }
    return elems;
}

/**
 * Convenience overload of split() that returns the pieces in a new vector.
 */
std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> elems;
    split(s, delim, elems);
    return elems;
}

/** Number of sampling passes requested with -R (default 1). */
int sampling_passes = 1;

/** Fraction of blocks to sample, 0 < f <= 1; 1 means "examine every block". */
double sampling_fraction = 1;

/**
 * Parse all of `text` as a single value of type T.
 *
 * @return true only when the extraction succeeds and consumes the whole
 *         string, so inputs such as "", "abc", "0.5x" or "3 4" are refused.
 */
template <typename T>
static bool parse_whole_token(const std::string &text, T &value) {
    std::istringstream in(text);
    in >> value;
    // eof() is set when the numeric extraction ran to the end of the text;
    // if it stopped early there is trailing junk.
    return !in.fail() && in.eof();
}

/**
 * Parse the argument of the -R option, "fraction[:passes]", and store the
 * result in sampling_fraction and sampling_passes.
 *
 * The fraction must satisfy 0 < f <= 1 and the pass count, when given, must
 * be an integer >= 1.  fraction * passes must not exceed 1: the selection
 * loop in main() draws blocks until it holds nblocks * fraction * passes
 * distinct block numbers, which can never happen for a product above 1.
 *
 * On any invalid input the program is terminated through errx(1, ...);
 * the globals are only modified after every check has passed.
 *
 * @param p  option text as given on the command line
 */
void set_sampling_parameters(const std::string &p) {
    const std::vector<std::string> params = split(p, ':');
    if (params.size() != 1 && params.size() != 2) {
        errx(1, "error: sampling parameters must be fraction[:passes]");
    }

    double fraction = 0;
    if (!parse_whole_token(params.at(0), fraction) || !(fraction > 0 && fraction <= 1)) {
        errx(1, "error: sampling fraction f must be 0<f<=1; you provided '%s'", params.at(0).c_str());
    }

    // When no pass count is supplied the current setting is left untouched.
    int passes = sampling_passes;
    if (params.size() == 2) {
        if (!parse_whole_token(params.at(1), passes) || passes < 1) {
            errx(1, "error: sampling passes must be >=1; you provided '%s'", params.at(1).c_str());
        }
    }

    if (fraction * passes > 1) {
        errx(1, "error: sampling fraction times passes must be <=1; you provided '%s'", p.c_str());
    }

    sampling_fraction = fraction;
    sampling_passes = passes;
}

/** @return true when only a random subset of blocks is to be examined. */
bool sampling() {
    return sampling_fraction < 1.0;
}

535+
501536#ifndef HAVE_RANDOM
502537#define random (x ) rand(x)
503538#endif
539+ // RANDOM SAMPLING END
504540
505541int main (int argc,char **argv)
506542{
@@ -513,6 +549,7 @@ int main(int argc,char **argv)
513549 uint64_t bloom_false_positives=0 ;
514550 class xml *x = 0 ;
515551 string command_line;
552+ std::string opt_sampling_params;
516553
517554 /* Make a copy of the command line */
518555 for (int i=0 ;i<argc;i++){
@@ -522,7 +559,7 @@ int main(int argc,char **argv)
522559
523560 prefilter_t prefilter; // bitset to hold first 3 bytes of block
524561
525- while ((ch = getopt (argc,argv," b:e:hM:m:Ss:rx:X:?" )) != -1 ){
562+ while ((ch = getopt (argc,argv," b:e:hM:m:Ss:rx:X:R: ?" )) != -1 ){
526563 switch (ch){
527564 case ' s' : opt_start = atoi64 (optarg);break ;
528565 case ' e' : opt_end = atoi64 (optarg);break ;
@@ -531,6 +568,7 @@ int main(int argc,char **argv)
531568 case ' S' : opt_stats++;break ;
532569 case ' M' : opt_M = atoi (optarg); break ;
533570 case ' m' : masters.read_md5deep (optarg); break ;
571+ case ' R' : opt_sampling_params = optarg; break ;
534572 case ' X' :
535573 switch (atoi (optarg)){
536574 case 1 : use_bloom = 0 ;break ;
@@ -596,19 +634,25 @@ int main(int argc,char **argv)
596634 timer.start ();
597635 u_char *buf = (u_char *)malloc (blocksize);
598636
599- // RANDOM SAMPLING
600- bool r_s = true ;
637+ // RANDOM SAMPLING START
638+ if (opt_sampling_params. size ()> 0 ) set_sampling_parameters (opt_sampling_params) ;
601639 /* Create a list of blocks to sample */
602640 srand (time (NULL ));
603641 std::set<uint64_t > blocks_to_sample;
604- double sampling_fraction = 0.05 ;
605642 uint64_t nblocks = imagefile.blocks ;
606- while (blocks_to_sample.size () < nblocks * sampling_fraction){
607- uint64_t blk_high = ((uint64_t )random ()) << 32 ;
608- uint64_t blk_low = random ();
609- uint64_t blk = (blk_high | blk_low) % nblocks;
610- blocks_to_sample.insert (blk); // will be added even if already present
643+ int at_pass = 0 ;
644+ if (sampling ()) {
645+ while (at_pass < sampling_passes) {
646+ at_pass++;
647+ while (blocks_to_sample.size () < nblocks * sampling_fraction * at_pass){
648+ uint64_t blk_high = ((uint64_t )random ()) << 32 ;
649+ uint64_t blk_low = random ();
650+ uint64_t blk = (blk_high | blk_low) % nblocks;
651+ blocks_to_sample.insert (blk); // will be added even if already present
652+ }
653+ }
611654 }
655+ // RANDOM SAMPLING END
612656
613657 for (uint64_t blocknumber=opt_start;blocknumber < opt_end && blocknumber < imagefile.blocks ; blocknumber++){
614658 /* If this is one of the 100,000 even blocks, print status info */
@@ -628,9 +672,10 @@ int main(int argc,char **argv)
628672 fflush (stdout);
629673 }
630674
631- // RANDOM SAMPLING
675+ // RANDOM SAMPLING START
632676 /* Limit search to the random samples */
633- if (r_s && blocks_to_sample.find (blocknumber) == blocks_to_sample.end ()) continue ;
677+ if (sampling () && blocks_to_sample.find (blocknumber) == blocks_to_sample.end ()) continue ;
678+ // RANDOM SAMPLING END
634679
635680 /* Scan through the input file block-by-block*/
636681 if (imagefile.getblock (blocknumber,buf)<0 ){
0 commit comments