diff --git a/README.md b/README.md index 9237650..58c0097 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,37 @@ # massurl -massurl is a simple tool that aims to parse the outputs of tools like gau, and extract the parameters for each URL, remove duplicates and do it all very quickly. Because web scraping tools' outputs can get very large very quickly, it is nice to have a tool that parses them and and outputs something clean and easy to read. + +massurl is a simple tool that aims to parse the outputs of tools like gau, and +extract the parameters for each URL, remove duplicates and do it all very +quickly. Because web scraping tools' outputs can get very large very quickly, +it is nice to have a tool that parses them and outputs something clean and +easy to read. ## How to use? -Simply clone the git repository and run `make` which outputs the binary *massurl*. You can then simply pipe the output of any command that outputs urls into it or pass the filename where you want it to read the urls from. It expects each line to have only one url. It has several parameters: -``` -usage: massurl [-v] [-o outfile] [-p payloads] [-n minparamnum] input_file +Simply clone the git repository and run `make` which outputs the binary +*massurl*. You can then simply pipe the output of any command that outputs urls +into it or pass the filename where you want it to read the urls from. It +expects each line to have only one url. It has several parameters: + +``` sh +usage: massurl [-r] [-v] [-o outfile] [-p payloads] [-n minparamnum] input_file ``` -You can specify an output file, which it will write instead of stdout, you can also give it a list of payloads which massurl will automatically enter as the values for each parameter. And finally, you can specify the minimum amount of parameters a url must have to be outputted, this value is zero by default but I recommend you use 1. 
+You can specify an output file, which it will write instead of stdout, you can +also give it a list of payloads which massurl will automatically enter as the +values for each parameter. If you are testing for reflected values in +parameters, you can put a pseudorandom value in each param using the flag -r. +And finally, you can specify the minimum amount of parameters a url must have +to be outputted, this value is zero by default but I recommend you use 1. ## How fast is it? -The tool uses a binary tree to store the urls and keeps it balanced using the red-black self balancing tree algorithm, which allows it to run at incredible speeds. + +The tool uses a binary tree to store the urls and keeps it balanced using the +red-black self balancing tree algorithm, which allows it to run at incredible +speeds. ## Contributing -This is a very simple project so you shouldn't have trouble reading the code and fixing the bugs you encounter. If you do so, feel free to send a PR. Or, if you can't seem to fix it yourself, don't be shy and open an issue! + +This is a very simple project so you shouldn't have trouble reading the code +and fixing the bugs you encounter. If you do so, feel free to send a PR. Or, if +you can't seem to fix it yourself, don't be shy and open an issue! 
diff --git a/linkedlist.c b/linkedlist.c index b6f2f2e..c175fc3 100644 --- a/linkedlist.c +++ b/linkedlist.c @@ -10,6 +10,8 @@ #include #include +#define RANDLEN 6 + LinkedList *linkedlistalloc(void){ return (LinkedList *) malloc(sizeof(LinkedList)); @@ -36,10 +38,30 @@ LinkedList *linkedlistadd(LinkedList *p, char *data){ return p; } + +char rstr[RANDLEN+1]; + +char *randstr(){ + char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + int n = RANDLEN; + while((--n) > -1){ + size_t index = (double) rand()/RAND_MAX * (sizeof charset - 1); + rstr[n] = charset[index]; + } + return rstr; +} + void linkedlistprint(LinkedList *p, FILE *out, char* payload){ + int random = 0; + if(!payload){ + random = 1; + payload = randstr(); + } if(p != NULL){ (p->data == NULL) ? fprintf(out, "NULL=NULL") : fprintf(out, "%s=%s", p->data, payload); (p->next == NULL) ? : fprintf(out, "%c",'&'); + if(random) + payload = NULL; linkedlistprint(p->next, out, payload); } } diff --git a/massurl b/massurl index 1533ca1..b3b99bf 100755 Binary files a/massurl and b/massurl differ diff --git a/massurl.c b/massurl.c index 622e5b1..63f1d49 100644 --- a/massurl.c +++ b/massurl.c @@ -16,12 +16,11 @@ #define MAXPAYLOAD 10000 static void usage(void) { - fputs("usage: massurl [-v] [-o outfile] [-p payloads] [-n minparamnum] input_file\n", stderr); +fputs("\ +usage: massurl [-r] [-v] [-o outfile] [-p payloads] [-n minparamnum] input_file\n", stderr); exit(1); } -enum outformat { PLAIN = 01 }; - TreeNode *root = NULL; int main(int argc, char *argv[]) { @@ -29,6 +28,7 @@ int main(int argc, char *argv[]) { FILE *fin = stdin, *fout = stdout, *payloads = NULL; char *param, urlstr[MAXURL], payload[MAXPAYLOAD]; int minparamn, verbose = 0, npayloads = 1; + int randpayloads = 0; time_t begin = time(NULL); unsigned long lines, errors = 0; @@ -36,7 +36,6 @@ int main(int argc, char *argv[]) { param = *++argv; if (param[0] == '-') { param++; - argc--; switch (*param) { case 'o': if ((fout = 
fopen(*++argv, "w")) == NULL) { @@ -50,10 +49,14 @@ int main(int argc, char *argv[]) { break; case 'n': minparamn = atoi(*++argv); + argc--; break; case 'v': verbose = 1; break; + case 'r': + randpayloads = 1; + break; case 'h': usage(); break; @@ -78,6 +81,7 @@ int main(int argc, char *argv[]) { } } } + printf("v:%d r:%d\n", verbose, randpayloads); URL *url; while (fgets(urlstr, MAXURL, fin) != NULL) { @@ -88,24 +92,22 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Malformed URL %s", urlstr); continue; } - - if (url->nparams >= minparamn) { - TreeNode *newnode = treealloc(); - newnode->path = url->base; - newnode->params = url->params; - newnode->parent = NULL; - newnode->left = newnode->right = NULL; - newnode->nparams = url->nparams; - newnode->red = 1; /* Always color new nodes red */ - root = addtree(root, newnode); - balancetree(root, newnode); - } + TreeNode *newnode = treealloc(); + newnode->path = url->base; + newnode->params = url->params; + newnode->parent = NULL; + newnode->left = newnode->right = NULL; + newnode->nparams = url->nparams; + newnode->red = 1; /* Always color new nodes red */ + root = addtree(root, newnode); + balancetree(root, newnode); } - int printzeros = 0; - if (payloads == NULL) - printtree(root, fout, "%s", 0); - else { + if ( randpayloads ) + printtree(root, fout, NULL, minparamn); + else if ( payloads == NULL ) + printtree(root, fout, "%s", minparamn); + if ( payloads ) { while (fgets(payload, MAXPAYLOAD, payloads) != NULL) { npayloads++; for(int i=0; ileft, out, payload, minparams); if(root->nparams >= minparams){ - fprintf(out, "%s?", root->path); + fprintf(out, "%s", root->path); + (!root->nparams) ? 
: fprintf(out, "%c",'?'); linkedlistprint(root->params, out, payload); fprintf(out, "%c", '\n'); } diff --git a/urlparse.c b/urlparse.c index 2ca6ff1..8b2701c 100644 --- a/urlparse.c +++ b/urlparse.c @@ -59,9 +59,7 @@ URL *parseurl(char *url) { while(*url != '&' && *url != '\0' && *url != '\n') url++; url++; - } else { - return NULL; - } + } break; default: