You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

126 lines
3.2 KiB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
  1. /*
  2. * massurl.c
  3. *
  4. * Created by Yigit Colakoglu on 07/06/2021.
  5. * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved.
  6. */
  7. #include "strings.h"
  8. #include "tree.h"
  9. #include "urlparse.h"
  10. #include <time.h>
  11. #include <string.h>
  12. #include <stdio.h>
  13. #include <stdlib.h>
  14. #define MAXURL 100000
  15. #define MAXPAYLOAD 10000
  16. static void usage(void) {
  17. fputs("\
  18. usage: massurl [-r] [-v] [-o outfile] [-p payloads] [-n minparamnum] input_file\n", stderr);
  19. exit(1);
  20. }
  21. TreeNode *root = NULL;
  22. int main(int argc, char *argv[]) {
  23. FILE *fin = stdin, *fout = stdout, *payloads = NULL;
  24. char *param, urlstr[MAXURL], payload[MAXPAYLOAD];
  25. int minparamn, verbose = 0, npayloads = 1;
  26. int randpayloads = 0;
  27. time_t begin = time(NULL);
  28. unsigned long lines, errors = 0;
  29. while (--argc > 0) {
  30. param = *++argv;
  31. if (param[0] == '-') {
  32. param++;
  33. switch (*param) {
  34. case 'o':
  35. if ((fout = fopen(*++argv, "w")) == NULL) {
  36. fprintf(stderr, "Can't open output file for writing.\n");
  37. return 1;
  38. }
  39. if (ferror(fout)) {
  40. fprintf(stderr, "Can't open output file for writing.\n");
  41. return 1;
  42. }
  43. break;
  44. case 'n':
  45. minparamn = atoi(*++argv);
  46. argc--;
  47. break;
  48. case 'v':
  49. verbose = 1;
  50. break;
  51. case 'r':
  52. randpayloads = 1;
  53. break;
  54. case 'h':
  55. usage();
  56. break;
  57. case 'p':
  58. if ((payloads = fopen(*++argv, "r")) == NULL) {
  59. fprintf(stderr, "Can't open payload file for reading.\n");
  60. return 1;
  61. }
  62. if (ferror(fout)) {
  63. fprintf(stderr, "Can't open payload file for reading.\n");
  64. return 1;
  65. }
  66. break;
  67. default:
  68. fprintf(stderr, "Parameter -%c does not exist!\n", *param);
  69. usage();
  70. }
  71. } else {
  72. if ((fin = fopen(param, "r")) == NULL) {
  73. fprintf(stderr, "Can't open file %s\n", param);
  74. return 1;
  75. }
  76. }
  77. }
  78. printf("v:%d r:%d\n", verbose, randpayloads);
  79. URL *url;
  80. while (fgets(urlstr, MAXURL, fin) != NULL) {
  81. lines++;
  82. if ((url = parseurl(urlstr)) == NULL) {
  83. errors++;
  84. if (verbose)
  85. fprintf(stderr, "Malformed URL %s", urlstr);
  86. continue;
  87. }
  88. TreeNode *newnode = treealloc();
  89. newnode->path = url->base;
  90. newnode->params = url->params;
  91. newnode->parent = NULL;
  92. newnode->left = newnode->right = NULL;
  93. newnode->nparams = url->nparams;
  94. newnode->red = 1; /* Always color new nodes red */
  95. root = addtree(root, newnode);
  96. balancetree(root, newnode);
  97. }
  98. if ( randpayloads )
  99. printtree(root, fout, NULL, minparamn);
  100. else if ( payloads == NULL )
  101. printtree(root, fout, "%s", minparamn);
  102. if ( payloads ) {
  103. while (fgets(payload, MAXPAYLOAD, payloads) != NULL) {
  104. npayloads++;
  105. for(int i=0; i<strlen(payload); i++){
  106. if(*(payload+i) == '\n'){
  107. *(payload+i)='\0';
  108. break;
  109. }
  110. }
  111. printtree(root, fout, payload, minparamn);
  112. minparamn = (minparamn) ? minparamn : 1;
  113. }
  114. }
  115. time_t end = time(NULL);
  116. fprintf(stderr, "%lu urls processeed in %d seconds\nGenerated %lu urls\nSkipped %lu malformed urls\n", lines, (end-begin), npayloads*lines, errors);
  117. return 0;
  118. }