You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

125 lines
3.2 KiB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
  1. /*
  2. * massurl.c
  3. *
  4. * Created by Yigit Colakoglu on 07/06/2021.
  5. * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved.
  6. */
  7. #include "strings.h"
  8. #include "tree.h"
  9. #include "urlparse.h"
  10. #include <time.h>
  11. #include <string.h>
  12. #include <stdio.h>
  13. #include <stdlib.h>
  14. #define MAXURL 100000
  15. #define MAXPAYLOAD 10000
  /*
   * Write the command-line synopsis to stderr and terminate the process
   * with exit status 1. This function does not return.
   */
  static void usage(void) {
    static const char synopsis[] =
        "usage: massurl [-r] [-v] [-o outfile] [-p payloads] [-n minparamnum] input_file\n";

    fprintf(stderr, "%s", synopsis);
    exit(1);
  }
  21. TreeNode *root = NULL;
  22. int main(int argc, char *argv[]) {
  23. FILE *fin = stdin, *fout = stdout, *payloads = NULL;
  24. char *param, urlstr[MAXURL], payload[MAXPAYLOAD];
  25. int minparamn, verbose = 0, npayloads = 1;
  26. int randpayloads = 0;
  27. time_t begin = time(NULL);
  28. unsigned long lines, errors = 0;
  29. while (--argc > 0) {
  30. param = *++argv;
  31. if (param[0] == '-') {
  32. param++;
  33. switch (*param) {
  34. case 'o':
  35. if ((fout = fopen(*++argv, "w")) == NULL) {
  36. fprintf(stderr, "Can't open output file for writing.\n");
  37. return 1;
  38. }
  39. if (ferror(fout)) {
  40. fprintf(stderr, "Can't open output file for writing.\n");
  41. return 1;
  42. }
  43. break;
  44. case 'n':
  45. minparamn = atoi(*++argv);
  46. argc--;
  47. break;
  48. case 'v':
  49. verbose = 1;
  50. break;
  51. case 'r':
  52. randpayloads = 1;
  53. break;
  54. case 'h':
  55. usage();
  56. break;
  57. case 'p':
  58. if ((payloads = fopen(*++argv, "r")) == NULL) {
  59. fprintf(stderr, "Can't open payload file for reading.\n");
  60. return 1;
  61. }
  62. if (ferror(fout)) {
  63. fprintf(stderr, "Can't open payload file for reading.\n");
  64. return 1;
  65. }
  66. break;
  67. default:
  68. fprintf(stderr, "Parameter -%c does not exist!\n", *param);
  69. usage();
  70. }
  71. } else {
  72. if ((fin = fopen(param, "r")) == NULL) {
  73. fprintf(stderr, "Can't open file %s\n", param);
  74. return 1;
  75. }
  76. }
  77. }
  78. URL *url;
  79. while (fgets(urlstr, MAXURL, fin) != NULL) {
  80. lines++;
  81. if ((url = parseurl(urlstr)) == NULL) {
  82. errors++;
  83. if (verbose)
  84. fprintf(stderr, "Malformed URL %s", urlstr);
  85. continue;
  86. }
  87. TreeNode *newnode = treealloc();
  88. newnode->path = url->base;
  89. newnode->params = url->params;
  90. newnode->parent = NULL;
  91. newnode->left = newnode->right = NULL;
  92. newnode->nparams = url->nparams;
  93. newnode->red = 1; /* Always color new nodes red */
  94. root = addtree(root, newnode);
  95. balancetree(root, newnode);
  96. }
  97. if ( randpayloads )
  98. printtree(root, fout, NULL, minparamn);
  99. else if ( payloads == NULL )
  100. printtree(root, fout, "%s", minparamn);
  101. if ( payloads ) {
  102. while (fgets(payload, MAXPAYLOAD, payloads) != NULL) {
  103. npayloads++;
  104. for(int i=0; i<strlen(payload); i++){
  105. if(*(payload+i) == '\n'){
  106. *(payload+i)='\0';
  107. break;
  108. }
  109. }
  110. printtree(root, fout, payload, minparamn);
  111. minparamn = (minparamn) ? minparamn : 1;
  112. }
  113. }
  114. time_t end = time(NULL);
  115. fprintf(stderr, "%lu urls processeed in %d seconds\nGenerated %lu urls\nSkipped %lu malformed urls\n", lines, (end-begin), npayloads*lines, errors);
  116. return 0;
  117. }