You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

124 lines
3.1 KiB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
  1. /*
  2. * massurl.c
  3. *
  4. * Created by Yigit Colakoglu on 07/06/2021.
  5. * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved.
  6. */
  7. #include "strings.h"
  8. #include "tree.h"
  9. #include "urlparse.h"
  10. #include <time.h>
  11. #include <string.h>
  12. #include <stdio.h>
  13. #include <stdlib.h>
  14. #define MAXURL 100000
  15. #define MAXPAYLOAD 10000
  /*
   * Print the command-line synopsis to stderr and terminate the process
   * with exit status 1. Never returns to the caller.
   */
  static void usage(void) {
    static const char synopsis[] =
        "usage: massurl [-v] [-o outfile] [-p payloads] [-n minparamnum] input_file\n";

    fprintf(stderr, "%s", synopsis);
    exit(1);
  }
  /* Output formats. PLAIN (value 1) is the only format defined here. */
  enum outformat {
    PLAIN = 1
  };
  /*
   * Root of the URL tree. Starts out NULL (empty tree); main() reassigns it
   * from the return value of addtree() after every insertion and hands it to
   * printtree() for output. Not static, so it has external linkage.
   */
  21. TreeNode *root = NULL;
  22. int main(int argc, char *argv[]) {
  23. FILE *fin = stdin, *fout = stdout, *payloads = NULL;
  24. char *param, urlstr[MAXURL], payload[MAXPAYLOAD];
  25. int minparamn, verbose = 0, npayloads = 1;
  26. time_t begin = time(NULL);
  27. unsigned long lines, errors = 0;
  28. while (--argc > 0) {
  29. param = *++argv;
  30. if (param[0] == '-') {
  31. param++;
  32. argc--;
  33. switch (*param) {
  34. case 'o':
  35. if ((fout = fopen(*++argv, "w")) == NULL) {
  36. fprintf(stderr, "Can't open output file for writing.\n");
  37. return 1;
  38. }
  39. if (ferror(fout)) {
  40. fprintf(stderr, "Can't open output file for writing.\n");
  41. return 1;
  42. }
  43. break;
  44. case 'n':
  45. minparamn = atoi(*++argv);
  46. break;
  47. case 'v':
  48. verbose = 1;
  49. break;
  50. case 'h':
  51. usage();
  52. break;
  53. case 'p':
  54. if ((payloads = fopen(*++argv, "r")) == NULL) {
  55. fprintf(stderr, "Can't open payload file for reading.\n");
  56. return 1;
  57. }
  58. if (ferror(fout)) {
  59. fprintf(stderr, "Can't open payload file for reading.\n");
  60. return 1;
  61. }
  62. break;
  63. default:
  64. fprintf(stderr, "Parameter -%c does not exist!\n", *param);
  65. usage();
  66. }
  67. } else {
  68. if ((fin = fopen(param, "r")) == NULL) {
  69. fprintf(stderr, "Can't open file %s\n", param);
  70. return 1;
  71. }
  72. }
  73. }
  74. URL *url;
  75. while (fgets(urlstr, MAXURL, fin) != NULL) {
  76. lines++;
  77. if ((url = parseurl(urlstr)) == NULL) {
  78. errors++;
  79. if (verbose)
  80. fprintf(stderr, "Malformed URL %s", urlstr);
  81. continue;
  82. }
  83. if (url->nparams >= minparamn) {
  84. TreeNode *newnode = treealloc();
  85. newnode->path = url->base;
  86. newnode->params = url->params;
  87. newnode->parent = NULL;
  88. newnode->left = newnode->right = NULL;
  89. newnode->nparams = url->nparams;
  90. newnode->red = 1; /* Always color new nodes red */
  91. root = addtree(root, newnode);
  92. balancetree(root, newnode);
  93. }
  94. }
  95. int printzeros = 0;
  96. if (payloads == NULL)
  97. printtree(root, fout, "%s", 0);
  98. else {
  99. while (fgets(payload, MAXPAYLOAD, payloads) != NULL) {
  100. npayloads++;
  101. for(int i=0; i<strlen(payload); i++){
  102. if(*(payload+i) == '\n'){
  103. *(payload+i)='\0';
  104. break;
  105. }
  106. }
  107. printtree(root, fout, payload, printzeros);
  108. printzeros = 1;
  109. }
  110. }
  111. time_t end = time(NULL);
  112. fprintf(stderr, "%lu urls processeed in %d seconds\nGenerated %lu urls\nSkipped %lu malformed urls\n", lines, (end-begin), npayloads*lines, errors);
  113. return 0;
  114. }