commit 7ec7f0360da87128baa32089ccb4e14f8d5facaa
Author: Yigit Colakoglu
Date:   Tue Jun 8 12:13:11 2021 +0300

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9e7271a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/.ccls-cache/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8b1346d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,18 @@
+##
+# massurl
+#
+# @file Makefile
+# @version 0.1
+
+VERSION = 0.1
+CC = gcc
+CFLAGS = -g -w
+SRC = linkedlist.c urlparse.c tree.c massurl.c
+
+all: massurl
+
+massurl: $(SRC)
+	${CC} $(SRC) -o massurl $(CFLAGS)
+
+
+# end
diff --git a/linkedlist.c b/linkedlist.c
new file mode 100644
index 0000000..ac11380
--- /dev/null
+++ b/linkedlist.c
@@ -0,0 +1,77 @@
+/*
+ * linkedlist.c
+ *
+ * Created by Yigit Colakoglu on 07/06/2021.
+ * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved.
+ */
+
+#include "linkedlist.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Allocate one list node with next and data set to NULL, so that a caller
+ * which only fills in data still ends up with a terminated list.
+ * Returns NULL when the allocation fails.
+ */
+LinkedList *linkedlistalloc(void){
+  LinkedList *node = malloc(sizeof *node);
+
+  if(node != NULL){
+    node->next = NULL;
+    node->data = NULL;
+  }
+  return node;
+}
+
+/*
+ * Return the zero-based position of the first node whose data equals str,
+ * or -1 when there is no such node. Nodes with NULL data never match.
+ */
+int linkedlistfind(LinkedList *p, char *str) {
+  int count = 0;
+
+  if(str == NULL)
+    return -1;
+  while(p != NULL){
+    if(p->data != NULL && !strcmp(p->data, str))
+      return count;
+    count++;
+    p = p->next;
+  }
+  return -1;
+}
+
+/*
+ * Append a node pointing at data (the string is not copied) to the end of
+ * the list p and return the head of the list. p may be NULL, in which case
+ * the new node is the head. The list is returned unchanged when the
+ * allocation fails.
+ */
+LinkedList *linkedlistadd(LinkedList *p, char *data){
+  LinkedList *node = linkedlistalloc();
+  LinkedList *tail = p;
+
+  if(node == NULL)
+    return p;
+  node->data = data;
+  if(p == NULL)
+    return node;
+  while(tail->next != NULL)
+    tail = tail->next;
+  tail->next = node;
+  return p;
+}
+
+/*
+ * Write the data of every node to out, separated by '&'. A node whose data
+ * is NULL is written as the text "NULL".
+ */
+void linkedlistprint(LinkedList *p, FILE *out){
+  for(; p != NULL; p = p->next){
+    fputs(p->data == NULL ? "NULL" : p->data, out);
+    if(p->next != NULL)
+      fputc('&', out);
+  }
+}
diff --git a/linkedlist.h b/linkedlist.h
new file mode 100644
index 0000000..2e5772f
--- /dev/null
+++ b/linkedlist.h
@@ -0,0 +1,22 @@
+/*
+ * linkedlist.h
+ *
+ * Created by Yigit Colakoglu on 07/06/2021.
+ * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved.
+ */
+
+#ifndef linkedlist_h
+#define linkedlist_h
+
+#include <stdio.h>
+
+typedef struct linkedlist {
+  struct linkedlist *next; /* following node, NULL at the end of the list */
+  char *data;              /* string held by this node, may be NULL */
+} LinkedList;
+
+LinkedList *linkedlistalloc(void);
+int linkedlistfind(LinkedList *p, char *str);
+LinkedList *linkedlistadd(LinkedList *p, char *data);
+void linkedlistprint(LinkedList *p, FILE *out);
+#endif /* linkedlist_h */
diff --git a/massurl b/massurl
new file mode 100755
index 0000000..eb762f8
Binary files /dev/null and b/massurl differ
diff --git a/massurl.c b/massurl.c
new file mode 100644
index 0000000..55533a4
--- /dev/null
+++ b/massurl.c
@@ -0,0 +1,78 @@
+/*
+ * massurl.c
+ *
+ * Created by Yigit Colakoglu on 07/06/2021.
+ * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved.
+ */
+
+#include <stdio.h>
+#include "urlparse.h"
+#include "tree.h"
+#include "strings.h"
+#define MAXURL 100000
+
+/* Print a short description of the command line to stderr. */
+static void usage(void){
+  fputs("usage: massurl [-o outfile] [infile]\n", stderr);
+}
+
+enum outformat{ PLAIN = 01 };
+
+/*
+ * Read one URL per line from the input file (stdin by default), collect the
+ * parameter names per base URL and print one line per base URL to stdout or
+ * to the file given with -o.
+ * Returns 0 on success and 1 on a usage or I/O error.
+ */
+int main(int argc, char *argv[]) {
+
+  FILE *fin = stdin, *fout = stdout;
+  char *param, urlstr[MAXURL];
+  TreeNode *urltree = NULL; /* addtree() creates the root on first use */
+  URL *url;
+
+  while(--argc > 0){
+    param = *++argv;
+    if(param[0] == '-' && param[1] != '\0'){
+      switch(param[1]){
+        case 'o':
+          /* -o consumes the following argument, which must exist */
+          if(--argc <= 0){
+            usage();
+            return 1;
+          }
+          if((fout = fopen(*++argv, "w")) == NULL){
+            fprintf(stderr, "Can't open output file for writing.\n");
+            return 1;
+          }
+          break;
+        default:
+          fprintf(stderr, "Parameter -%c does not exist!\n", param[1]);
+          usage();
+          return 1;
+      }
+    }else{
+      if((fin = fopen(param, "r")) == NULL){
+        fprintf(stderr, "Can't open file %s\n", param);
+        return 1;
+      }
+    }
+  }
+
+  while(fgets(urlstr, MAXURL, fin) != NULL){
+    if((url = parseurl(urlstr)) == NULL){
+      fprintf(stderr, "Malformed URL %s", urlstr);
+      continue;
+    }
+    urltree = addtree(urltree, url);
+  }
+  printtree(urltree, fout);
+
+  if(fin != stdin)
+    fclose(fin);
+  if(fout != stdout && fclose(fout) != 0){
+    fprintf(stderr, "Can't write output file.\n");
+    return 1;
+  }
+  return 0;
+}
diff --git 
a/test.data b/test.data
new file mode 100644
index 0000000..9ae2ae1
--- /dev/null
+++ b/test.data
@@ -0,0 +1,8 @@
+http://test.com?param4=var4
+http://test.com?param3=var3
+http://abc.com?abcpar=123&asdasd=asdas
+http://abc.com?abcpar123=123
+http://test.com/path1?param3=var3
+http://test.com/path1?param1=var2&param2=var2
+http://bc.com
+http://test.com?param1=var2&param2=var2
diff --git a/tree.c b/tree.c
new file mode 100644
index 0000000..b5417fe
--- /dev/null
+++ b/tree.c
@@ -0,0 +1,84 @@
+/*
+ * tree.c
+ *
+ * Created by Yigit Colakoglu on 07/06/2021.
+ * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved.
+ */
+
+#include "tree.h"
+#include "linkedlist.h"
+#include "urlparse.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Insert url into the binary search tree rooted at p, which is ordered by
+ * strcmp() on the base string, and return the root of the tree.
+ *
+ * A NULL p, or a node whose path is still NULL, takes over url->base and
+ * url->params. When a node with the same base exists already, every
+ * parameter name of url that the node does not hold yet is appended to the
+ * node's list. url itself is not modified; a NULL url or a url without a
+ * base leaves the tree as it is.
+ */
+TreeNode *addtree(TreeNode *p, URL *url) {
+  LinkedList *param;
+  int strdiff;
+
+  if (url == NULL || url->base == NULL)
+    return p;
+  if (p == NULL && (p = treealloc()) == NULL)
+    return NULL;
+  if (p->path == NULL) {
+    p->path = url->base;
+    p->params = url->params;
+    return p;
+  }
+
+  strdiff = strcmp(url->base, p->path);
+  if (strdiff == 0) {
+    for (param = url->params; param != NULL; param = param->next) {
+      if (linkedlistfind(p->params, param->data) == -1)
+        p->params = linkedlistadd(p->params, param->data);
+    }
+  } else if (strdiff < 0) {
+    p->left = addtree(p->left, url);
+  } else {
+    p->right = addtree(p->right, url);
+  }
+  return p;
+}
+
+/*
+ * Allocate an empty tree node: path, params, left and right are all NULL.
+ * Returns NULL when the allocation fails.
+ */
+TreeNode *treealloc(void){
+  TreeNode *node = malloc(sizeof *node);
+
+  if (node != NULL) {
+    node->path = NULL;
+    node->params = NULL;
+    node->left = NULL;
+    node->right = NULL;
+  }
+  return node;
+}
+
+/*
+ * Print the tree in order: one line per node holding the path, a space and
+ * the parameter list as written by linkedlistprint(). Nodes without a path
+ * print nothing.
+ */
+void printtree(TreeNode *root, FILE *out){
+  if(root == NULL)
+    return;
+  printtree(root->left, out);
+  if(root->path != NULL){
+    fprintf(out, "%s ", root->path);
+    linkedlistprint(root->params, out);
+    fputc('\n', out);
+  }
+  printtree(root->right, out);
+}
diff --git a/tree.h b/tree.h
new file mode 100644
index 0000000..6b38e70
--- /dev/null
+++ b/tree.h
@@ -0,0 +1,25 @@
+/*
+ * tree.h
+ *
+ * Created by Yigit Colakoglu on 07/06/2021.
+ * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved.
+ */
+
+#include "urlparse.h"
+#include <stdio.h>
+
+#ifndef tree_h
+#define tree_h
+
+typedef struct tnode {
+  char *path;          /* base URL this node stands for */
+  LinkedList *params;  /* parameter names collected for path */
+  struct tnode *left;  /* nodes whose path compares lower with strcmp() */
+  struct tnode *right; /* nodes whose path compares higher with strcmp() */
+} TreeNode;
+
+TreeNode *addtree(TreeNode *root, URL *url);
+TreeNode *treealloc(void);
+void printtree(TreeNode *root, FILE *out);
+
+#endif /* tree_h */
diff --git a/urlparse.c b/urlparse.c
new file mode 100644
index 0000000..b7632cc
--- /dev/null
+++ b/urlparse.c
@@ -0,0 +1,144 @@
+/*
+ * urlparse.c
+ *
+ * Created by Yigit Colakoglu on 07/06/2021.
+ * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved.
+ */
+
+#include "urlparse.h"
+#include "linkedlist.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Return a NUL-terminated heap copy of [begin, end), or NULL on failure. */
+static char *copyspan(const char *begin, const char *end) {
+  size_t len = (size_t)(end - begin);
+  char *copy = malloc(len + 1);
+
+  if (copy != NULL) {
+    memcpy(copy, begin, len);
+    copy[len] = '\0';
+  }
+  return copy;
+}
+
+/* Return the first occurrence of c in [begin, end), or NULL if there is none. */
+static const char *findchar(const char *begin, const char *end, int c) {
+  if (begin >= end)
+    return NULL;
+  return memchr(begin, c, (size_t)(end - begin));
+}
+
+/* Free a URL built by parseurl() together with its base and parameter list. */
+static void urlfree(URL *urlp) {
+  LinkedList *node, *next;
+
+  if (urlp == NULL)
+    return;
+  for (node = urlp->params; node != NULL; node = next) {
+    next = node->next;
+    free(node->data);
+    free(node);
+  }
+  free(urlp->base);
+  free(urlp);
+}
+
+/*
+ * Parse one URL, terminated by '\n' or '\0', into a newly allocated URL.
+ *
+ *   https   1 when the character in front of the first ':' is 's'
+ *   base    copy of everything in front of the first '?' (scheme included)
+ *   params  the names (text in front of '=') of the '&'-separated query
+ *           fields, in input order; fields without '=' or with an empty
+ *           name are skipped
+ *
+ * Returns NULL when no ':' comes ahead of the first '?' or '=', when fewer
+ * than two characters follow the ':', when the base contains '=', when the
+ * name part of a query field contains '?', or when memory runs out. base
+ * and the parameter names are separate heap strings owned by the result.
+ */
+URL *parseurl(char *url) {
+  const char *end, *colon, *rest, *query, *baseend, *field;
+  LinkedList *tail = NULL;
+  URL *urlp;
+
+  if (url == NULL)
+    return NULL;
+  end = url + strcspn(url, "\n");
+
+  /* the scheme separator has to come before any '?' or '=' */
+  colon = url + strcspn(url, ":?=\n");
+  if (*colon != ':')
+    return NULL;
+
+  /* two characters (normally "//") are skipped after the ':' */
+  rest = colon + 1;
+  if (end - rest < 2)
+    return NULL;
+  rest += 2;
+
+  query = findchar(rest, end, '?');
+  baseend = (query != NULL) ? query : end;
+  if (findchar(rest, baseend, '=') != NULL)
+    return NULL;
+
+  if ((urlp = urlalloc()) == NULL)
+    return NULL;
+  urlp->https = (colon > url && colon[-1] == 's');
+  urlp->params = NULL;
+  urlp->base = copyspan(url, baseend);
+  if (urlp->base == NULL) {
+    urlfree(urlp);
+    return NULL;
+  }
+
+  field = (query != NULL) ? query + 1 : end;
+  while (field < end) {
+    const char *amp = findchar(field, end, '&');
+    const char *fieldend = (amp != NULL) ? amp : end;
+    const char *eq = findchar(field, fieldend, '=');
+    const char *nameend = (eq != NULL) ? eq : fieldend;
+
+    if (findchar(field, nameend, '?') != NULL) {
+      urlfree(urlp);
+      return NULL;
+    }
+    if (eq != NULL && eq > field) {
+      LinkedList *node = linkedlistalloc();
+      char *name = copyspan(field, eq);
+
+      if (node == NULL || name == NULL) {
+        free(node);
+        free(name);
+        urlfree(urlp);
+        return NULL;
+      }
+      node->data = name;
+      node->next = NULL;
+      if (tail == NULL)
+        urlp->params = node;
+      else
+        tail->next = node;
+      tail = node;
+    }
+    field = (amp != NULL) ? amp + 1 : end;
+  }
+  return urlp;
+}
+
+/*
+ * Allocate a URL with https cleared and base and params set to NULL.
+ * Returns NULL when the allocation fails.
+ */
+URL *urlalloc(void) {
+  URL *urlp = malloc(sizeof *urlp);
+
+  if (urlp != NULL) {
+    urlp->https = 0;
+    urlp->base = NULL;
+    urlp->params = NULL;
+  }
+  return urlp;
+}
diff --git a/urlparse.h b/urlparse.h
new file mode 100644
index 0000000..11e83b9
--- /dev/null
+++ b/urlparse.h
@@ -0,0 +1,22 @@
+/*
+ * urlparse.h
+ *
+ * Created by Yigit Colakoglu on 07/06/2021.
+ * Copyright yigit@yigitcolakoglu.com. 2021. All rights reserved.
+ */
+
+#include "linkedlist.h"
+
+#ifndef urlparse_h
+#define urlparse_h
+
+typedef struct{
+  unsigned int https : 1; /* set when the scheme ends in 's' */
+  char *base;             /* URL text in front of the '?' */
+  LinkedList *params;     /* query parameter names, NULL when there are none */
+} URL;
+
+URL *parseurl(char *urlstr);
+URL *urlalloc(void);
+
+#endif /* urlparse_h */