From f7e47b3b235e9b11414c8e0db9b2d4219d02f89e Mon Sep 17 00:00:00 2001
From: Maciek Borzecki
Date: Sat, 30 Sep 2017 14:41:55 +0200
Subject: [PATCH] utils, test: helper for extracting http[s] URLs from text

When sharing URLs through KDE Connect, the incoming "url" usually contains
some additional description that was added by the source application. Use a
regex to pick up any valid http[s] URLs that may be found in the text.
---
 meson.build                | 14 ++++++++++++
 src/mconnect/utils.vala    | 37 ++++++++++++++++++++++++++++++
 test/mconn-utils-test.vala | 46 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+)
 create mode 100644 test/mconn-utils-test.vala

diff --git a/meson.build b/meson.build
index 6ce6f61..0915efb 100644
--- a/meson.build
+++ b/meson.build
@@ -84,6 +84,20 @@ test_mconn_crypt = executable('test-mconn-crypt', test_mconn_crypt_src,
                               install: false)
 test('mconn-crypt', test_mconn_crypt)
 
+test_mconn_utils_src = [
+  'test/mconn-utils-test.vala',
+  'src/mconnect/utils.vala',
+  'src/mconnect/logging.vala',
+]
+test_mconn_utils = executable('test-mconn-utils', test_mconn_utils_src,
+                              dependencies: [
+                                glib_dep, gobject_dep,
+                                gio_dep, gio_unix_dep,
+                                posix_dep,
+                              ],
+                              install: false)
+test('mconn-utils', test_mconn_utils)
+
 # other files
 applicationsdir = join_paths(get_option('datadir'), 'applications')
 
diff --git a/src/mconnect/utils.vala b/src/mconnect/utils.vala
index 20ce20b..f5d796b 100644
--- a/src/mconnect/utils.vala
+++ b/src/mconnect/utils.vala
@@ -149,4 +149,41 @@ using Posix;
         });
         return tls_conn;
     }
+
+    /**
+     * find_urls:
+     *
+     * Locate and extract URL-like patterns in the text. Only URLs that
+     * start with http:// or https:// are matched.
+     *
+     * @text: input text
+     * @return array of matches; its length is 0 when nothing was found or matching failed
+     */
+    string[] find_urls(string text) {
+        try {
+            // pattern taken from Stack Overflow: scheme, optional "www.", host, TLD, optional path/query
+            Regex r = /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&\/=]*)/;
+
+            MatchInfo mi;
+
+            string[] matches = {};
+
+            if (r.match(text, RegexMatchFlags.NOTEMPTY, out mi)) {
+                while (mi.matches()) {
+                    if (!mi.is_partial_match()) {
+                        var m = mi.fetch(0);
+                        debug("found match %s", m);
+                        matches += m;
+                    }
+                    mi.next();
+                }
+            } else {
+                debug("no match");
+            }
+            return matches;
+        } catch (RegexError e) {
+            warning("regex error while extracting URLs: %s", e.message);
+            return new string[0];
+        }
+    }
 }
\ No newline at end of file
diff --git a/test/mconn-utils-test.vala b/test/mconn-utils-test.vala
new file mode 100644
index 0000000..0a1a2a6
--- /dev/null
+++ b/test/mconn-utils-test.vala
@@ -0,0 +1,46 @@
+void test_find_urls_simple() {
+    var urls = Utils.find_urls("https://en.m.wikipedia.org/wiki/Isle_of_Man via DuckDuckGo for Android");
+
+    assert(urls != null);
+    assert(urls.length == 1);
+
+    assert(urls[0] == "https://en.m.wikipedia.org/wiki/Isle_of_Man");
+}
+
+void test_find_urls_extract() {
+    var urls = Utils.find_urls("Foo bar baz?\n\nhttp://foo.bar.com/123/345/abcd\n\nShared from my Google cards");
+
+    assert(urls != null);
+    assert(urls.length == 1);
+
+    assert(urls[0] == "http://foo.bar.com/123/345/abcd");
+}
+
+void test_find_urls_many() {
+    var urls = Utils.find_urls("https://foo.bar.com http://google.biz http://www.funny.io");
+
+    assert(urls != null);
+    assert(urls.length == 3);
+
+    assert(urls[0] == "https://foo.bar.com");
+    assert(urls[1] == "http://google.biz");
+    assert(urls[2] == "http://www.funny.io");
+}
+
+void test_find_urls_none() {
+    var urls = Utils.find_urls("baz bar \nbar.com foo ");
+
+    assert(urls != null);
+    assert(urls.length == 0);
+}
+
+
+public static void main(string[] args) {
+    Test.init(ref args);
+
+    Test.add_func("/mconn-utils/find-urls/simple", test_find_urls_simple);
+    Test.add_func("/mconn-utils/find-urls/extract", test_find_urls_extract);
+    Test.add_func("/mconn-utils/find-urls/many", test_find_urls_many);
+    Test.add_func("/mconn-utils/find-urls/none", test_find_urls_none);
+    Test.run();
+}
\ No newline at end of file