summary | refs | log | tree | commit | diff
path: root/CPP/cpp_book/chap6/find_url
diff options
context:
space:
mode:
Diffstat (limited to 'CPP/cpp_book/chap6/find_url')
-rwxr-xr-x  CPP/cpp_book/chap6/find_url/find_url  bin 0 -> 51128 bytes
-rw-r--r--  CPP/cpp_book/chap6/find_url/furl.cpp  88
-rw-r--r--  CPP/cpp_book/chap6/find_url/main.cpp  26
-rw-r--r--  CPP/cpp_book/chap6/find_url/text.txt  3
4 files changed, 117 insertions, 0 deletions
diff --git a/CPP/cpp_book/chap6/find_url/find_url b/CPP/cpp_book/chap6/find_url/find_url
new file mode 100755
index 0000000..4d7ac32
--- /dev/null
+++ b/CPP/cpp_book/chap6/find_url/find_url
Binary files differ
diff --git a/CPP/cpp_book/chap6/find_url/furl.cpp b/CPP/cpp_book/chap6/find_url/furl.cpp
new file mode 100644
index 0000000..17e9822
--- /dev/null
+++ b/CPP/cpp_book/chap6/find_url/furl.cpp
@@ -0,0 +1,88 @@
+#include <iostream>
+#include <vector>
+#include <algorithm>
+
+/* True when c cannot be part of a URL (neither alphanumeric nor URL punctuation). */
+static bool
+not_url_char(char c)
+{
+    static const std::string url_ch = "~;/?:@=&$-_.+!*'(),"; /* '~' + RFC 1738 reserved/safe/extra */
+    /* <cctype> calls are undefined for negative values, hence the unsigned char cast */
+    return !(isalnum(static_cast<unsigned char>(c)) || url_ch.find(c) != std::string::npos);
+}
+
+/*
+ * Locate the start of the first URL in [b, e).
+ * A URL is one or more letters (the protocol), then "://", then at
+ * least one URL character. Returns an iterator to the first letter of
+ * the protocol name, or e when the range holds no URL.
+ */
+static std::string::const_iterator
+url_beg(
+    std::string::const_iterator b,
+    std::string::const_iterator e)
+{
+    static const std::string sep = "://";
+    std::string::const_iterator i = b;
+
+    while ((i = std::search(i, e, sep.begin(), sep.end())) != e) {
+        /* the separator must neither open the range nor close it */
+        if (i != b && i + sep.size() != e) {
+            /* walk beg back over the letters of the protocol name */
+            std::string::const_iterator beg = i;
+            /* the cast keeps isalpha() defined for negative char values */
+            while (beg != b && isalpha(static_cast<unsigned char>(beg[-1])))
+                --beg;
+
+            /* need a protocol letter before and a URL char after */
+            if (beg != i && !not_url_char(i[sep.size()]))
+                return beg;
+        }
+        /* not part of a URL: resume the search after this separator */
+        i += sep.size();
+    }
+
+    return e;
+}
+
+/* Return the first position in [b, e) for which not_url_char() is true, or e if none. */
+static std::string::const_iterator
+url_end(std::string::const_iterator b, std::string::const_iterator e)
+{
+    const std::string::const_iterator stop = std::find_if(b, e, not_url_char);
+    return stop;
+}
+
+/*
+ * Collect every URL that occurs in 's', in order of appearance.
+ * Each returned string runs from the first letter of the protocol
+ * name (the protocol is kept) up to the last URL character.
+ * An input without URLs yields an empty vector.
+ */
+std::vector<std::string>
+find_urls(const std::string& s)
+{
+    std::vector<std::string> found;
+    const std::string::const_iterator stop = s.end();
+    std::string::const_iterator pos = s.begin();
+
+    while (pos != stop) {
+
+        /* Next protocol name that is followed by '://' */
+        const std::string::const_iterator first = url_beg(pos, stop);
+
+        /* No further URL in the rest of the string */
+        if (first == stop)
+            break;
+
+        /* One past the last URL character */
+        const std::string::const_iterator last = url_end(first, stop);
+
+        found.push_back(std::string(first, last));
+
+        /* Keep scanning behind the URL just stored */
+        pos = last;
+    }
+
+    return found;
+}
diff --git a/CPP/cpp_book/chap6/find_url/main.cpp b/CPP/cpp_book/chap6/find_url/main.cpp
new file mode 100644
index 0000000..8bd9416
--- /dev/null
+++ b/CPP/cpp_book/chap6/find_url/main.cpp
@@ -0,0 +1,26 @@
+#include <iostream>
+#include <vector>
+#include <algorithm>
+
+std::vector<std::string> find_urls(const std::string& s);
+
+/*
+ * Read standard input line by line and print every URL found in
+ * each line on a line of its own.
+ */
+int
+main(void)
+{
+    typedef std::vector<std::string>::const_iterator url_iter;
+    std::string line;
+
+    while (std::getline(std::cin, line)) {
+        const std::vector<std::string> hits = find_urls(line);
+
+        for (url_iter it = hits.begin(); it != hits.end(); ++it)
+            std::cout << *it << std::endl;
+    }
+
+    return 0;
+}
+
diff --git a/CPP/cpp_book/chap6/find_url/text.txt b/CPP/cpp_book/chap6/find_url/text.txt
new file mode 100644
index 0000000..b068661
--- /dev/null
+++ b/CPP/cpp_book/chap6/find_url/text.txt
@@ -0,0 +1,3 @@
+http://www.ronaldo.com
+parangaricotirimirruaru
+https://www.toxiclabs.cc