summary refs log tree commit diff
path: root/CPP/cpp_book/chap6/find_url/furl.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'CPP/cpp_book/chap6/find_url/furl.cpp')
-rw-r--r-- CPP/cpp_book/chap6/find_url/furl.cpp 88
1 files changed, 88 insertions, 0 deletions
diff --git a/CPP/cpp_book/chap6/find_url/furl.cpp b/CPP/cpp_book/chap6/find_url/furl.cpp
new file mode 100644
index 0000000..17e9822
--- /dev/null
+++ b/CPP/cpp_book/chap6/find_url/furl.cpp
@@ -0,0 +1,88 @@
+#include <algorithm>
+#include <cctype>
+#include <iostream>
+#include <string>
+#include <vector>
+
+/*
+ * Return true if 'c' is a character that cannot appear in a URL.
+ *
+ * A character is accepted (the function returns false) when it is
+ * alphanumeric or one of the punctuation characters in url_ch.
+ */
+static bool
+not_url_char(char c)
+{
+    // characters, in addition to alphanumerics, that can appear in a URL;
+    // the apostrophe belongs to the "extra" set of RFC 1738, whereas the
+    // backtick is listed there as unsafe and is therefore not included
+    static const std::string url_ch = "~;/?:@=&$-_.+!*'(),";
+
+    // the <cctype> classifiers are undefined for negative arguments
+    // other than EOF, so go through unsigned char first
+    const unsigned char uc = static_cast<unsigned char>(c);
+
+    return !(std::isalnum(uc) ||
+             std::find(url_ch.begin(), url_ch.end(), c) != url_ch.end());
+}
+
+/*
+ * Find the start of the first URL in the range [b, e).
+ *
+ * A URL is recognised as a run of one or more letters (the protocol
+ * name) immediately followed by "://" and then by at least one
+ * character that may appear in a URL.
+ *
+ * Returns an iterator to the first letter of the protocol name,
+ * or 'e' if the range holds no URL.
+ */
+static std::string::const_iterator
+url_beg(
+    std::string::const_iterator b,
+    std::string::const_iterator e)
+{
+    static const std::string sep = "://";
+
+    std::string::const_iterator i = b;
+
+    while ((i = std::search(i, e, sep.begin(), sep.end())) != e) {
+
+        // the separator must be neither at the very beginning
+        // nor at the very end of the range
+        if (i != b && i + sep.size() != e) {
+
+            // beg marks the beginning of the protocol name; convert to
+            // unsigned char because the <cctype> classifiers are
+            // undefined for negative char values
+            std::string::const_iterator beg = i;
+            while (beg != b &&
+                   std::isalpha(static_cast<unsigned char>(beg[-1])))
+                --beg;
+
+            // is there at least one protocol letter before the
+            // separator and a valid URL character right after it?
+            if (beg != i && !not_url_char(i[sep.size()]))
+                return beg;
+        }
+
+        // the separator we found wasn't part of a URL;
+        // advance i past it and keep looking
+        i += sep.size();
+    }
+
+    return e;
+}
+
+/*
+ * Find the end of the URL that starts at 'b'.
+ *
+ * Returns an iterator to the first character in [b, e) that cannot be
+ * part of a URL, or 'e' if every character in the range can.
+ */
+static std::string::const_iterator
+url_end(
+    std::string::const_iterator b,
+    std::string::const_iterator e)
+{
+    // qualified call: do not depend on argument-dependent lookup
+    // happening to find the algorithm through the iterator type
+    return std::find_if(b, e, not_url_char);
+}
+
+/*
+ * Collect every URL found in the string 's'.
+ *
+ * The string is scanned from left to right. Each element of the
+ * returned vector is the text from the position reported by url_beg()
+ * (the first letter of the protocol name) up to, but not including,
+ * the position reported by url_end(). The vector is empty when 's'
+ * contains no URL.
+ */
+std::vector<std::string>
+find_urls(const std::string& s)
+{
+    std::vector<std::string> urls;
+    std::string::const_iterator pos = s.begin();
+    const std::string::const_iterator last = s.end();
+
+    while (pos != last) {
+
+        /* Locate the next "<letters>://" candidate */
+        const std::string::const_iterator start = url_beg(pos, last);
+
+        /* Nothing left to find: we are done */
+        if (start == last)
+            break;
+
+        /* Locate the first character that is not part of this URL */
+        const std::string::const_iterator stop = url_end(start, last);
+
+        /* Store the URL */
+        urls.push_back(std::string(start, stop));
+
+        /* Resume the scan right after the URL just stored */
+        pos = stop;
+    }
+
+    return urls;
+}