extra codeHEAD master

author: Carlos Maiolino <[email protected]> 2026-02-20 16:17:14 +0100
committer: Carlos Maiolino <[email protected]> 2026-02-20 16:17:14 +0100
commit: 4ff0e42f65d8bba3d21bed53bfe1251d8db5c13f (patch)
tree: 9ac61873e2676c3f392bd26063afeb16897c3902 /CPP/cpp_book/chap6/find_url/furl.cpp
parent: fd313dd5ad9ac067a31f2b1760b85bd305567131 (diff)
1 files changed, 88 insertions, 0 deletions
diff --git a/CPP/cpp_book/chap6/find_url/furl.cpp b/CPP/cpp_book/chap6/find_url/furl.cpp
new file mode 100644
index 0000000..17e9822
--- /dev/null
+++ b/CPP/cpp_book/chap6/find_url/furl.cpp
@@ -0,0 +1,88 @@
+#include <algorithm>
+#include <cctype>
+#include <iostream>
+#include <string>
+#include <vector>
+
+/*
+ * Return true when 'c' cannot be part of a URL.
+ *
+ * A character is accepted when it is alphanumeric or one of the
+ * punctuation characters listed in url_ch.
+ */
+static bool
+not_url_char(char c)
+{
+	// Punctuation, besides alphanumerics, that may appear in a URL.
+	// The apostrophe (not the backtick) belongs to this set.
+	static const std::string url_ch = "~;/?:@=&$-_.+!*'(),";
+
+	// The <cctype> classifiers have undefined behaviour for negative
+	// values, so convert to unsigned char before calling them.
+	const unsigned char uc = static_cast<unsigned char>(c);
+
+	return !(std::isalnum(uc) ||
+		 url_ch.find(c) != std::string::npos);
+}
+
+/*
+ * Locate the start of the first URL in the range [b, e).
+ *
+ * A URL is recognised as one or more letters (the protocol name)
+ * followed by "://" and at least one valid URL character.
+ *
+ * Returns an iterator to the first letter of the protocol name,
+ * or 'e' when the range contains no URL.
+ */
+static std::string::const_iterator
+url_beg(
+	std::string::const_iterator b,
+	std::string::const_iterator e)
+{
+	static const std::string sep = "://";
+
+	std::string::const_iterator i = b;
+
+	while ((i = std::search(i, e, sep.begin(), sep.end())) != e) {
+
+		// The separator must have something before it and after it
+		if (i != b && i + sep.size() != e) {
+
+			// Walk backwards over the letters of the protocol name.
+			// isalpha() has undefined behaviour for negative values,
+			// hence the conversion to unsigned char.
+			std::string::const_iterator beg = i;
+			while (beg != b &&
+			       std::isalpha(static_cast<unsigned char>(beg[-1])))
+				--beg;
+
+			// At least one letter before the separator and one
+			// valid URL character right after it?
+			if (beg != i && !not_url_char(i[sep.size()]))
+				return beg;
+		}
+
+		// This separator wasn't part of a URL; resume after it
+		i += sep.size();
+	}
+
+	return e;
+}
+
+/*
+ * Return an iterator to the first character in [b, e) that cannot
+ * belong to a URL, or 'e' if every character in the range can.
+ */
+static std::string::const_iterator
+url_end(
+	std::string::const_iterator b,
+	std::string::const_iterator e)
+{
+	std::string::const_iterator pos = b;
+
+	// Advance while the current character may still be part of a URL
+	while (pos != e && !not_url_char(*pos))
+		++pos;
+
+	return pos;
+}
+
+/*
+ * Find every URL in the string 's' and return them, in order of
+ * appearance, as a vector of strings.
+ *
+ * Each returned entry is the full URL text: the protocol name,
+ * the '://' separator and the characters that follow it up to the
+ * first character that cannot be part of a URL.
+ *
+ * An input without URLs yields an empty vector.
+ */
+std::vector<std::string>
+find_urls(const std::string& s)
+{
+	std::vector<std::string> ret;
+	std::string::const_iterator b = s.begin();
+	const std::string::const_iterator e = s.end();
+
+	/* Look for one or more letters followed by '://' until none is left */
+	while ((b = url_beg(b, e)) != e) {
+
+		/* Find where the URL that starts at 'b' stops */
+		const std::string::const_iterator after = url_end(b, e);
+
+		/* Save the URL, protocol included */
+		ret.push_back(std::string(b, after));
+
+		/* Continue the search right after this URL */
+		b = after;
+	}
+
+	return ret;
+}
author	Carlos Maiolino <[email protected]>	2026-02-20 16:17:14 +0100
committer	Carlos Maiolino <[email protected]>	2026-02-20 16:17:14 +0100
commit	4ff0e42f65d8bba3d21bed53bfe1251d8db5c13f (patch)
tree	9ac61873e2676c3f392bd26063afeb16897c3902 /CPP/cpp_book/chap6/find_url/furl.cpp
parent	fd313dd5ad9ac067a31f2b1760b85bd305567131 (diff)