diff options
| author | Carlos Maiolino <[email protected]> | 2026-02-20 16:17:14 +0100 |
|---|---|---|
| committer | Carlos Maiolino <[email protected]> | 2026-02-20 16:17:14 +0100 |
| commit | 4ff0e42f65d8bba3d21bed53bfe1251d8db5c13f (patch) | |
| tree | 9ac61873e2676c3f392bd26063afeb16897c3902 /CPP/cpp_book/chap6/find_url | |
| parent | fd313dd5ad9ac067a31f2b1760b85bd305567131 (diff) | |
Diffstat (limited to 'CPP/cpp_book/chap6/find_url')
| -rwxr-xr-x | CPP/cpp_book/chap6/find_url/find_url | bin | 0 -> 51128 bytes | |||
| -rw-r--r-- | CPP/cpp_book/chap6/find_url/furl.cpp | 88 | ||||
| -rw-r--r-- | CPP/cpp_book/chap6/find_url/main.cpp | 26 | ||||
| -rw-r--r-- | CPP/cpp_book/chap6/find_url/text.txt | 3 |
4 files changed, 117 insertions, 0 deletions
// diff --git a/CPP/cpp_book/chap6/find_url/find_url b/CPP/cpp_book/chap6/find_url/find_url
// Binary files differ
// new file mode 100755
// index 0000000..4d7ac32
// --- /dev/null
// +++ b/CPP/cpp_book/chap6/find_url/find_url
//
// diff --git a/CPP/cpp_book/chap6/find_url/furl.cpp b/CPP/cpp_book/chap6/find_url/furl.cpp
// new file mode 100644
// index 0000000..17e9822
// --- /dev/null
// +++ b/CPP/cpp_book/chap6/find_url/furl.cpp
// @@ -0,0 +1,88 @@
#include <iostream>
#include <vector>
#include <algorithm>
#include <cctype>
#include <string>

/*
 * Return true when 'c' cannot appear in a URL, i.e. it is neither
 * alphanumeric nor one of the punctuation characters in url_ch.
 */
static bool
not_url_char(char c)
{
    // RFC 1738 lists the apostrophe (not the backtick) among the
    // characters that may appear unencoded in a URL.
    static const std::string url_ch = "~;/?:@=&$-_.+!*'(),";

    // The <cctype> classifiers are undefined for negative values, so
    // convert to unsigned char before calling them.
    return !(std::isalnum(static_cast<unsigned char>(c)) ||
             std::find(url_ch.begin(), url_ch.end(), c) != url_ch.end());
}

/*
 * Return an iterator to the first character of the protocol name of
 * the first URL found in [b, e), or 'e' when the range holds no URL.
 *
 * A URL is recognised as one or more letters, followed by "://",
 * followed by at least one valid URL character.
 */
static std::string::const_iterator
url_beg(
    std::string::const_iterator b,
    std::string::const_iterator e)
{
    static const std::string sep = "://";

    std::string::const_iterator i = b;

    while ((i = std::search(i, e, sep.begin(), sep.end())) != e) {

        // The separator must sit neither at the very beginning nor at
        // the very end of the range; otherwise there is no room for a
        // protocol name before it or a URL character after it.
        if (i != b && i + sep.size() != e) {

            // beg walks backwards to the start of the protocol name
            std::string::const_iterator beg = i;
            while (beg != b &&
                   std::isalpha(static_cast<unsigned char>(beg[-1])))
                --beg;

            // Is there at least one char before and after the separator?
            if (beg != i && !not_url_char(i[sep.size()]))
                return beg;
        }

        // The separator we found wasn't part of a URL;
        // advance i past the separator and keep looking.
        i += sep.size();
    }

    return e;
}

/*
 * Return an iterator to the first character in [b, e) that cannot be
 * part of a URL, or 'e' when every character is a valid URL character.
 */
static std::string::const_iterator
url_end(
    std::string::const_iterator b,
    std::string::const_iterator e)
{
    return std::find_if(b, e, not_url_char);
}

/*
 * Find URLs in the string 's' and return
 * a vector containing all strings found.
 */
+ * We ignore the protocol and just get the url + */ +std::vector<std::string> +find_urls(const std::string& s) +{ + std::vector<std::string> ret; + std::string::const_iterator b = s.begin(); + std::string::const_iterator e = s.end(); + + while (b != e) { + + /* Look for one or more chars followed by '://' */ + b = url_beg(b, e); + + /* Found it? */ + if (b != e) { + + /* Get the end of the URL after '://' */ + std::string::const_iterator after = url_end(b, e); + + /* Save the url */ + ret.push_back(std::string(b, after)); + + /* Advance b and check if file has more URLs */ + b = after; + } + } + + return ret; +} diff --git a/CPP/cpp_book/chap6/find_url/main.cpp b/CPP/cpp_book/chap6/find_url/main.cpp new file mode 100644 index 0000000..8bd9416 --- /dev/null +++ b/CPP/cpp_book/chap6/find_url/main.cpp @@ -0,0 +1,26 @@ +#include <iostream> +#include <vector> +#include <algorithm> + +std::vector<std::string> find_urls(const std::string& s); + +int +main(void) +{ + std::string s; + + while(std::getline(std::cin, s)) { + std::vector<std::string> urls; + urls = find_urls(s); + + std::vector<std::string>::const_iterator i = urls.begin(); + + while (i != urls.end()) { + std::cout << *i << std::endl; + i++; + } + } + + return 0; +} + diff --git a/CPP/cpp_book/chap6/find_url/text.txt b/CPP/cpp_book/chap6/find_url/text.txt new file mode 100644 index 0000000..b068661 --- /dev/null +++ b/CPP/cpp_book/chap6/find_url/text.txt @@ -0,0 +1,3 @@ +http://www.ronaldo.com +parangaricotirimirruaru +https://www.toxiclabs.cc |
