$ vim t.php
<?php
$data = "..<a href='...'>...</a>.."; //file_get_contents(...);
if (preg_match_all("#<a[^h]*href=['\"]{0,1}([^\"']+)[\"']{0,1}[^>]*>(.*?)</a>#", $data, $matches) )
print_r($matches);
$ php t.php
Array
(
[0] => Array
(
[0] => <a href='...'>...</a>
)
[1] => Array
(
[0] => ...
)
[2] => Array
(
[0] => ...
)
)
對 PCRE 來說:
$ vim pcre_test.cpp
#include <pcre.h>
#include <iostream>
int main() {
const char *error;
int erroroffset;
pcre *preg_pattern_a_tag = pcre_compile("<a[^h]*href=['\"]{0,1}([^\"']+)[\"']{0,1}[^>]*>(.*?)</a>", PCRE_MULTILINE, &error, &erroroffset, NULL);
if (!preg_pattern_a_tag) {
std::cout << "ERROR\n";
return -1;
}
std::string raw = "..<a href='...'>...</a>..";
unsigned int offset = 0;
unsigned int len = raw.size();
int matchInfo[3*2] = {0};
int rc = 0;
while (offset < len && (rc = pcre_exec(preg_pattern_a_tag, 0, raw.c_str(), len, offset, 0, matchInfo, sizeof(matchInfo))) >= 0) {
for (int n=0; n<rc ; ++n) {
int data_length = matchInfo[2*n+1] - matchInfo[2*n];
std::cout << "Found:[" << raw.substr(matchInfo[2*n], data_length) << "]\n";
}
offset = matchInfo[1];
}
return 0;
}
$ g++ -std=c++11 pcre_test.cpp -lpcre
$ ./a.out
Found:[<a href='...'>...</a>]
Found:[...]
Found:[...]
對 RE2 來說:
$ vim re2_test.cpp
#include <re2/re2.h>
#include <iostream>
int main() {
//RE2 preg_pattern_a_tag("<a[^h]*href=['\"]{0,1}([^\"']+)[\"']{0,1}[^>]*>(.*?)</a>", RE2::Latin1);
RE2 preg_pattern_a_tag("<a[^h]*href=['\"]{0,1}([^\"']+)[\"']{0,1}[^>]*>(.*?)</a>");
std::string raw = "..<a href='...'>...</a>..";
re2::StringPiece result_a_href, result_a_body;
while(RE2::PartialMatch(raw, preg_pattern_a_tag, &result_a_href, &result_a_body)) {
std::cout << "result_a_href:[" << result_a_href << "]\n";
std::cout << "result_a_body:[" << result_a_body << "]\n";
raw = result_a_body.data();
}
return 0;
}
$ g++ -std=c++11 re2_test.cpp /path/libre2.a -lpthread
$ ./a.out
result_a_href:[...]
result_a_body:[...]
沒有留言:
張貼留言