$ vim t.php
<?php
// Sample markup; in real use the HTML would be loaded with file_get_contents(...).
$data = "..<a href='...'>...</a>..";

// Group 1 captures the href value, group 2 the text between <a ...> and </a>.
$anchorPattern = "#<a[^h]*href=['\"]{0,1}([^\"']+)[\"']{0,1}[^>]*>(.*?)</a>#";

$matchCount = preg_match_all($anchorPattern, $data, $matches);
if ($matchCount) {
    // $matches[0] lists the full matches; $matches[1] and $matches[2] list the two groups.
    print_r($matches);
}
$ php t.php
Array
(
    [0] => Array
        (
            [0] => <a href='...'>...</a>
        )
    [1] => Array
        (
            [0] => ...
        )
    [2] => Array
        (
            [0] => ...
        )
)
對 PCRE 來說:
$ vim pcre_test.cpp
#include <pcre.h>
#include <iostream>
#include <string>
int main() {
 const char *error;
 int erroroffset;
 pcre *preg_pattern_a_tag = pcre_compile("<a[^h]*href=['\"]{0,1}([^\"']+)[\"']{0,1}[^>]*>(.*?)</a>", PCRE_MULTILINE, &error,  &erroroffset, NULL);
 if (!preg_pattern_a_tag) {
  std::cout << "ERROR\n";
  return -1;
 }
 std::string raw = "..<a href='...'>...</a>..";
 unsigned int offset = 0;
 unsigned int len = raw.size();
 int matchInfo[3*2] = {0};
 int rc = 0;
 while (offset < len && (rc = pcre_exec(preg_pattern_a_tag, 0, raw.c_str(), len, offset, 0,  matchInfo, sizeof(matchInfo))) >= 0) {
  for (int n=0; n<rc ; ++n) {
   int data_length = matchInfo[2*n+1] - matchInfo[2*n];
   std::cout << "Found:[" << raw.substr(matchInfo[2*n], data_length) << "]\n";
  }
  offset = matchInfo[1];
 }
 return 0;
}
$ g++ -std=c++11 pcre_test.cpp -lpcre
$ ./a.out
Found:[<a href='...'>...</a>]
Found:[...]
Found:[...]
對 RE2 來說:
$ vim re2_test.cpp
#include <re2/re2.h>
#include <iostream>
int main() {
 //RE2 preg_pattern_a_tag("<a[^h]*href=['\"]{0,1}([^\"']+)[\"']{0,1}[^>]*>(.*?)</a>", RE2::Latin1);
 RE2 preg_pattern_a_tag("<a[^h]*href=['\"]{0,1}([^\"']+)[\"']{0,1}[^>]*>(.*?)</a>");
 std::string raw = "..<a href='...'>...</a>..";
 re2::StringPiece result_a_href, result_a_body;
 while(RE2::PartialMatch(raw, preg_pattern_a_tag, &result_a_href, &result_a_body)) {
  std::cout << "result_a_href:[" << result_a_href << "]\n";
  std::cout << "result_a_body:[" << result_a_body << "]\n";
  raw = result_a_body.data();
 }
 return 0;
}
$ g++ -std=c++11 re2_test.cpp /path/libre2.a -lpthread
$ ./a.out
result_a_href:[...]
result_a_body:[...]
 
沒有留言:
張貼留言