NGINX中使用PCRE作为正则表达式的解析接口,这里对编译和解析过程中的一些要点进行了测试验证。
PCRE接口pcre_fullinfo()在下面的链接中有详细的描述,这里对该接口的一些选项做补充说明:
链接 http://mushclient.com/pcre/index.html
这里描述一下命名子模式(named subpattern)和非命名子模式(numbered subpattern)混合使用时该接口的行为:
PCRE_INFO_CAPTURECOUNT: 得到的是所有子模式的个数,包含上述的两种子模式个数;
PCRE_INFO_NAMECOUNT: 得到的是命名子模式的个数,不包括非命名子模式的个数;
PCRE_INFO_NAMETABLE:
对于只有命名子模式的情况,例如文档描述的一个例子:
(?<date> (?<year>(\d\d)?\d\d) - (?<month>\d\d) - (?<day>\d\d) )
00 01 d a t e 00 ??
00 05 d a y 00 ?? ??
00 04 m o n t h 00
00 02 y e a r 00 ??
而对于混合模式,例子如下。注意每个条目开头的两个字节,它们表示该命名子模式的capture编号:命名子模式的编号是和非命名子模式一起排列的,也就是根据左括号出现的先后顺序编号的:
(eeeee)(?<abb> exception)(?<adfa>xydz)(ddddd)\k<abb>\1\2
{captures 4
named_captures 2
name_size 7}
#include <stdio.h>
#include <stdlib.h>
#include <pcre.h>
/* Alias for libpcre's PCRE_CASELESS compile option. */
#define NGX_REGEX_CASELESS PCRE_CASELESS
/*
 * Bundles a compiled PCRE pattern with its companion pcre_extra block.
 * Nothing in the visible code instantiates this type.
 */
typedef struct {
    pcre       *code;   /* compiled pattern (the type pcre_compile() returns) */
    pcre_extra *extra;  /* extra data block (the type pcre_study() returns) */
} ngx_regex_t;
/*
 * Run a single pcre_fullinfo() query and report a failure on stdout.
 *
 * re     compiled pattern to inspect
 * what   PCRE_INFO_* request code
 * where  receives the answer; the type it must point to depends on `what`
 * label  textual name of the request code, used only in the failure message
 *
 * Returns the pcre_fullinfo() result unchanged (negative means failure).
 */
static int query_info(const pcre *re, int what, void *where, const char *label)
{
    int rc = pcre_fullinfo(re, NULL, what, where);

    if (rc < 0) {
        printf("pcre_fullinfo %s failed %d \n", label, rc);
    }
    return rc;
}

/*
 * Compile a pattern that mixes named and unnamed capture groups, then print
 * what pcre_fullinfo() reports for it: the total capture count, the number
 * of named groups, the size of one name-table entry, and every name-table
 * entry (two-byte group number followed by the NUL-terminated name).
 *
 * Returns 0 on success and EXIT_FAILURE when compilation or any
 * pcre_fullinfo() query fails.
 */
int main(void)
{
    const char *pattern =
        "(eeeee)(?<abb> exception)(?<adfa>xydz)(ddddd)\\k<abb>\\1\\2";
    const char *errstr = NULL;
    /* Unsigned bytes, so a value >= 0x80 is not sign-extended when printed. */
    const unsigned char *entry = NULL;
    int erroff = 0;
    int captures = 0;
    int named_captures = 0;
    int name_size = 0;
    int status = EXIT_FAILURE;
    pcre *re;

    /* The pattern is deliberately printed twice to keep stdout unchanged. */
    printf("%s \n", pattern);
    printf("%s \n", pattern);

    re = pcre_compile(pattern, PCRE_CASELESS, &errstr, &erroff, NULL);
    if (re == NULL) {
        printf("compile pcre failed\n");
        printf("offset %d: %s \n", erroff,
               errstr != NULL ? errstr : "(no message)");
        return EXIT_FAILURE;
    }

    if (query_info(re, PCRE_INFO_CAPTURECOUNT, &captures,
                   "PCRE_INFO_CAPTURECOUNT") < 0) {
        goto done;
    }
    printf(" captures %d \n", captures);

    if (query_info(re, PCRE_INFO_NAMECOUNT, &named_captures,
                   "PCRE_INFO_NAMECOUNT") < 0) {
        goto done;
    }
    printf("named_captures %d \n", named_captures);

    if (query_info(re, PCRE_INFO_NAMEENTRYSIZE, &name_size,
                   "PCRE_INFO_NAMEENTRYSIZE") < 0) {
        goto done;
    }
    printf("name_size %d \n", name_size);

    if (query_info(re, PCRE_INFO_NAMETABLE, &entry,
                   "PCRE_INFO_NAMETABLE") < 0) {
        goto done;
    }

    /*
     * The table holds named_captures fixed-size entries of name_size bytes:
     * two bytes of group number, then the NUL-terminated group name.
     */
    for (int j = 0; j < named_captures; j++) {
        printf("%x %x %s \n",
               (unsigned int) entry[0],
               (unsigned int) entry[1],
               (const char *) &entry[2]);
        entry += name_size;
    }

    status = 0;

done:
    /* The name table lives inside the compiled pattern; free it last. */
    pcre_free(re);
    return status;
}