繼續:
int Perl_yyparse (pTHX_ int gramtype){
register yy_parser *parser;
register yy_stack_frame *ps;
----從這兩句話,我們看出,有兩個變量用于parser,也就是說,是一種多層語言。
這種技術,是很常見的。比如,解析一門語言時,進入了另一種狀态,比如進入了注釋。
往前,我們找到最重要的一句話:
parser->yychar = yylex();
,所有的編譯器都是這樣的,lex是yacc的一個工具。是以,自然要從yacc中調用lex.
簡單來說,編譯器,是一種流式的解析器,它一次讀入流,完成一個任務。
雖然,有的編譯器,如C語言,理論上,是多遍完成解析的,因為有預編譯。
但,對于每一次來說,也就是每一種輸入來說,隻需要解析一次。
這也是編譯器的精妙之處。
lex的任務,是一個字元,一個字元地讀入,然後驅動内部的狀态機。當狀态機被激發,則會發給yacc一個token.
前面我解釋過了,perl解析器,沒有專門編寫一個lex檔案,而是直接手工編寫了一個token. 隻是原理,也lex沒有差别。
============
歇一會,
的第504行找到:
barestmt: PLUGSTMT
{ $$ = $1; }
| PEG
{
$$ = newOP(OP_NULL,0);
TOKEN_GETMAD($1,$$,'p');
}
。。。
| ';'
{
PL_parser->expect = XSTATE;
$$ = IF_MAD(newOP(OP_NULL, 0), (OP*)NULL);
TOKEN_GETMAD($1,$$,';');
PL_parser->copline = NOLINE;
}
;
========================================
現在,停掉重頭再來。
因為關鍵的東西還都沒有找到。
重新寫個腳本,最簡單的:
前面,打兩個回車,然後定義個變量,就可以了。
編譯器都是這樣寫的,從一個個簡單的語句解析開始。
然後,在token.c中,找到一句話:
void
Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
{
。。。
parser->linestart = SvPVX(parser->linestr);
parser->linestr,是在哪裡初始化的呢?
-----------
SvPVX,是從yacc的目前yyval中,得到想要的東西。因為yyval是一個union,是以,要根據需要,得到那個具體的值。
define SvPVX(sv) ((sv)->sv_u.svu_pv)
char *linestart;
-------------------------
重來。

真是難搞。
找到了第一行處。
我一定是錯過了許多東西。而且大部分地方,也沒看懂。
原來是想拿來直接用perl解析器。
然後加個自定義的東西。
現在來看,太難了。
我再想想其它的辦法。
就算是一個記錄吧。
找到第一個identify是在這裡:
現在,才明白,原來lex和yacc的解析器,文法與perl很象。
找到了指派語句:
termbinop: term ASSIGNOP term
{ $$ = newASSIGNOP(OPf_STACKED, $1, IVAL($2), $3);
TOKEN_GETMAD($2,$$,'o');
}
在核心的op.c中:
/*
=for apidoc Am|OP *|newASSIGNOP|I32 flags|OP *left|I32 optype|OP *right
Constructs, checks, and returns an assignment op. I<left> and I<right>
supply the parameters of the assignment; they are consumed by this
function and become part of the constructed op tree.
If I<optype> is C<OP_ANDASSIGN>, C<OP_ORASSIGN>, or C<OP_DORASSIGN>, then
a suitable conditional optree is constructed. If I<optype> is the opcode
of a binary operator, such as C<OP_BIT_OR>, then an op is constructed that
performs the binary operation and assigns the result to the left argument.
Either way, if I<optype> is non-zero then I<flags> has no effect.
If I<optype> is zero, then a plain scalar or list assignment is
constructed. Which type of assignment it is is automatically determined.
I<flags> gives the eight bits of C<op_flags>, except that C<OPf_KIDS>
will be set automatically, and, shifted up eight bits, the eight bits
of C<op_private>, except that the bit with value 1 or 2 is automatically
set as required.
=cut
*/
OP *
Perl_newASSIGNOP(pTHX_ I32 flags, OP *left, I32 optype, OP *right)
{
dVAR;
OP *o;
if (optype) {
if (optype == OP_ANDASSIGN || optype == OP_ORASSIGN || optype == OP_DORASSIGN) {
return newLOGOP(optype, 0,
op_lvalue(scalar(left), optype),
newUNOP(OP_SASSIGN, 0, scalar(right)));
}
else {
return newBINOP(optype, OPf_STACKED,
op_lvalue(scalar(left), optype), scalar(right));
}
}
if (is_list_assignment(left)) {
static const char no_list_state[] = "Initialization of state variables"
" in list context currently forbidden";
OP *curop;
bool maybe_common_vars = TRUE;
PL_modcount = 0;
left = op_lvalue(left, OP_AASSIGN);
curop = list(force_list(left));
o = newBINOP(OP_AASSIGN, flags, list(force_list(right)), curop);
o->op_private = (U8)(0 | (flags >> 8));
if ((left->op_type == OP_LIST
|| (left->op_type == OP_NULL && left->op_targ == OP_LIST)))
{
OP* lop = ((LISTOP*)left)->op_first;
maybe_common_vars = FALSE;
while (lop) {
if (lop->op_type == OP_PADSV ||
lop->op_type == OP_PADAV ||
lop->op_type == OP_PADHV ||
lop->op_type == OP_PADANY) {
if (!(lop->op_private & OPpLVAL_INTRO))
maybe_common_vars = TRUE;
if (lop->op_private & OPpPAD_STATE) {
if (left->op_private & OPpLVAL_INTRO) {
/* Each variable in state($a, $b, $c) = ... */
}
else {
/* Each state variable in
(state $a, my $b, our $c, $d, undef) = ... */
}
yyerror(no_list_state);
} else {
/* Each my variable in
(state $a, my $b, our $c, $d, undef) = ... */
}
} else if (lop->op_type == OP_UNDEF ||
lop->op_type == OP_PUSHMARK) {
/* undef may be interesting in
(state $a, undef, state $c) */
} else {
/* Other ops in the list. */
maybe_common_vars = TRUE;
}
lop = lop->op_sibling;
}
}
else if ((left->op_private & OPpLVAL_INTRO)
&& ( left->op_type == OP_PADSV
|| left->op_type == OP_PADAV
|| left->op_type == OP_PADHV
|| left->op_type == OP_PADANY))
{
if (left->op_type == OP_PADSV) maybe_common_vars = FALSE;
if (left->op_private & OPpPAD_STATE) {
/* All single variable list context state assignments, hence
state ($a) = ...
(state $a) = ...
state @a = ...
state (@a) = ...
(state @a) = ...
state %a = ...
state (%a) = ...
(state %a) = ...
*/
yyerror(no_list_state);
}
}
/* PL_generation sorcery:
* an assignment like ($a,$b) = ($c,$d) is easier than
* ($a,$b) = ($c,$a), since there is no need for temporary vars.
* To detect whether there are common vars, the global var
* PL_generation is incremented for each assign op we compile.
* Then, while compiling the assign op, we run through all the
* variables on both sides of the assignment, setting a spare slot
* in each of them to PL_generation. If any of them already have
* that value, we know we've got commonality. We could use a
* single bit marker, but then we'd have to make 2 passes, first
* to clear the flag, then to test and set it. To find somewhere
* to store these values, evil chicanery is done with SvUVX().
*/
if (maybe_common_vars) {
PL_generation++;
if (aassign_common_vars(o))
o->op_private |= OPpASSIGN_COMMON;
LINKLIST(o);
}
if (right && right->op_type == OP_SPLIT && !PL_madskills) {
OP* tmpop = ((LISTOP*)right)->op_first;
if (tmpop && (tmpop->op_type == OP_PUSHRE)) {
PMOP * const pm = (PMOP*)tmpop;
if (left->op_type == OP_RV2AV &&
!(left->op_private & OPpLVAL_INTRO) &&
!(o->op_private & OPpASSIGN_COMMON) )
{
tmpop = ((UNOP*)left)->op_first;
if (tmpop->op_type == OP_GV
#ifdef USE_ITHREADS
&& !pm->op_pmreplrootu.op_pmtargetoff
#else
&& !pm->op_pmreplrootu.op_pmtargetgv
#endif
) {
#ifdef USE_ITHREADS
pm->op_pmreplrootu.op_pmtargetoff
= cPADOPx(tmpop)->op_padix;
cPADOPx(tmpop)->op_padix = 0; /* steal it */
#else
pm->op_pmreplrootu.op_pmtargetgv
= MUTABLE_GV(cSVOPx(tmpop)->op_sv);
cSVOPx(tmpop)->op_sv = NULL; /* steal it */
#endif
pm->op_pmflags |= PMf_ONCE;
tmpop = cUNOPo->op_first; /* to list (nulled) */
tmpop = ((UNOP*)tmpop)->op_first; /* to pushmark */
tmpop->op_sibling = NULL; /* don't free split */
right->op_next = tmpop->op_next; /* fix starting loc */
op_free(o); /* blow off assign */
right->op_flags &= ~OPf_WANT;
/* "I don't know and I don't care." */
return right;
}
}
else {
if (PL_modcount < RETURN_UNLIMITED_NUMBER &&
((LISTOP*)right)->op_last->op_type == OP_CONST)
{
SV *sv = ((SVOP*)((LISTOP*)right)->op_last)->op_sv;
if (SvIOK(sv) && SvIVX(sv) == 0)
sv_setiv(sv, PL_modcount+1);
}
}
}
}
return o;
}
if (!right)
right = newOP(OP_UNDEF, 0);
if (right->op_type == OP_READLINE) {
right->op_flags |= OPf_STACKED;
return newBINOP(OP_NULL, flags, op_lvalue(scalar(left), OP_SASSIGN),
scalar(right));
}
else {
o = newBINOP(OP_SASSIGN, flags,
scalar(right), op_lvalue(scalar(left), OP_SASSIGN) );
}
return o;
}
注意那個OP.
#define BASEOP \
OP* op_next; \
OP* op_sibling; \
OP* (*op_ppaddr)(pTHX); \
MADPROP_IN_BASEOP \
PADOFFSET op_targ; \
PERL_BITFIELD16 op_type:9; \
PERL_BITFIELD16 op_opt:1; \
PERL_BITFIELD16 op_latefree:1; \
PERL_BITFIELD16 op_latefreed:1; \
PERL_BITFIELD16 op_attached:1; \
PERL_BITFIELD16 op_spare:3; \
U8 op_flags; \
U8 op_private;
#endif
用來記錄操作表達式。
因為我就寫了一句話,後面什麼也沒幹。
也就沒什麼可跟的了。
跟的過程中,可以清楚地看到,如果在lex中,沒有找到什麼yacc 感興趣的東西,lex就把這些東西吞掉了。
主要就是這句:
parser->yychar = yylex();
===========
不過,perl的解釋器的确是我所見過的最複雜的。
lex 會在開始前,和結束後,生成一些token,發給yacc。
這讓我頭大了許多。
先到這裡吧。以後也不打算寫了。實在累人。