天天看点

PERL原码分析2

继续:

int Perl_yyparse (pTHX_ int gramtype){

    register yy_parser *parser;       

    register yy_stack_frame  *ps;  

----从这两句话,我们看出,有两个变量用于parser,也就是说,是一种多层语言。

这种技术,是很常见的。比如,解析一门语言时,进入了另一种状态,比如进入了注释。

往前,我们找到最重要的一句话:

    parser->yychar = yylex();

,所有的编译器都是这样的,lex是yacc的一个工具。所以,自然要从yacc中调用lex.

简单来说,编译器,是一种流式的解析器,它一次读入流,完成一个任务。

虽然,有的编译器,如C语言,理论上,是多遍完成解析的,因为有预编译。

但,对于每一次来说,也就是每一种输入来说,只需要解析一次。

这也是编译器的精妙之处。

lex的任务,是一个字符,一个字符地读入,然后驱动内部的状态机。当状态机被激发,则会发给yacc一个token.

前面我解释过了,perl解析器,没有专门编写一个lex文件,而是直接手工编写了一个token. 只是原理,也lex没有差别。

============

歇一会,

的第504行找到:

barestmt:    PLUGSTMT

            { $$ = $1; }

    |    PEG

            {

              $$ = newOP(OP_NULL,0);

              TOKEN_GETMAD($1,$$,'p');

            }

。。。

    |    ';'

            {

              PL_parser->expect = XSTATE;

              $$ = IF_MAD(newOP(OP_NULL, 0), (OP*)NULL);

              TOKEN_GETMAD($1,$$,';');

              PL_parser->copline = NOLINE;

            }

    ;

========================================

现在,停掉重头再来。

因为关键的东西还都没有找到。

重新写个脚本,最简单的:

前面,打两个回车,然后定义个变量,就可以了。

编译器都是这样写的,从一个个简单的语句解析开始。

然后,在token.c中,找到一句话:

void

Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)

{

。。。

parser->linestart = SvPVX(parser->linestr);

parser->linestr,是在哪里初始化的呢?

-----------

SvPVX,是从yacc的当前yyval中,得到想要的东西。因为yyval是一个union,所以,要根据需要,得到那个具体的值。

define SvPVX(sv) ((sv)->sv_u.svu_pv)

 char    *linestart;    

-------------------------

重来。

PERL原码分析2

真是难搞。

找到了第一行处。

我一定是错过了许多东西。而且大部分地方,也没看懂。

原来是想拿来直接用perl解析器。

然后加个自定义的东西。

现在来看,太难了。

我再想想其它的办法。

就算是一个记录吧。

找到第一个identify是在这里:

PERL原码分析2

现在,才明白,原来lex和yacc的解析器,语法与perl很象。

找到了赋值语句:

termbinop:    term ASSIGNOP term                    

            { $$ = newASSIGNOP(OPf_STACKED, $1, IVAL($2), $3);

              TOKEN_GETMAD($2,$$,'o');

            }

在核心的op.c中:

/*
=for apidoc Am|OP *|newASSIGNOP|I32 flags|OP *left|I32 optype|OP *right

Constructs, checks, and returns an assignment op.  I<left> and I<right>
supply the parameters of the assignment; they are consumed by this
function and become part of the constructed op tree.

If I<optype> is C<OP_ANDASSIGN>, C<OP_ORASSIGN>, or C<OP_DORASSIGN>, then
a suitable conditional optree is constructed.  If I<optype> is the opcode
of a binary operator, such as C<OP_BIT_OR>, then an op is constructed that
performs the binary operation and assigns the result to the left argument.
Either way, if I<optype> is non-zero then I<flags> has no effect.

If I<optype> is zero, then a plain scalar or list assignment is
constructed.  Which type of assignment it is is automatically determined.
I<flags> gives the eight bits of C<op_flags>, except that C<OPf_KIDS>
will be set automatically, and, shifted up eight bits, the eight bits
of C<op_private>, except that the bit with value 1 or 2 is automatically
set as required.

=cut
*/

OP *
Perl_newASSIGNOP(pTHX_ I32 flags, OP *left, I32 optype, OP *right)
{
    dVAR;
    OP *o;

    if (optype) {
	if (optype == OP_ANDASSIGN || optype == OP_ORASSIGN || optype == OP_DORASSIGN) {
	    return newLOGOP(optype, 0,
		op_lvalue(scalar(left), optype),
		newUNOP(OP_SASSIGN, 0, scalar(right)));
	}
	else {
	    return newBINOP(optype, OPf_STACKED,
		op_lvalue(scalar(left), optype), scalar(right));
	}
    }

    if (is_list_assignment(left)) {
	static const char no_list_state[] = "Initialization of state variables"
	    " in list context currently forbidden";
	OP *curop;
	bool maybe_common_vars = TRUE;

	PL_modcount = 0;
	left = op_lvalue(left, OP_AASSIGN);
	curop = list(force_list(left));
	o = newBINOP(OP_AASSIGN, flags, list(force_list(right)), curop);
	o->op_private = (U8)(0 | (flags >> 8));

	if ((left->op_type == OP_LIST
	     || (left->op_type == OP_NULL && left->op_targ == OP_LIST)))
	{
	    OP* lop = ((LISTOP*)left)->op_first;
	    maybe_common_vars = FALSE;
	    while (lop) {
		if (lop->op_type == OP_PADSV ||
		    lop->op_type == OP_PADAV ||
		    lop->op_type == OP_PADHV ||
		    lop->op_type == OP_PADANY) {
		    if (!(lop->op_private & OPpLVAL_INTRO))
			maybe_common_vars = TRUE;

		    if (lop->op_private & OPpPAD_STATE) {
			if (left->op_private & OPpLVAL_INTRO) {
			    /* Each variable in state($a, $b, $c) = ... */
			}
			else {
			    /* Each state variable in
			       (state $a, my $b, our $c, $d, undef) = ... */
			}
			yyerror(no_list_state);
		    } else {
			/* Each my variable in
			   (state $a, my $b, our $c, $d, undef) = ... */
		    }
		} else if (lop->op_type == OP_UNDEF ||
			   lop->op_type == OP_PUSHMARK) {
		    /* undef may be interesting in
		       (state $a, undef, state $c) */
		} else {
		    /* Other ops in the list. */
		    maybe_common_vars = TRUE;
		}
		lop = lop->op_sibling;
	    }
	}
	else if ((left->op_private & OPpLVAL_INTRO)
		&& (   left->op_type == OP_PADSV
		    || left->op_type == OP_PADAV
		    || left->op_type == OP_PADHV
		    || left->op_type == OP_PADANY))
	{
	    if (left->op_type == OP_PADSV) maybe_common_vars = FALSE;
	    if (left->op_private & OPpPAD_STATE) {
		/* All single variable list context state assignments, hence
		   state ($a) = ...
		   (state $a) = ...
		   state @a = ...
		   state (@a) = ...
		   (state @a) = ...
		   state %a = ...
		   state (%a) = ...
		   (state %a) = ...
		*/
		yyerror(no_list_state);
	    }
	}

	/* PL_generation sorcery:
	 * an assignment like ($a,$b) = ($c,$d) is easier than
	 * ($a,$b) = ($c,$a), since there is no need for temporary vars.
	 * To detect whether there are common vars, the global var
	 * PL_generation is incremented for each assign op we compile.
	 * Then, while compiling the assign op, we run through all the
	 * variables on both sides of the assignment, setting a spare slot
	 * in each of them to PL_generation. If any of them already have
	 * that value, we know we've got commonality.  We could use a
	 * single bit marker, but then we'd have to make 2 passes, first
	 * to clear the flag, then to test and set it.  To find somewhere
	 * to store these values, evil chicanery is done with SvUVX().
	 */

	if (maybe_common_vars) {
	    PL_generation++;
	    if (aassign_common_vars(o))
		o->op_private |= OPpASSIGN_COMMON;
	    LINKLIST(o);
	}

	if (right && right->op_type == OP_SPLIT && !PL_madskills) {
	    OP* tmpop = ((LISTOP*)right)->op_first;
	    if (tmpop && (tmpop->op_type == OP_PUSHRE)) {
		PMOP * const pm = (PMOP*)tmpop;
		if (left->op_type == OP_RV2AV &&
		    !(left->op_private & OPpLVAL_INTRO) &&
		    !(o->op_private & OPpASSIGN_COMMON) )
		{
		    tmpop = ((UNOP*)left)->op_first;
		    if (tmpop->op_type == OP_GV
#ifdef USE_ITHREADS
			&& !pm->op_pmreplrootu.op_pmtargetoff
#else
			&& !pm->op_pmreplrootu.op_pmtargetgv
#endif
			) {
#ifdef USE_ITHREADS
			pm->op_pmreplrootu.op_pmtargetoff
			    = cPADOPx(tmpop)->op_padix;
			cPADOPx(tmpop)->op_padix = 0;	/* steal it */
#else
			pm->op_pmreplrootu.op_pmtargetgv
			    = MUTABLE_GV(cSVOPx(tmpop)->op_sv);
			cSVOPx(tmpop)->op_sv = NULL;	/* steal it */
#endif
			pm->op_pmflags |= PMf_ONCE;
			tmpop = cUNOPo->op_first;	/* to list (nulled) */
			tmpop = ((UNOP*)tmpop)->op_first; /* to pushmark */
			tmpop->op_sibling = NULL;	/* don't free split */
			right->op_next = tmpop->op_next;  /* fix starting loc */
			op_free(o);			/* blow off assign */
			right->op_flags &= ~OPf_WANT;
				/* "I don't know and I don't care." */
			return right;
		    }
		}
		else {
                   if (PL_modcount < RETURN_UNLIMITED_NUMBER &&
		      ((LISTOP*)right)->op_last->op_type == OP_CONST)
		    {
			SV *sv = ((SVOP*)((LISTOP*)right)->op_last)->op_sv;
			if (SvIOK(sv) && SvIVX(sv) == 0)
			    sv_setiv(sv, PL_modcount+1);
		    }
		}
	    }
	}
	return o;
    }
    if (!right)
	right = newOP(OP_UNDEF, 0);
    if (right->op_type == OP_READLINE) {
	right->op_flags |= OPf_STACKED;
	return newBINOP(OP_NULL, flags, op_lvalue(scalar(left), OP_SASSIGN),
		scalar(right));
    }
    else {
	o = newBINOP(OP_SASSIGN, flags,
	    scalar(right), op_lvalue(scalar(left), OP_SASSIGN) );
    }
    return o;
}
           

注意那个OP.

#define BASEOP                \

    OP*        op_next;        \

    OP*        op_sibling;        \

    OP*        (*op_ppaddr)(pTHX);    \

    MADPROP_IN_BASEOP            \

    PADOFFSET    op_targ;        \

    PERL_BITFIELD16 op_type:9;        \

    PERL_BITFIELD16 op_opt:1;        \

    PERL_BITFIELD16 op_latefree:1;    \

    PERL_BITFIELD16 op_latefreed:1;    \

    PERL_BITFIELD16 op_attached:1;    \

    PERL_BITFIELD16 op_spare:3;        \

    U8        op_flags;        \

    U8        op_private;

#endif

用来记录操作表达式。

因为我就写了一句话,后面什么也没干。

也就没什么可跟的了。

跟的过程中,可以清楚地看到,如果在lex中,没有找到什么yacc 感兴趣的东西,lex就把这些东西吞掉了。

主要就是这句:

parser->yychar = yylex();

===========

不过,perl的解释器的确是我所见过的最复杂的。

lex 会在开始前,和结束后,生成一些token,发给yacc。

这让我头大了许多。

先到这里吧。以后也不打算写了。实在累人。