示例:字元串搜尋
說明:
搜尋比對一個模式的字元串是一個常見問題。正規表達式是描述字元串模式的一種标準語言。與其為每一個正規表達式都構造一個特定的算法,不如使用一種通用的搜尋算法來解釋執行一個正規表達式,該正規表達式定義了待比對字元串的集合。
(1)、文法
正規表達式用下列文法定義:
expression ::= literal | alternation | sequence | repetition | '(' expression ')'
alternation ::= expression '|' expression
sequence ::= expression '&' expression
repetition ::= expression 'repeat'
literal ::= 'a' | 'b' | 'c'| …… {'a' | 'b' | 'c' | ……}*
(2)、表示:文法元素對象。
![](https://img.laitimes.com/img/_0nNw4CM6IyYiwiM6ICdiwiIml2ZuY1MEVUN1AzMwYTN5ITMfBzLcFjMvwVMwETMwIzLcRnbl1GajFGd0F2LcRXZu5ibkN3YukGavw1LcpDc0RHaiojIsJye.gif)
(3)、解釋器
Match操作實作了一個正規表達式的一個解釋器。定義抽象文法樹的每一個類都實作了這一操作。
(4)、句子
例如:正規表達式"( ( 'dog' | 'cat' ) repeat &'weather')"比對輸入字元串"dog dog cat weather"。
代碼:
unit uStringExpression;
interface
uses
Classes,Contnrs;
type
{文字流}
TStream1 = class
private
FData: string;
FPosition: Integer;
public
constructor Create(AData: string);
//---
function Copy: TStream1;
{讀入文字,推進輸入流}
function NextAvailable(ASize: integer): string;
procedure Next(const AStep: integer = 1);
function Eof:boolean;
//---
property Data: string read FData;
property Position: Integer read FPosition;
end;
{流狀态清單}
TState = class(TObjectList)
private
function GetStream(Index: integer): TStream1;
public
constructor Create;
destructor Destroy; override;
//---
procedure Add(AStream: TStream1);
procedure AddAll(AState: TState);
function Copy: TState;
//---
property Stream[Index: integer]: TStream1 read GetStream; default;
end;
TRegularExpression = class;
TNode1 = class
public
function asRExp: TRegularExpression; virtual; abstract;
end;
TString1 = class(TNode1)
private
FLiterals: string;
public
constructor Create(const ALiterals: string);
//---
function Size: integer;
function Equal(const ALiterals: string): Boolean;
//---
{'&'運算}
function Sequence(ANode: TNode1): TRegularExpression;
{'*'運算}
function Repetition: TRegularExpression;
{'|'運算}
function Alternation(ANode: TNode1): TRegularExpression;
//---
function asRExp: TRegularExpression; override;
end;
{表達式}
TRegularExpression = class(TNode1)
public
function Match(AInputState: TState): TState; virtual; abstract;
//---
{'&'運算}
function Sequence(ANode: TNode1): TRegularExpression;
{'*'運算}
function Repetition: TRegularExpression;
{'|'運算}
function Alternation(ANode: TNode1): TRegularExpression;
//---
function asRExp: TRegularExpression; override;
end;
{字元常量}
TLiteralExpression = class(TRegularExpression)
private
FComponents: TString1;
public
constructor Create(AComponents: TString1);
destructor Destroy; override;
//---
function Match(AInputState: TState): TState; override;
end;
{'|'運算}
TAlternationExpression = class(TRegularExpression)
private
FAlternative1: TRegularExpression;
FAlternative2: TRegularExpression;
public
constructor Create(Alternative1,Alternative2: TRegularExpression);
destructor Destroy; override;
//---
function Match(AInputState: TState): TState; override;
end;
{'*'運算}
TRepetitionExpression = class(TRegularExpression)
private
FRepetition: TRegularExpression;
public
constructor Create(ARepetition: TRegularExpression);
destructor Destroy; override;
//---
function Match(AInputState: TState): TState; override;
end;
{'&'運算}
TSequenceExpression = class(TRegularExpression)
private
FExpression1: TRegularExpression;
FExpression2: TRegularExpression;
public
constructor Create(AExpression1,AExpression2: TRegularExpression);
destructor Destroy; override;
//---
function Match(AInputState: TState): TState; override;
end;
implementation
var
FTempStates: TObjectList;
constructor TLiteralExpression.Create(AComponents: TString1);
begin
FComponents := AComponents;
end;
destructor TLiteralExpression.Destroy;
begin
FComponents.Free;
//---
inherited;
end;
function TLiteralExpression.Match(AInputState: TState): TState;
var
AFinalState: TState;
AStream: TStream1;
i: Integer;
begin
AFinalState := TState.Create;
//---
with AInputState do
begin
for i := 0 to Count - 1 do
begin
AStream := Stream[i].Copy;
if FComponents.Equal(AStream.NextAvailable(FComponents.Size)) then
AFinalState.Add(AStream)
else
AStream.Free;
end;
end;
//---
Result := AFinalState;
end;
{ TSequenceExpression }
constructor TSequenceExpression.Create(AExpression1,AExpression2:
TRegularExpression);
begin
FExpression1 := AExpression1;
FExpression2 := AExpression2;
end;
destructor TSequenceExpression.Destroy;
begin
FExpression1.Free;
FExpression2.Free;
//---
inherited;
end;
function TSequenceExpression.Match(AInputState: TState): TState;
begin
Result := FExpression2.Match(FExpression1.Match(AInputState));
end;
{ TAlternationExpression }
constructor TAlternationExpression.Create(Alternative1,Alternative2:
TRegularExpression);
begin
FAlternative1 := Alternative1;
FAlternative2 := Alternative2;
end;
destructor TAlternationExpression.Destroy;
begin
FAlternative1.Free;
FAlternative2.Free;
//---
inherited;
end;
function TAlternationExpression.Match(AInputState: TState): TState;
var
AFinalState: TState;
begin
AFinalState := FAlternative1.Match(AInputState);
AFinalState.AddAll(FAlternative2.Match(AInputState));
//---
Result := AFinalState;
end;
{ TRepetitionExpression }
constructor TRepetitionExpression.Create(ARepetition: TRegularExpression);
begin
FRepetition := ARepetition;
end;
destructor TRepetitionExpression.Destroy;
begin
FRepetition.Free;
//---
inherited;
end;
function TRepetitionExpression.Match(AInputState: TState): TState;
var
AState,AFinalState: TState;
begin
AState := AInputState;
AFinalState := AInputState.Copy;
while AState.Count > 0 do
begin
AState := FRepetition.Match(aState);
AFinalState.AddAll(aState);
end;
//---
Result := AFinalState;
end;
constructor TStream1.Create(AData: string);
begin
FData := AData;
FPosition := 1;
end;
function TStream1.Copy: TStream1;
begin
Result := TStream1.Create(FData);
Result.FPosition := self.FPosition;
end;
function TStream1.NextAvailable(ASize: integer): string;
//---去掉空白字元
procedure _SkipBlanks;
begin
while FPosition <= Length(FData) do
begin
if FData[FPosition] = ' ' then
inc(FPosition)
else
break;
end;
end;
begin
_SkipBlanks;
//---
if FPosition + ASize - 1 > Length(FData) then
Result := ''
else
begin
Result := System.Copy(FData,FPosition,ASize);
FPosition := FPosition + ASize;
end;
end;
{ TState }
procedure TState.Add(AStream: TStream1);
begin
inherited Add(AStream);
end;
procedure TState.AddAll(AState: TState);
var
i: Integer;
begin
with AState do
begin
for i := 0 to Count - 1 do
self.Add(Stream[i].Copy);
end;
end;
function TState.Copy: TState;
begin
Result := TState.Create;
Result.AddAll(self);
end;
constructor TState.Create;
begin
inherited;
//---
FTempStates.Add(self);
end;
destructor TState.Destroy;
begin
if not FTempStates.OwnsObjects then
FTempStates.Remove(self);
//---
inherited;
end;
function TState.GetStream(Index: integer): TStream1;
begin
Result := TStream1(Items[Index]);
end;
function TRegularExpression.Sequence(ANode: TNode1): TRegularExpression;
begin
Result := TSequenceExpression.Create(self,ANode.asRExp);
end;
function TRegularExpression.Repetition: TRegularExpression;
begin
Result := TRepetitionExpression.Create(self);
end;
function TRegularExpression.Alternation(ANode: TNode1): TRegularExpression;
begin
Result := TAlternationExpression.Create(self,ANode.asRExp);
end;
function TRegularExpression.asRExp: TRegularExpression;
begin
Result := self;
end;
constructor TString1.Create(const ALiterals: string);
begin
FLiterals := ALiterals;
end;
function TString1.Alternation(ANode: TNode1): TRegularExpression;
begin
Result := TAlternationExpression.Create(self.asRExp,ANode.asRExp);
end;
function TString1.asRExp: TRegularExpression;
begin
Result := TLiteralExpression.Create(self);
end;
function TString1.Size: integer;
begin
Result := Length(FLiterals);
end;
function TString1.Equal(const ALiterals: string): Boolean;
begin
Result := FLiterals = ALiterals;
end;
function TString1.Repetition: TRegularExpression;
begin
Result := TRepetitionExpression.Create(self.asRExp);
end;
function TString1.Sequence(ANode: TNode1): TRegularExpression;
begin
Result := TSequenceExpression.Create(self.asRExp,ANode.asRExp);
end;
procedure TStream1.Next(const AStep: integer = 1);
begin
FPosition := FPosition + AStep;
end;
function TStream1.Eof: boolean;
begin
Result := FPosition > Length(FData);
end;
initialization
FTempStates := TObjectList.Create;
FTempStates.OwnsObjects := false;
finalization
FTempStates.OwnsObjects := true;
FTempStates.Free;
end.
procedure TForm1.Button1Click(Sender: TObject);
{正規表達式: ( ( 'dog' | 'cat' ) repeat & 'weather') }
var
AExpression: TRegularExpression;
AInState,AOutState: TState;
begin
Memo1.Clear;
//---
AExpression := TString1.Create('dog').Alternation(TString1.Create('cat')).Repetition.Sequence(TString1.Create('weather'));
try
AInState := TState.Create;
try
AInState.Add(TStream1.Create('dog dog cat weather'));
//---
AOutState := AExpression.Match(AInState);
if AOutState.Count > 0 then
Memo1.Text := '字元串比對';
AOutState.Free;
finally
AInState.Free;
end;
finally
AExpression.Free;
end;
end;
procedure TForm1.Button2Click(Sender: TObject);
{正規表達式: ( ( 'dog' | 'cat' ) repeat & 'weather') }
var
AExpression: TRegularExpression;
AInStream: TStream1;
AStartPosition,AEndPosition: integer;
//---
function _FindString(AInStream: TStream1): integer;
var
AInState,AOutState: TState;
begin
AInState := TState.Create;
try
AInState.Add(AInStream.Copy);
//---
AOutState := AExpression.Match(AInState);
if AOutState.Count > 0 then
Result := AOutState[0].Position
else
Result := 0;
AOutState.Free;
finally
AInState.Free;
end;
end;
begin
AExpression := TString1.Create('dog').Alternation(TString1.Create('cat')).Repetition.Sequence(TString1.Create('weather'));
try
AInStream := TStream1.Create('12345 dog dog cat weather 12345 cat weather 12345 dog weather cat weather');
try
Memo1.Clear;
//---
with AInStream do
begin
while not Eof do
begin
AStartPosition := Position;
AEndPosition := _FindString(AInStream);
if AEndPosition > 0 then
begin
Memo1.Lines.Add(format('起始位置:%.2d 内容:%s', [AStartPosition,System.Copy(Data,AStartPosition,AEndPosition - AStartPosition + 1)]));
Next(AEndPosition - AStartPosition + 1);
end
else
Next;
end;
end;
finally
AInStream.Free;
end;
finally
AExpression.Free;
end;
end;