天天看點

HTML解析器(C#):基于C#的HTML解析器

namespace ParseHTML

{

/// <summary>
/// Plain data holder for one tag: a name, a piece of text associated with it,
/// and an untyped list of attributes.
/// </summary>
public class Tag
{
    /// <summary>
    /// Creates a tag whose name and followed text are empty strings and whose
    /// attribute list is a new, empty <see cref="ArrayList"/>.
    /// </summary>
    public Tag()
    {
        TagName = "";
        FollowedText = "";
        TagAttributes = new ArrayList();
    }

    /// <summary>Gets or sets the tag name.</summary>
    public string TagName { get; set; }

    /// <summary>
    /// Gets or sets the followed text (presumably the text that comes after
    /// the tag in the document; the visible code only stores the value).
    /// </summary>
    public string FollowedText { get; set; }

    /// <summary>
    /// Gets or sets the attribute list. The setter stores the given reference
    /// as-is (no copy is made, and null is not rejected).
    /// </summary>
    public ArrayList TagAttributes { get; set; }
}

/// <summary>
/// HTML tokenizer layered on the <c>Parse</c> base class. Repeated calls to
/// <see cref="Parse()"/> yield either the next plain character or, when a tag
/// was consumed, <c>(char)0</c>; the tag can then be read with
/// <see cref="GetTag"/> or re-serialised with <see cref="BuildTag"/>.
/// </summary>
/// <remarks>
/// The cursor helpers (<c>GetCurrentChar</c>, <c>Advance</c>, <c>Eof</c>, ...),
/// the attribute storage (<c>List</c>, the indexers, <c>Clear</c>) and the
/// <c>m_tag</c>/<c>Parse*</c> state are not declared in this class; they are
/// presumably inherited from <c>Parse</c> - confirm against that class.
/// </remarks>
public class ParseHTML : Parse
{
    /// <summary>
    /// Builds a detached copy of the current tag: a new list named after the
    /// current tag name and filled with clones of the current attributes.
    /// </summary>
    /// <returns>A new <c>AttributeList</c> that shares no attribute instances with this parser.</returns>
    public AttributeList GetTag()
    {
        AttributeList tag = new AttributeList();
        tag.Name = m_tag;

        foreach (Attribute x in List)
        {
            tag.Add((Attribute)x.Clone());
        }

        return tag;
    }

    /// <summary>
    /// Serialises the current tag name and attributes back into markup of the
    /// form <c>&lt;name attr=value ...&gt;</c>.
    /// </summary>
    /// <returns>The reconstructed tag text, including the angle brackets.</returns>
    public String BuildTag()
    {
        // The opening bracket had been stripped from this literal, leaving an
        // unterminated string; a serialised tag must start with '<'.
        String buffer = "<";
        buffer += m_tag;

        // The loop relies on the integer indexer returning null once the
        // index runs past the last attribute.
        for (int i = 0; this[i] != null; i++)
        {
            Attribute attr = this[i];
            buffer += " ";

            if (attr.Value == null)
            {
                // Attribute without a value: emit the name alone, wrapped in
                // its delimiter when one was recorded.
                if (attr.Delim != 0)
                {
                    buffer += attr.Delim;
                }

                buffer += attr.Name;

                if (attr.Delim != 0)
                {
                    buffer += attr.Delim;
                }
            }
            else
            {
                // name=value, with the value wrapped in its delimiter when
                // one was recorded.
                buffer += attr.Name;
                buffer += "=";

                if (attr.Delim != 0)
                {
                    buffer += attr.Delim;
                }

                buffer += attr.Value;

                if (attr.Delim != 0)
                {
                    buffer += attr.Delim;
                }
            }
        }

        buffer += ">";
        return buffer;
    }

    /// <summary>
    /// Parses one tag starting at the current character, which
    /// <see cref="Parse()"/> has already moved past the opening '&lt;'.
    /// Resets the tag name and attribute list, then fills them from the input.
    /// </summary>
    protected void ParseTag()
    {
        m_tag = "";
        Clear();

        // Is it a comment?
        if ((GetCurrentChar() == '!') &&
            (GetCurrentChar(1) == '-') &&
            (GetCurrentChar(2) == '-'))
        {
            // Accumulate everything up to (not including) the closing "-->",
            // dropping carriage returns.
            while (!Eof())
            {
                if ((GetCurrentChar() == '-') &&
                    (GetCurrentChar(1) == '-') &&
                    (GetCurrentChar(2) == '>'))
                {
                    break;
                }

                if (GetCurrentChar() != '\r')
                {
                    m_tag += GetCurrentChar();
                }

                Advance();
            }

            // Re-append the closing dashes, then step over the three
            // characters of "-->".
            m_tag += "--";
            Advance();
            Advance();
            Advance();
            ParseDelim = (char)0;
            return;
        }

        // Tag name: everything up to the first whitespace or '>'.
        while (!Eof())
        {
            if (IsWhiteSpace(GetCurrentChar()) || (GetCurrentChar() == '>'))
            {
                break;
            }

            m_tag += GetCurrentChar();
            Advance();
        }

        EatWhiteSpace();

        // Attributes, until the closing '>'. The Eof() guard stops the loop
        // when the input ends inside an unterminated tag; without it the
        // condition could stay true forever once no more characters arrive.
        while (!Eof() && GetCurrentChar() != '>')
        {
            ParseName = "";
            ParseValue = "";
            ParseDelim = (char)0;

            ParseAttributeName();

            if (GetCurrentChar() == '>')
            {
                AddAttribute();
                break;
            }

            // Get the value (if any)
            ParseAttributeValue();
            AddAttribute();
        }

        // Step over the closing '>'.
        Advance();
    }

    /// <summary>
    /// Consumes the next token from the input.
    /// </summary>
    /// <returns>
    /// <c>(char)0</c> when a tag was parsed (read it via <see cref="GetTag"/>);
    /// otherwise the value returned by <c>AdvanceCurrentChar()</c>.
    /// </returns>
    public char Parse()
    {
        // The '<' had been stripped from this char literal, leaving it
        // unterminated; a tag can only start at an opening angle bracket.
        if (GetCurrentChar() == '<')
        {
            Advance();

            // Invariant upper-casing: the culture-sensitive char.ToUpper maps
            // 'i' outside 'A'..'Z' under Turkish cultures, which would stop
            // tags such as <img> from being recognised.
            char ch = char.ToUpperInvariant(GetCurrentChar());

            if ((ch >= 'A' && ch <= 'Z') || ch == '!' || ch == '/')
            {
                ParseTag();
                return (char)0;
            }

            // NOTE(review): the '<' has already been stepped over here, so a
            // literal '<' that does not start a tag may never be returned to
            // the caller - confirm against AdvanceCurrentChar().
            return AdvanceCurrentChar();
        }

        return AdvanceCurrentChar();
    }
}

/// <summary>
/// A single name/value pair together with the delimiter character that
/// wrapped the value (<c>(char)0</c> when there was none). Instances can be
/// copied through <see cref="ICloneable"/>.
/// </summary>
public class Attribute : ICloneable
{
    /// <summary>Creates an attribute with empty name and value and no delimiter.</summary>
    public Attribute()
        : this(string.Empty, string.Empty, '\0')
    {
    }

    /// <summary>Creates an attribute with the given name and value and no delimiter.</summary>
    /// <param name="name">Attribute name.</param>
    /// <param name="value">Attribute value.</param>
    public Attribute(String name, String value)
        : this(name, value, '\0')
    {
    }

    /// <summary>Creates an attribute with the given name, value and delimiter.</summary>
    /// <param name="name">Attribute name.</param>
    /// <param name="value">Attribute value.</param>
    /// <param name="delim">Delimiter character, or <c>(char)0</c> for none.</param>
    public Attribute(string name, string value, char delim)
    {
        Name = name;
        Value = value;
        Delim = delim;
    }

    /// <summary>Gets or sets the attribute name.</summary>
    public string Name { get; set; }

    /// <summary>Gets or sets the attribute value.</summary>
    public string Value { get; set; }

    /// <summary>Gets or sets the delimiter character.</summary>
    public char Delim { get; set; }

    // ICloneable implementation

    /// <summary>
    /// Creates a new <see cref="Attribute"/> carrying this instance's name,
    /// value and delimiter.
    /// </summary>
    /// <returns>The copy, typed as <see cref="object"/>.</returns>
    public virtual object Clone()
    {
        return new Attribute(Name, Value, Delim);
    }
}

public class AttributeList : Attribute

{

protected ArrayList m_list;

/// <summary>
/// Creates an independent copy of this list: a new <c>AttributeList</c> with
/// the same name, value and delimiter, holding a clone of every attribute.
/// </summary>
/// <returns>The copied list, typed as <see cref="Object"/>.</returns>
public override Object Clone()
{
    AttributeList rtn = new AttributeList();

    // The list is itself an Attribute and callers store data in the inherited
    // members (the tag name is kept in Name), so the copy must carry them too;
    // previously they were reset to the constructor defaults.
    rtn.Name = Name;
    rtn.Value = Value;
    rtn.Delim = Delim;

    for (int i = 0; i < m_list.Count; i++)
    {
        rtn.Add((Attribute)this[i].Clone());
    }

    return rtn;
}

/// <summary>
/// Creates an empty attribute list whose own name and value are empty strings.
/// </summary>
public AttributeList()
    : base(string.Empty, string.Empty)
{
    this.m_list = new ArrayList();
}

/// <summary>Appends an attribute to the end of the list.</summary>
/// <param name="a">The attribute to store; the reference is kept as-is.</param>
public void Add(Attribute a)
{
    this.m_list.Add(a);
}

/// <summary>Removes every attribute from the list.</summary>
public void Clear()
{
    this.m_list.Clear();
}

/// <summary>Reports whether the list holds no attributes.</summary>
/// <returns><c>true</c> when the list contains no attributes; otherwise <c>false</c>.</returns>
public bool IsEmpty()
{
    int stored = m_list.Count;
    return stored < 1;
}

/// <summary>
/// Assigns a value to the attribute with the given name, adding a new
/// attribute when the name lookup finds none.
/// </summary>
/// <param name="name">Attribute name; a null name makes the call a no-op.</param>
/// <param name="value">New value; null is stored as an empty string.</param>
public void Set(string name, string value)
{
    // Nothing can be looked up or stored without a name.
    if (name == null)
    {
        return;
    }

    string effectiveValue = value ?? "";
    Attribute existing = this[name];

    if (existing != null)
    {
        existing.Value = effectiveValue;
        return;
    }

    Add(new Attribute(name, effectiveValue));
}

/// <summary>Gets the number of attributes currently stored in the list.</summary>
public int Count
{
    get
    {
        return this.m_list.Count;
    }
}