Code/Resource
Windows Develop
Linux-Unix program
Internet-Socket-Network
Web Server
Browser Client
Ftp Server
Ftp Client
Browser Plugins
Proxy Server
Email Server
Email Client
WEB Mail
Firewall-Security
Telnet Server
Telnet Client
ICQ-IM-Chat
Search Engine
Sniffer Package capture
Remote Control
xml-soap-webservice
P2P
WEB(ASP,PHP,...)
TCP/IP Stack
SNMP
Grid Computing
SilverLight
DNS
Cluster Service
Network Security
Communication-Mobile
Game Program
Editor
Multimedia program
Graph program
Compiler program
Compress-Decompress algrithms
Crypt_Decrypt algrithms
Mathimatics-Numerical algorithms
MultiLanguage
Disk/Storage
Java Develop
assembly language
Applications
Other systems
Database system
Embeded-SCM Develop
FlashMX/Flex
source in ebook
Delphi VCL
OS Develop
MiddleWare
MPI
MacOS develop
LabView
ELanguage
Software/Tools
E-Books
Artical/Document
HTMLParser.pas
Package: EMailMasterbysuperroyor.rar [view]
Upload User: yjb1804
Upload Date: 2021-01-30
Package Size: 3105k
Code Size: 8k
Category:
Email Server
Development Platform:
Delphi
- unit HtmlParser;
- interface
- uses
- DomCore, HtmlReader, HtmlTags;
- type
- THtmlParser = class
- private
- FHtmlDocument: TDocument;
- FHtmlReader: THtmlReader;
- FCurrentNode: TNode;
- FCurrentTag: THtmlTag;
- function FindDefParent: TElement;
- function FindParent: TElement;
- function FindParentElement(tagList: THtmlTagSet): TElement;
- function FindTableParent: TElement;
- function FindThisElement: TElement;
- function GetMainElement(const tagName: TDomString): TElement;
- procedure ProcessAttributeEnd(Sender: TObject);
- procedure ProcessAttributeStart(Sender: TObject);
- procedure ProcessCDataSection(Sender: TObject);
- procedure ProcessComment(Sender: TObject);
- procedure ProcessDocType(Sender: TObject);
- procedure ProcessElementEnd(Sender: TObject);
- procedure ProcessElementStart(Sender: TObject);
- procedure ProcessEndElement(Sender: TObject);
- procedure ProcessEntityReference(Sender: TObject);
- procedure ProcessTextNode(Sender: TObject);
- public
- constructor Create;
- destructor Destroy; override;
- function parseString(const htmlStr: TDomString): TDocument;
- property HtmlDocument: TDocument read FHtmlDocument;
- end;
- implementation
- const
- htmlTagName = 'html';
- headTagName = 'head';
- bodyTagName = 'body';
- constructor THtmlParser.Create;
- begin
- inherited Create;
- FHtmlReader := THtmlReader.Create;
- with FHtmlReader do
- begin
- OnAttributeEnd := ProcessAttributeEnd;
- OnAttributeStart := ProcessAttributeStart;
- OnCDataSection := ProcessCDataSection;
- OnComment := ProcessComment;
- OnDocType := ProcessDocType;
- OnElementEnd := ProcessElementEnd;
- OnElementStart := ProcessElementStart;
- OnEndElement := ProcessEndElement;
- OnEntityReference := ProcessEntityReference;
- //OnNotation := ProcessNotation;
- //OnProcessingInstruction := ProcessProcessingInstruction;
- OnTextNode := ProcessTextNode;
- end
- end;
- destructor THtmlParser.Destroy;
- begin
- FHtmlReader.Free;
- inherited Destroy
- end;
- function THtmlParser.FindDefParent: TElement;
- begin
- if FCurrentTag.Number in [HEAD_TAG, BODY_TAG] then
- Result := FHtmlDocument.appendChild(FHtmlDocument.createElement(htmlTagName)) as TElement
- else
- if FCurrentTag.Number in HeadTags then
- Result := GetMainElement(headTagName)
- else
- Result := GetMainElement(bodyTagName)
- end;
- function THtmlParser.FindParent: TElement;
- begin
- if (FCurrentTag.Number = P_TAG) or (FCurrentTag.Number in BlockTags) then
- Result := FindParentElement(BlockParentTags)
- else
- if FCurrentTag.Number = LI_TAG then
- Result := FindParentElement(ListItemParentTags)
- else
- if FCurrentTag.Number in [DD_TAG, DT_TAG] then
- Result := FindParentElement(DefItemParentTags)
- else
- if FCurrentTag.Number in [TD_TAG, TH_TAG] then
- Result := FindParentElement(CellParentTags)
- else
- if FCurrentTag.Number = TR_TAG then
- Result := FindParentElement(RowParentTags)
- else
- if FCurrentTag.Number = COL_TAG then
- Result := FindParentElement(ColParentTags)
- else
- if FCurrentTag.Number in [COLGROUP_TAG, THEAD_TAG, TFOOT_TAG, TBODY_TAG] then
- Result := FindParentElement(TableSectionParentTags)
- else
- if FCurrentTag.Number = TABLE_TAG then
- Result := FindTableParent
- else
- if FCurrentTag.Number = OPTION_TAG then
- Result := FindParentElement(OptionParentTags)
- else
- if FCurrentTag.Number in [HEAD_TAG, BODY_TAG] then
- Result := FHtmlDocument.documentElement as TElement
- else
- Result := nil;
- if Result = nil then
- Result := FindDefParent
- end;
- function THtmlParser.FindParentElement(tagList: THtmlTagSet): TElement;
- var
- Node: TNode;
- HtmlTag: THtmlTag;
- begin
- Node := FCurrentNode;
- while Node.nodeType = ELEMENT_NODE do
- begin
- HtmlTag := HtmlTagList.GetTagByName(Node.nodeName);
- if HtmlTag.Number in tagList then
- begin
- Result := Node as TElement;
- Exit
- end;
- Node := Node.parentNode
- end;
- Result := nil
- end;
- function THtmlParser.FindTableParent: TElement;
- var
- Node: TNode;
- HtmlTag: THtmlTag;
- begin
- Node := FCurrentNode;
- while Node.nodeType = ELEMENT_NODE do
- begin
- HtmlTag := HtmlTagList.GetTagByName(Node.nodeName);
- if (HtmlTag.Number = TD_TAG) or (HtmlTag.Number in BlockTags) then
- begin
- Result := Node as TElement;
- Exit
- end;
- Node := Node.parentNode
- end;
- Result := GetMainElement(bodyTagName)
- end;
- function THtmlParser.FindThisElement: TElement;
- var
- Node: TNode;
- begin
- Node := FCurrentNode;
- while Node.nodeType = ELEMENT_NODE do
- begin
- Result := Node as TElement;
- if Result.tagName = FHtmlReader.nodeName then
- Exit;
- Node := Node.parentNode
- end;
- Result := nil
- end;
- function THtmlParser.GetMainElement(const tagName: TDomString): TElement;
- var
- child: TNode;
- I: Integer;
- begin
- if FHtmlDocument.documentElement = nil then
- FHtmlDocument.appendChild(FHtmlDocument.createElement(htmlTagName));
- for I := 0 to FHtmlDocument.documentElement.childNodes.length - 1 do
- begin
- child := FHtmlDocument.documentElement.childNodes.item(I);
- if (child.nodeType = ELEMENT_NODE) and (child.nodeName = tagName) then
- begin
- Result := child as TElement;
- Exit
- end
- end;
- Result := FHtmlDocument.createElement(tagName);
- FHtmlDocument.documentElement.appendChild(Result)
- end;
- procedure THtmlParser.ProcessAttributeEnd(Sender: TObject);
- begin
- FCurrentNode := (FCurrentNode as TAttr).ownerElement
- end;
- procedure THtmlParser.ProcessAttributeStart(Sender: TObject);
- var
- Attr: TAttr;
- begin
- Attr := FHtmlDocument.createAttribute((Sender as THtmlReader).nodeName);
- (FCurrentNode as TElement).setAttributeNode(Attr);
- FCurrentNode := Attr
- end;
- procedure THtmlParser.ProcessCDataSection(Sender: TObject);
- var
- CDataSection: TCDataSection;
- begin
- CDataSection := FHtmlDocument.createCDATASection(FHtmlReader.nodeValue);
- FCurrentNode.appendChild(CDataSection)
- end;
- procedure THtmlParser.ProcessComment(Sender: TObject);
- var
- Comment: TComment;
- begin
- Comment := FHtmlDocument.createComment(FHtmlReader.nodeValue);
- FCurrentNode.appendChild(Comment)
- end;
- procedure THtmlParser.ProcessDocType(Sender: TObject);
- begin
- with FHtmlReader do
- FHtmlDocument.docType := DomImplementation.createDocumentType(nodeName, publicID, systemID);
- end;
- procedure THtmlParser.ProcessElementEnd(Sender: TObject);
- begin
- if FHtmlReader.isEmptyElement or (FCurrentTag.Number in EmptyTags) then
- FCurrentNode := FCurrentNode.parentNode;
- FCurrentTag := nil
- end;
- procedure THtmlParser.ProcessElementStart(Sender: TObject);
- var
- Element: TElement;
- Parent: TNode;
- begin
- FCurrentTag := HtmlTagList.GetTagByName(FHtmlReader.nodeName);
- if FCurrentTag.Number in NeedFindParentTags + BlockTags then
- begin
- Parent := FindParent;
- if not Assigned(Parent) then
- raise DomException.Create(HIERARCHY_REQUEST_ERR);
- FCurrentNode := Parent
- end;
- Element := FHtmlDocument.createElement(FHtmlReader.nodeName);
- FCurrentNode.appendChild(Element);
- FCurrentNode := Element
- end;
- procedure THtmlParser.ProcessEndElement(Sender: TObject);
- var
- Element: TElement;
- begin
- Element := FindThisElement;
- if Assigned(Element) then
- FCurrentNode := Element.parentNode
- { else
- if IsBlockTagName(FHtmlReader.nodeName) then
- raise DomException.Create(HIERARCHY_REQUEST_ERR)}
- end;
- procedure THtmlParser.ProcessEntityReference(Sender: TObject);
- var
- EntityReference: TEntityReference;
- begin
- EntityReference := FHtmlDocument.createEntityReference(FHtmlReader.nodeName);
- FCurrentNode.appendChild(EntityReference)
- end;
- procedure THtmlParser.ProcessTextNode(Sender: TObject);
- var
- TextNode: TTextNode;
- begin
- TextNode := FHtmlDocument.createTextNode(FHtmlReader.nodeValue);
- FCurrentNode.appendChild(TextNode)
- end;
- function THtmlParser.parseString(const htmlStr: TDomString): TDocument;
- begin
- FHtmlReader.htmlStr := htmlStr;
- FHtmlDocument := DomImplementation.createEmptyDocument(nil);
- FCurrentNode := FHtmlDocument;
- try
- while FHtmlReader.Read do;
- except
- // TODO: Add event ?
- end;
- Result := FHtmlDocument
- end;
- end.