PK
, , and blocks) we # want to drop leading newlines data = token["data"] self.processSpaceCharacters = self.processSpaceCharactersNonPre if (data.startswith("\n") and self.tree.openElements[-1].name in ("pre", "listing", "textarea") and not self.tree.openElements[-1].hasContent()): data = data[1:] if data: self.tree.reconstructActiveFormattingElements() self.tree.insertText(data) def processCharacters(self, token): if token["data"] == "\u0000": # The tokenizer should always emit null on its own return self.tree.reconstructActiveFormattingElements() self.tree.insertText(token["data"]) # This must be bad for performance if (self.parser.framesetOK and any([char not in spaceCharacters for char in token["data"]])): self.parser.framesetOK = False def processSpaceCharactersNonPre(self, token): self.tree.reconstructActiveFormattingElements() self.tree.insertText(token["data"]) def startTagProcessInHead(self, token): return self.parser.phases["inHead"].processStartTag(token) def startTagBody(self, token): self.parser.parseError("unexpected-start-tag", {"name": "body"}) if (len(self.tree.openElements) == 1 or self.tree.openElements[1].name != "body"): assert self.parser.innerHTML else: self.parser.framesetOK = False for attr, value in token["data"].items(): if attr not in self.tree.openElements[1].attributes: self.tree.openElements[1].attributes[attr] = value def startTagFrameset(self, token): self.parser.parseError("unexpected-start-tag", {"name": "frameset"}) if (len(self.tree.openElements) == 1 or self.tree.openElements[1].name != "body"): assert self.parser.innerHTML elif not self.parser.framesetOK: pass else: if self.tree.openElements[1].parent: self.tree.openElements[1].parent.removeChild(self.tree.openElements[1]) while self.tree.openElements[-1].name != "html": self.tree.openElements.pop() self.tree.insertElement(token) self.parser.phase = self.parser.phases["inFrameset"] def startTagCloseP(self, token): if self.tree.elementInScope("p", variant="button"): self.endTagP(impliedTagToken("p")) self.tree.insertElement(token) def startTagPreListing(self, token): if self.tree.elementInScope("p", variant="button"): self.endTagP(impliedTagToken("p")) self.tree.insertElement(token) self.parser.framesetOK = False self.processSpaceCharacters = self.processSpaceCharactersDropNewline def startTagForm(self, token): if self.tree.formPointer: self.parser.parseError("unexpected-start-tag", {"name": "form"}) else: if self.tree.elementInScope("p", variant="button"): self.endTagP(impliedTagToken("p")) self.tree.insertElement(token) self.tree.formPointer = self.tree.openElements[-1] def startTagListItem(self, token): self.parser.framesetOK = False stopNamesMap = {"li": ["li"], "dt": ["dt", "dd"], "dd": ["dt", "dd"]} stopNames = stopNamesMap[token["name"]] for node in reversed(self.tree.openElements): if node.name in stopNames: self.parser.phase.processEndTag( impliedTagToken(node.name, "EndTag")) break if (node.nameTuple in specialElements and node.name not in ("address", "div", "p")): break if self.tree.elementInScope("p", variant="button"): self.parser.phase.processEndTag( impliedTagToken("p", "EndTag")) self.tree.insertElement(token) def startTagPlaintext(self, token): if self.tree.elementInScope("p", variant="button"): self.endTagP(impliedTagToken("p")) self.tree.insertElement(token) self.parser.tokenizer.state = self.parser.tokenizer.plaintextState def startTagHeading(self, token): if self.tree.elementInScope("p", variant="button"): self.endTagP(impliedTagToken("p")) if self.tree.openElements[-1].name in headingElements: self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) self.tree.openElements.pop() self.tree.insertElement(token) def startTagA(self, token): afeAElement = self.tree.elementInActiveFormattingElements("a") if afeAElement: self.parser.parseError("unexpected-start-tag-implies-end-tag", {"startName": "a", "endName": "a"}) self.endTagFormatting(impliedTagToken("a")) if afeAElement in self.tree.openElements: self.tree.openElements.remove(afeAElement) if afeAElement in self.tree.activeFormattingElements: self.tree.activeFormattingElements.remove(afeAElement) self.tree.reconstructActiveFormattingElements() self.addFormattingElement(token) def startTagFormatting(self, token): self.tree.reconstructActiveFormattingElements() self.addFormattingElement(token) def startTagNobr(self, token): self.tree.reconstructActiveFormattingElements() if self.tree.elementInScope("nobr"): self.parser.parseError("unexpected-start-tag-implies-end-tag", {"startName": "nobr", "endName": "nobr"}) self.processEndTag(impliedTagToken("nobr")) # XXX Need tests that trigger the following self.tree.reconstructActiveFormattingElements() self.addFormattingElement(token) def startTagButton(self, token): if self.tree.elementInScope("button"): self.parser.parseError("unexpected-start-tag-implies-end-tag", {"startName": "button", "endName": "button"}) self.processEndTag(impliedTagToken("button")) return token else: self.tree.reconstructActiveFormattingElements() self.tree.insertElement(token) self.parser.framesetOK = False def startTagAppletMarqueeObject(self, token): self.tree.reconstructActiveFormattingElements() self.tree.insertElement(token) self.tree.activeFormattingElements.append(Marker) self.parser.framesetOK = False def startTagXmp(self, token): if self.tree.elementInScope("p", variant="button"): self.endTagP(impliedTagToken("p")) self.tree.reconstructActiveFormattingElements() self.parser.framesetOK = False self.parser.parseRCDataRawtext(token, "RAWTEXT") def startTagTable(self, token): if self.parser.compatMode != "quirks": if self.tree.elementInScope("p", variant="button"): self.processEndTag(impliedTagToken("p")) self.tree.insertElement(token) self.parser.framesetOK = False self.parser.phase = self.parser.phases["inTable"] def startTagVoidFormatting(self, token): self.tree.reconstructActiveFormattingElements() self.tree.insertElement(token) self.tree.openElements.pop() token["selfClosingAcknowledged"] = True self.parser.framesetOK = False def startTagInput(self, token): framesetOK = self.parser.framesetOK self.startTagVoidFormatting(token) if ("type" in token["data"] and token["data"]["type"].translate(asciiUpper2Lower) == "hidden"): # input type=hidden doesn't change framesetOK self.parser.framesetOK = framesetOK def startTagParamSource(self, token): self.tree.insertElement(token) self.tree.openElements.pop() token["selfClosingAcknowledged"] = True def startTagHr(self, token): if self.tree.elementInScope("p", variant="button"): self.endTagP(impliedTagToken("p")) self.tree.insertElement(token) self.tree.openElements.pop() token["selfClosingAcknowledged"] = True self.parser.framesetOK = False def startTagImage(self, token): # No really... self.parser.parseError("unexpected-start-tag-treated-as", {"originalName": "image", "newName": "img"}) self.processStartTag(impliedTagToken("img", "StartTag", attributes=token["data"], selfClosing=token["selfClosing"])) def startTagIsIndex(self, token): self.parser.parseError("deprecated-tag", {"name": "isindex"}) if self.tree.formPointer: return form_attrs = {} if "action" in token["data"]: form_attrs["action"] = token["data"]["action"] self.processStartTag(impliedTagToken("form", "StartTag", attributes=form_attrs)) self.processStartTag(impliedTagToken("hr", "StartTag")) self.processStartTag(impliedTagToken("label", "StartTag")) # XXX Localization ... if "prompt" in token["data"]: prompt = token["data"]["prompt"] else: prompt = "This is a searchable index. Enter search keywords: " self.processCharacters( {"type": tokenTypes["Characters"], "data": prompt}) attributes = token["data"].copy() if "action" in attributes: del attributes["action"] if "prompt" in attributes: del attributes["prompt"] attributes["name"] = "isindex" self.processStartTag(impliedTagToken("input", "StartTag", attributes=attributes, selfClosing=token["selfClosing"])) self.processEndTag(impliedTagToken("label")) self.processStartTag(impliedTagToken("hr", "StartTag")) self.processEndTag(impliedTagToken("form")) def startTagTextarea(self, token): self.tree.insertElement(token) self.parser.tokenizer.state = self.parser.tokenizer.rcdataState self.processSpaceCharacters = self.processSpaceCharactersDropNewline self.parser.framesetOK = False def startTagIFrame(self, token): self.parser.framesetOK = False self.startTagRawtext(token) def startTagNoscript(self, token): if self.parser.scripting: self.startTagRawtext(token) else: self.startTagOther(token) def startTagRawtext(self, token): """iframe, noembed noframes, noscript(if scripting enabled)""" self.parser.parseRCDataRawtext(token, "RAWTEXT") def startTagOpt(self, token): if self.tree.openElements[-1].name == "option": self.parser.phase.processEndTag(impliedTagToken("option")) self.tree.reconstructActiveFormattingElements() self.parser.tree.insertElement(token) def startTagSelect(self, token): self.tree.reconstructActiveFormattingElements() self.tree.insertElement(token) self.parser.framesetOK = False if self.parser.phase in (self.parser.phases["inTable"], self.parser.phases["inCaption"], self.parser.phases["inColumnGroup"], self.parser.phases["inTableBody"], self.parser.phases["inRow"], self.parser.phases["inCell"]): self.parser.phase = self.parser.phases["inSelectInTable"] else: self.parser.phase = self.parser.phases["inSelect"] def startTagRpRt(self, token): if self.tree.elementInScope("ruby"): self.tree.generateImpliedEndTags() if self.tree.openElements[-1].name != "ruby": self.parser.parseError() self.tree.insertElement(token) def startTagMath(self, token): self.tree.reconstructActiveFormattingElements() self.parser.adjustMathMLAttributes(token) self.parser.adjustForeignAttributes(token) token["namespace"] = namespaces["mathml"] self.tree.insertElement(token) # Need to get the parse error right for the case where the token # has a namespace not equal to the xmlns attribute if token["selfClosing"]: self.tree.openElements.pop() token["selfClosingAcknowledged"] = True def startTagSvg(self, token): self.tree.reconstructActiveFormattingElements() self.parser.adjustSVGAttributes(token) self.parser.adjustForeignAttributes(token) token["namespace"] = namespaces["svg"] self.tree.insertElement(token) # Need to get the parse error right for the case where the token # has a namespace not equal to the xmlns attribute if token["selfClosing"]: self.tree.openElements.pop() token["selfClosingAcknowledged"] = True def startTagMisplaced(self, token): """ Elements that should be children of other elements that have a different insertion mode; here they are ignored "caption", "col", "colgroup", "frame", "frameset", "head", "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", "tr", "noscript" """ self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]}) def startTagOther(self, token): self.tree.reconstructActiveFormattingElements() self.tree.insertElement(token) def endTagP(self, token): if not self.tree.elementInScope("p", variant="button"): self.startTagCloseP(impliedTagToken("p", "StartTag")) self.parser.parseError("unexpected-end-tag", {"name": "p"}) self.endTagP(impliedTagToken("p", "EndTag")) else: self.tree.generateImpliedEndTags("p") if self.tree.openElements[-1].name != "p": self.parser.parseError("unexpected-end-tag", {"name": "p"}) node = self.tree.openElements.pop() while node.name != "p": node = self.tree.openElements.pop() def endTagBody(self, token): if not self.tree.elementInScope("body"): self.parser.parseError() return elif self.tree.openElements[-1].name != "body": for node in self.tree.openElements[2:]: if node.name not in frozenset(("dd", "dt", "li", "optgroup", "option", "p", "rp", "rt", "tbody", "td", "tfoot", "th", "thead", "tr", "body", "html")): # Not sure this is the correct name for the parse error self.parser.parseError( "expected-one-end-tag-but-got-another", {"gotName": "body", "expectedName": node.name}) break self.parser.phase = self.parser.phases["afterBody"] def endTagHtml(self, token): # We repeat the test for the body end tag token being ignored here if self.tree.elementInScope("body"): self.endTagBody(impliedTagToken("body")) return token def endTagBlock(self, token): # Put us back in the right whitespace handling mode if token["name"] == "pre": self.processSpaceCharacters = self.processSpaceCharactersNonPre inScope = self.tree.elementInScope(token["name"]) if inScope: self.tree.generateImpliedEndTags() if self.tree.openElements[-1].name != token["name"]: self.parser.parseError("end-tag-too-early", {"name": token["name"]}) if inScope: node = self.tree.openElements.pop() while node.name != token["name"]: node = self.tree.openElements.pop() def endTagForm(self, token): node = self.tree.formPointer self.tree.formPointer = None if node is None or not self.tree.elementInScope(node): self.parser.parseError("unexpected-end-tag", {"name": "form"}) else: self.tree.generateImpliedEndTags() if self.tree.openElements[-1] != node: self.parser.parseError("end-tag-too-early-ignored", {"name": "form"}) self.tree.openElements.remove(node) def endTagListItem(self, token): if token["name"] == "li": variant = "list" else: variant = None if not self.tree.elementInScope(token["name"], variant=variant): self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) else: self.tree.generateImpliedEndTags(exclude=token["name"]) if self.tree.openElements[-1].name != token["name"]: self.parser.parseError( "end-tag-too-early", {"name": token["name"]}) node = self.tree.openElements.pop() while node.name != token["name"]: node = self.tree.openElements.pop() def endTagHeading(self, token): for item in headingElements: if self.tree.elementInScope(item): self.tree.generateImpliedEndTags() break if self.tree.openElements[-1].name != token["name"]: self.parser.parseError("end-tag-too-early", {"name": token["name"]}) for item in headingElements: if self.tree.elementInScope(item): item = self.tree.openElements.pop() while item.name not in headingElements: item = self.tree.openElements.pop() break def endTagFormatting(self, token): """The much-feared adoption agency algorithm""" # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867 # XXX Better parseError messages appreciated. # Step 1 outerLoopCounter = 0 # Step 2 while outerLoopCounter < 8: # Step 3 outerLoopCounter += 1 # Step 4: # Let the formatting element be the last element in # the list of active formatting elements that: # - is between the end of the list and the last scope # marker in the list, if any, or the start of the list # otherwise, and # - has the same tag name as the token. formattingElement = self.tree.elementInActiveFormattingElements( token["name"]) if (not formattingElement or (formattingElement in self.tree.openElements and not self.tree.elementInScope(formattingElement.name))): # If there is no such node, then abort these steps # and instead act as described in the "any other # end tag" entry below. self.endTagOther(token) return # Otherwise, if there is such a node, but that node is # not in the stack of open elements, then this is a # parse error; remove the element from the list, and # abort these steps. elif formattingElement not in self.tree.openElements: self.parser.parseError("adoption-agency-1.2", {"name": token["name"]}) self.tree.activeFormattingElements.remove(formattingElement) return # Otherwise, if there is such a node, and that node is # also in the stack of open elements, but the element # is not in scope, then this is a parse error; ignore # the token, and abort these steps. elif not self.tree.elementInScope(formattingElement.name): self.parser.parseError("adoption-agency-4.4", {"name": token["name"]}) return # Otherwise, there is a formatting element and that # element is in the stack and is in scope. If the # element is not the current node, this is a parse # error. In any case, proceed with the algorithm as # written in the following steps. else: if formattingElement != self.tree.openElements[-1]: self.parser.parseError("adoption-agency-1.3", {"name": token["name"]}) # Step 5: # Let the furthest block be the topmost node in the # stack of open elements that is lower in the stack # than the formatting element, and is an element in # the special category. There might not be one. afeIndex = self.tree.openElements.index(formattingElement) furthestBlock = None for element in self.tree.openElements[afeIndex:]: if element.nameTuple in specialElements: furthestBlock = element break # Step 6: # If there is no furthest block, then the UA must # first pop all the nodes from the bottom of the stack # of open elements, from the current node up to and # including the formatting element, then remove the # formatting element from the list of active # formatting elements, and finally abort these steps. if furthestBlock is None: element = self.tree.openElements.pop() while element != formattingElement: element = self.tree.openElements.pop() self.tree.activeFormattingElements.remove(element) return # Step 7 commonAncestor = self.tree.openElements[afeIndex - 1] # Step 8: # The bookmark is supposed to help us identify where to reinsert # nodes in step 15. We have to ensure that we reinsert nodes after # the node before the active formatting element. Note the bookmark # can move in step 9.7 bookmark = self.tree.activeFormattingElements.index(formattingElement) # Step 9 lastNode = node = furthestBlock innerLoopCounter = 0 index = self.tree.openElements.index(node) while innerLoopCounter < 3: innerLoopCounter += 1 # Node is element before node in open elements index -= 1 node = self.tree.openElements[index] if node not in self.tree.activeFormattingElements: self.tree.openElements.remove(node) continue # Step 9.6 if node == formattingElement: break # Step 9.7 if lastNode == furthestBlock: bookmark = self.tree.activeFormattingElements.index(node) + 1 # Step 9.8 clone = node.cloneNode() # Replace node with clone self.tree.activeFormattingElements[ self.tree.activeFormattingElements.index(node)] = clone self.tree.openElements[ self.tree.openElements.index(node)] = clone node = clone # Step 9.9 # Remove lastNode from its parents, if any if lastNode.parent: lastNode.parent.removeChild(lastNode) node.appendChild(lastNode) # Step 9.10 lastNode = node # Step 10 # Foster parent lastNode if commonAncestor is a # table, tbody, tfoot, thead, or tr we need to foster # parent the lastNode if lastNode.parent: lastNode.parent.removeChild(lastNode) if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")): parent, insertBefore = self.tree.getTableMisnestedNodePosition() parent.insertBefore(lastNode, insertBefore) else: commonAncestor.appendChild(lastNode) # Step 11 clone = formattingElement.cloneNode() # Step 12 furthestBlock.reparentChildren(clone) # Step 13 furthestBlock.appendChild(clone) # Step 14 self.tree.activeFormattingElements.remove(formattingElement) self.tree.activeFormattingElements.insert(bookmark, clone) # Step 15 self.tree.openElements.remove(formattingElement) self.tree.openElements.insert( self.tree.openElements.index(furthestBlock) + 1, clone) def endTagAppletMarqueeObject(self, token): if self.tree.elementInScope(token["name"]): self.tree.generateImpliedEndTags() if self.tree.openElements[-1].name != token["name"]: self.parser.parseError("end-tag-too-early", {"name": token["name"]}) if self.tree.elementInScope(token["name"]): element = self.tree.openElements.pop() while element.name != token["name"]: element = self.tree.openElements.pop() self.tree.clearActiveFormattingElements() def endTagBr(self, token): self.parser.parseError("unexpected-end-tag-treated-as", {"originalName": "br", "newName": "br element"}) self.tree.reconstructActiveFormattingElements() self.tree.insertElement(impliedTagToken("br", "StartTag")) self.tree.openElements.pop() def endTagOther(self, token): for node in self.tree.openElements[::-1]: if node.name == token["name"]: self.tree.generateImpliedEndTags(exclude=token["name"]) if self.tree.openElements[-1].name != token["name"]: self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) while self.tree.openElements.pop() != node: pass break else: if node.nameTuple in specialElements: self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) break class TextPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ ("script", self.endTagScript)]) self.endTagHandler.default = self.endTagOther def processCharacters(self, token): self.tree.insertText(token["data"]) def processEOF(self): self.parser.parseError("expected-named-closing-tag-but-got-eof", {"name": self.tree.openElements[-1].name}) self.tree.openElements.pop() self.parser.phase = self.parser.originalPhase return True def startTagOther(self, token): assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name'] def endTagScript(self, token): node = self.tree.openElements.pop() assert node.name == "script" self.parser.phase = self.parser.originalPhase # The rest of this method is all stuff that only happens if # document.write works def endTagOther(self, token): self.tree.openElements.pop() self.parser.phase = self.parser.originalPhase class InTablePhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-table def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("caption", self.startTagCaption), ("colgroup", self.startTagColgroup), ("col", self.startTagCol), (("tbody", "tfoot", "thead"), self.startTagRowGroup), (("td", "th", "tr"), self.startTagImplyTbody), ("table", self.startTagTable), (("style", "script"), self.startTagStyleScript), ("input", self.startTagInput), ("form", self.startTagForm) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ ("table", self.endTagTable), (("body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"), self.endTagIgnore) ]) self.endTagHandler.default = self.endTagOther # helper methods def clearStackToTableContext(self): # "clear the stack back to a table context" while self.tree.openElements[-1].name not in ("table", "html"): # self.parser.parseError("unexpected-implied-end-tag-in-table", # {"name": self.tree.openElements[-1].name}) self.tree.openElements.pop() # When the current node is it's an innerHTML case # processing methods def processEOF(self): if self.tree.openElements[-1].name != "html": self.parser.parseError("eof-in-table") else: assert self.parser.innerHTML # Stop parsing def processSpaceCharacters(self, token): originalPhase = self.parser.phase self.parser.phase = self.parser.phases["inTableText"] self.parser.phase.originalPhase = originalPhase self.parser.phase.processSpaceCharacters(token) def processCharacters(self, token): originalPhase = self.parser.phase self.parser.phase = self.parser.phases["inTableText"] self.parser.phase.originalPhase = originalPhase self.parser.phase.processCharacters(token) def insertText(self, token): # If we get here there must be at least one non-whitespace character # Do the table magic! self.tree.insertFromTable = True self.parser.phases["inBody"].processCharacters(token) self.tree.insertFromTable = False def startTagCaption(self, token): self.clearStackToTableContext() self.tree.activeFormattingElements.append(Marker) self.tree.insertElement(token) self.parser.phase = self.parser.phases["inCaption"] def startTagColgroup(self, token): self.clearStackToTableContext() self.tree.insertElement(token) self.parser.phase = self.parser.phases["inColumnGroup"] def startTagCol(self, token): self.startTagColgroup(impliedTagToken("colgroup", "StartTag")) return token def startTagRowGroup(self, token): self.clearStackToTableContext() self.tree.insertElement(token) self.parser.phase = self.parser.phases["inTableBody"] def startTagImplyTbody(self, token): self.startTagRowGroup(impliedTagToken("tbody", "StartTag")) return token def startTagTable(self, token): self.parser.parseError("unexpected-start-tag-implies-end-tag", {"startName": "table", "endName": "table"}) self.parser.phase.processEndTag(impliedTagToken("table")) if not self.parser.innerHTML: return token def startTagStyleScript(self, token): return self.parser.phases["inHead"].processStartTag(token) def startTagInput(self, token): if ("type" in token["data"] and token["data"]["type"].translate(asciiUpper2Lower) == "hidden"): self.parser.parseError("unexpected-hidden-input-in-table") self.tree.insertElement(token) # XXX associate with form self.tree.openElements.pop() else: self.startTagOther(token) def startTagForm(self, token): self.parser.parseError("unexpected-form-in-table") if self.tree.formPointer is None: self.tree.insertElement(token) self.tree.formPointer = self.tree.openElements[-1] self.tree.openElements.pop() def startTagOther(self, token): self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]}) # Do the table magic! self.tree.insertFromTable = True self.parser.phases["inBody"].processStartTag(token) self.tree.insertFromTable = False def endTagTable(self, token): if self.tree.elementInScope("table", variant="table"): self.tree.generateImpliedEndTags() if self.tree.openElements[-1].name != "table": self.parser.parseError("end-tag-too-early-named", {"gotName": "table", "expectedName": self.tree.openElements[-1].name}) while self.tree.openElements[-1].name != "table": self.tree.openElements.pop() self.tree.openElements.pop() self.parser.resetInsertionMode() else: # innerHTML case assert self.parser.innerHTML self.parser.parseError() def endTagIgnore(self, token): self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) def endTagOther(self, token): self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]}) # Do the table magic! self.tree.insertFromTable = True self.parser.phases["inBody"].processEndTag(token) self.tree.insertFromTable = False class InTableTextPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.originalPhase = None self.characterTokens = [] def flushCharacters(self): data = "".join([item["data"] for item in self.characterTokens]) if any([item not in spaceCharacters for item in data]): token = {"type": tokenTypes["Characters"], "data": data} self.parser.phases["inTable"].insertText(token) elif data: self.tree.insertText(data) self.characterTokens = [] def processComment(self, token): self.flushCharacters() self.parser.phase = self.originalPhase return token def processEOF(self): self.flushCharacters() self.parser.phase = self.originalPhase return True def processCharacters(self, token): if token["data"] == "\u0000": return self.characterTokens.append(token) def processSpaceCharacters(self, token): # pretty sure we should never reach here self.characterTokens.append(token) # assert False def processStartTag(self, token): self.flushCharacters() self.parser.phase = self.originalPhase return token def processEndTag(self, token): self.flushCharacters() self.parser.phase = self.originalPhase return token class InCaptionPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-caption def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"), self.startTagTableElement) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ ("caption", self.endTagCaption), ("table", self.endTagTable), (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"), self.endTagIgnore) ]) self.endTagHandler.default = self.endTagOther def ignoreEndTagCaption(self): return not self.tree.elementInScope("caption", variant="table") def processEOF(self): self.parser.phases["inBody"].processEOF() def processCharacters(self, token): return self.parser.phases["inBody"].processCharacters(token) def startTagTableElement(self, token): self.parser.parseError() # XXX Have to duplicate logic here to find out if the tag is ignored ignoreEndTag = self.ignoreEndTagCaption() self.parser.phase.processEndTag(impliedTagToken("caption")) if not ignoreEndTag: return token def startTagOther(self, token): return self.parser.phases["inBody"].processStartTag(token) def endTagCaption(self, token): if not self.ignoreEndTagCaption(): # AT this code is quite similar to endTagTable in "InTable" self.tree.generateImpliedEndTags() if self.tree.openElements[-1].name != "caption": self.parser.parseError("expected-one-end-tag-but-got-another", {"gotName": "caption", "expectedName": self.tree.openElements[-1].name}) while self.tree.openElements[-1].name != "caption": self.tree.openElements.pop() self.tree.openElements.pop() self.tree.clearActiveFormattingElements() self.parser.phase = self.parser.phases["inTable"] else: # innerHTML case assert self.parser.innerHTML self.parser.parseError() def endTagTable(self, token): self.parser.parseError() ignoreEndTag = self.ignoreEndTagCaption() self.parser.phase.processEndTag(impliedTagToken("caption")) if not ignoreEndTag: return token def endTagIgnore(self, token): self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) def endTagOther(self, token): return self.parser.phases["inBody"].processEndTag(token) class InColumnGroupPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-column def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("col", self.startTagCol) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ ("colgroup", self.endTagColgroup), ("col", self.endTagCol) ]) self.endTagHandler.default = self.endTagOther def ignoreEndTagColgroup(self): return self.tree.openElements[-1].name == "html" def processEOF(self): if self.tree.openElements[-1].name == "html": assert self.parser.innerHTML return else: ignoreEndTag = self.ignoreEndTagColgroup() self.endTagColgroup(impliedTagToken("colgroup")) if not ignoreEndTag: return True def processCharacters(self, token): ignoreEndTag = self.ignoreEndTagColgroup() self.endTagColgroup(impliedTagToken("colgroup")) if not ignoreEndTag: return token def startTagCol(self, token): self.tree.insertElement(token) self.tree.openElements.pop() token["selfClosingAcknowledged"] = True def startTagOther(self, token): ignoreEndTag = self.ignoreEndTagColgroup() self.endTagColgroup(impliedTagToken("colgroup")) if not ignoreEndTag: return token def endTagColgroup(self, token): if self.ignoreEndTagColgroup(): # innerHTML case assert self.parser.innerHTML self.parser.parseError() else: self.tree.openElements.pop() self.parser.phase = self.parser.phases["inTable"] def endTagCol(self, token): self.parser.parseError("no-end-tag", {"name": "col"}) def endTagOther(self, token): ignoreEndTag = self.ignoreEndTagColgroup() self.endTagColgroup(impliedTagToken("colgroup")) if not ignoreEndTag: return token class InTableBodyPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-table0 def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("tr", self.startTagTr), (("td", "th"), self.startTagTableCell), (("caption", "col", "colgroup", "tbody", "tfoot", "thead"), self.startTagTableOther) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ (("tbody", "tfoot", "thead"), self.endTagTableRowGroup), ("table", self.endTagTable), (("body", "caption", "col", "colgroup", "html", "td", "th", "tr"), self.endTagIgnore) ]) self.endTagHandler.default = self.endTagOther # helper methods def clearStackToTableBodyContext(self): while self.tree.openElements[-1].name not in ("tbody", "tfoot", "thead", "html"): # self.parser.parseError("unexpected-implied-end-tag-in-table", # {"name": self.tree.openElements[-1].name}) self.tree.openElements.pop() if self.tree.openElements[-1].name == "html": assert self.parser.innerHTML # the rest def processEOF(self): self.parser.phases["inTable"].processEOF() def processSpaceCharacters(self, token): return self.parser.phases["inTable"].processSpaceCharacters(token) def processCharacters(self, token): return self.parser.phases["inTable"].processCharacters(token) def startTagTr(self, token): self.clearStackToTableBodyContext() self.tree.insertElement(token) self.parser.phase = self.parser.phases["inRow"] def startTagTableCell(self, token): self.parser.parseError("unexpected-cell-in-table-body", {"name": token["name"]}) self.startTagTr(impliedTagToken("tr", "StartTag")) return token def startTagTableOther(self, token): # XXX AT Any ideas on how to share this with endTagTable? if (self.tree.elementInScope("tbody", variant="table") or self.tree.elementInScope("thead", variant="table") or self.tree.elementInScope("tfoot", variant="table")): self.clearStackToTableBodyContext() self.endTagTableRowGroup( impliedTagToken(self.tree.openElements[-1].name)) return token else: # innerHTML case assert self.parser.innerHTML self.parser.parseError() def startTagOther(self, token): return self.parser.phases["inTable"].processStartTag(token) def endTagTableRowGroup(self, token): if self.tree.elementInScope(token["name"], variant="table"): self.clearStackToTableBodyContext() self.tree.openElements.pop() self.parser.phase = self.parser.phases["inTable"] else: self.parser.parseError("unexpected-end-tag-in-table-body", {"name": token["name"]}) def endTagTable(self, token): if (self.tree.elementInScope("tbody", variant="table") or self.tree.elementInScope("thead", variant="table") or self.tree.elementInScope("tfoot", variant="table")): self.clearStackToTableBodyContext() self.endTagTableRowGroup( impliedTagToken(self.tree.openElements[-1].name)) return token else: # innerHTML case assert self.parser.innerHTML self.parser.parseError() def endTagIgnore(self, token): self.parser.parseError("unexpected-end-tag-in-table-body", {"name": token["name"]}) def endTagOther(self, token): return self.parser.phases["inTable"].processEndTag(token) class InRowPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-row def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("td", "th"), self.startTagTableCell), (("caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr"), self.startTagTableOther) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ ("tr", self.endTagTr), ("table", self.endTagTable), (("tbody", "tfoot", "thead"), self.endTagTableRowGroup), (("body", "caption", "col", "colgroup", "html", "td", "th"), self.endTagIgnore) ]) self.endTagHandler.default = self.endTagOther # helper methods (XXX unify this with other table helper methods) def clearStackToTableRowContext(self): while self.tree.openElements[-1].name not in ("tr", "html"): self.parser.parseError("unexpected-implied-end-tag-in-table-row", {"name": self.tree.openElements[-1].name}) self.tree.openElements.pop() def ignoreEndTagTr(self): return not self.tree.elementInScope("tr", variant="table") # the rest def processEOF(self): self.parser.phases["inTable"].processEOF() def processSpaceCharacters(self, token): return self.parser.phases["inTable"].processSpaceCharacters(token) def processCharacters(self, token): return self.parser.phases["inTable"].processCharacters(token) def startTagTableCell(self, token): self.clearStackToTableRowContext() self.tree.insertElement(token) self.parser.phase = self.parser.phases["inCell"] self.tree.activeFormattingElements.append(Marker) def startTagTableOther(self, token): ignoreEndTag = self.ignoreEndTagTr() self.endTagTr(impliedTagToken("tr")) # XXX how are we sure it's always ignored in the innerHTML case? if not ignoreEndTag: return token def startTagOther(self, token): return self.parser.phases["inTable"].processStartTag(token) def endTagTr(self, token): if not self.ignoreEndTagTr(): self.clearStackToTableRowContext() self.tree.openElements.pop() self.parser.phase = self.parser.phases["inTableBody"] else: # innerHTML case assert self.parser.innerHTML self.parser.parseError() def endTagTable(self, token): ignoreEndTag = self.ignoreEndTagTr() self.endTagTr(impliedTagToken("tr")) # Reprocess the current tag if the tr end tag was not ignored # XXX how are we sure it's always ignored in the innerHTML case? if not ignoreEndTag: return token def endTagTableRowGroup(self, token): if self.tree.elementInScope(token["name"], variant="table"): self.endTagTr(impliedTagToken("tr")) return token else: self.parser.parseError() def endTagIgnore(self, token): self.parser.parseError("unexpected-end-tag-in-table-row", {"name": token["name"]}) def endTagOther(self, token): return self.parser.phases["inTable"].processEndTag(token) class InCellPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-cell def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"), self.startTagTableOther) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ (("td", "th"), self.endTagTableCell), (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore), (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply) ]) self.endTagHandler.default = self.endTagOther # helper def closeCell(self): if self.tree.elementInScope("td", variant="table"): self.endTagTableCell(impliedTagToken("td")) elif self.tree.elementInScope("th", variant="table"): self.endTagTableCell(impliedTagToken("th")) # the rest def processEOF(self): self.parser.phases["inBody"].processEOF() def processCharacters(self, token): return self.parser.phases["inBody"].processCharacters(token) def startTagTableOther(self, token): if (self.tree.elementInScope("td", variant="table") or self.tree.elementInScope("th", variant="table")): self.closeCell() return token else: # innerHTML case assert self.parser.innerHTML self.parser.parseError() def startTagOther(self, token): return self.parser.phases["inBody"].processStartTag(token) def endTagTableCell(self, token): if self.tree.elementInScope(token["name"], variant="table"): self.tree.generateImpliedEndTags(token["name"]) if self.tree.openElements[-1].name != token["name"]: self.parser.parseError("unexpected-cell-end-tag", {"name": token["name"]}) while True: node = self.tree.openElements.pop() if node.name == token["name"]: break else: self.tree.openElements.pop() self.tree.clearActiveFormattingElements() self.parser.phase = self.parser.phases["inRow"] else: self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) def endTagIgnore(self, token): self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) def endTagImply(self, token): if self.tree.elementInScope(token["name"], variant="table"): self.closeCell() return token else: # sometimes innerHTML case self.parser.parseError() def endTagOther(self, token): return self.parser.phases["inBody"].processEndTag(token) class InSelectPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("option", self.startTagOption), ("optgroup", self.startTagOptgroup), ("select", self.startTagSelect), (("input", "keygen", "textarea"), self.startTagInput), ("script", self.startTagScript) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ ("option", self.endTagOption), ("optgroup", self.endTagOptgroup), ("select", self.endTagSelect) ]) self.endTagHandler.default = self.endTagOther # http://www.whatwg.org/specs/web-apps/current-work/#in-select def processEOF(self): if self.tree.openElements[-1].name != "html": self.parser.parseError("eof-in-select") else: assert self.parser.innerHTML def processCharacters(self, token): if token["data"] == "\u0000": return self.tree.insertText(token["data"]) def startTagOption(self, token): # We need to imply if is the current node. if self.tree.openElements[-1].name == "option": self.tree.openElements.pop() self.tree.insertElement(token) def startTagOptgroup(self, token): if self.tree.openElements[-1].name == "option": self.tree.openElements.pop() if self.tree.openElements[-1].name == "optgroup": self.tree.openElements.pop() self.tree.insertElement(token) def startTagSelect(self, token): self.parser.parseError("unexpected-select-in-select") self.endTagSelect(impliedTagToken("select")) def startTagInput(self, token): self.parser.parseError("unexpected-input-in-select") if self.tree.elementInScope("select", variant="select"): self.endTagSelect(impliedTagToken("select")) return token else: assert self.parser.innerHTML def startTagScript(self, token): return self.parser.phases["inHead"].processStartTag(token) def startTagOther(self, token): self.parser.parseError("unexpected-start-tag-in-select", {"name": token["name"]}) def endTagOption(self, token): if self.tree.openElements[-1].name == "option": self.tree.openElements.pop() else: self.parser.parseError("unexpected-end-tag-in-select", {"name": "option"}) def endTagOptgroup(self, token): # implicitly closes if (self.tree.openElements[-1].name == "option" and self.tree.openElements[-2].name == "optgroup"): self.tree.openElements.pop() # It also closes if self.tree.openElements[-1].name == "optgroup": self.tree.openElements.pop() # But nothing else else: self.parser.parseError("unexpected-end-tag-in-select", {"name": "optgroup"}) def endTagSelect(self, token): if self.tree.elementInScope("select", variant="select"): node = self.tree.openElements.pop() while node.name != "select": node = self.tree.openElements.pop() self.parser.resetInsertionMode() else: # innerHTML case assert self.parser.innerHTML self.parser.parseError() def endTagOther(self, token): self.parser.parseError("unexpected-end-tag-in-select", {"name": token["name"]}) class InSelectInTablePhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"), self.startTagTable) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"), self.endTagTable) ]) self.endTagHandler.default = self.endTagOther def processEOF(self): self.parser.phases["inSelect"].processEOF() def processCharacters(self, token): return self.parser.phases["inSelect"].processCharacters(token) def startTagTable(self, token): self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": token["name"]}) self.endTagOther(impliedTagToken("select")) return token def startTagOther(self, token): return self.parser.phases["inSelect"].processStartTag(token) def endTagTable(self, token): self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]}) if self.tree.elementInScope(token["name"], variant="table"): self.endTagOther(impliedTagToken("select")) return token def endTagOther(self, token): return self.parser.phases["inSelect"].processEndTag(token) class InForeignContentPhase(Phase): breakoutElements = frozenset(["b", "big", "blockquote", "body", "br", "center", "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing", "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s", "small", "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul", "var"]) def __init__(self, parser, tree): Phase.__init__(self, parser, tree) def adjustSVGTagNames(self, token): replacements = {"altglyph": "altGlyph", "altglyphdef": "altGlyphDef", "altglyphitem": "altGlyphItem", "animatecolor": "animateColor", "animatemotion": "animateMotion", "animatetransform": "animateTransform", "clippath": "clipPath", "feblend": "feBlend", "fecolormatrix": "feColorMatrix", "fecomponenttransfer": "feComponentTransfer", "fecomposite": "feComposite", "feconvolvematrix": "feConvolveMatrix", "fediffuselighting": "feDiffuseLighting", "fedisplacementmap": "feDisplacementMap", "fedistantlight": "feDistantLight", "feflood": "feFlood", "fefunca": "feFuncA", "fefuncb": "feFuncB", "fefuncg": "feFuncG", "fefuncr": "feFuncR", "fegaussianblur": "feGaussianBlur", "feimage": "feImage", "femerge": "feMerge", "femergenode": "feMergeNode", "femorphology": "feMorphology", "feoffset": "feOffset", "fepointlight": "fePointLight", "fespecularlighting": "feSpecularLighting", "fespotlight": "feSpotLight", "fetile": "feTile", "feturbulence": "feTurbulence", "foreignobject": "foreignObject", "glyphref": "glyphRef", "lineargradient": "linearGradient", "radialgradient": "radialGradient", "textpath": "textPath"} if token["name"] in replacements: token["name"] = replacements[token["name"]] def processCharacters(self, token): if token["data"] == "\u0000": token["data"] = "\uFFFD" elif (self.parser.framesetOK and any(char not in spaceCharacters for char in token["data"])): self.parser.framesetOK = False Phase.processCharacters(self, token) def processStartTag(self, token): currentNode = self.tree.openElements[-1] if (token["name"] in self.breakoutElements or (token["name"] == "font" and set(token["data"].keys()) & set(["color", "face", "size"]))): self.parser.parseError("unexpected-html-element-in-foreign-content", {"name": token["name"]}) while (self.tree.openElements[-1].namespace != self.tree.defaultNamespace and not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])): self.tree.openElements.pop() return token else: if currentNode.namespace == namespaces["mathml"]: self.parser.adjustMathMLAttributes(token) elif currentNode.namespace == namespaces["svg"]: self.adjustSVGTagNames(token) self.parser.adjustSVGAttributes(token) self.parser.adjustForeignAttributes(token) token["namespace"] = currentNode.namespace self.tree.insertElement(token) if token["selfClosing"]: self.tree.openElements.pop() token["selfClosingAcknowledged"] = True def processEndTag(self, token): nodeIndex = len(self.tree.openElements) - 1 node = self.tree.openElements[-1] if node.name.translate(asciiUpper2Lower) != token["name"]: self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) while True: if node.name.translate(asciiUpper2Lower) == token["name"]: # XXX this isn't in the spec but it seems necessary if self.parser.phase == self.parser.phases["inTableText"]: self.parser.phase.flushCharacters() self.parser.phase = self.parser.phase.originalPhase while self.tree.openElements.pop() != node: assert self.tree.openElements new_token = None break nodeIndex -= 1 node = self.tree.openElements[nodeIndex] if node.namespace != self.tree.defaultNamespace: continue else: new_token = self.parser.phase.processEndTag(token) break return new_token class AfterBodyPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)]) self.endTagHandler.default = self.endTagOther def processEOF(self): # Stop parsing pass def processComment(self, token): # This is needed because data is to be appended to the element # here and not to whatever is currently open. self.tree.insertComment(token, self.tree.openElements[0]) def processCharacters(self, token): self.parser.parseError("unexpected-char-after-body") self.parser.phase = self.parser.phases["inBody"] return token def startTagHtml(self, token): return self.parser.phases["inBody"].processStartTag(token) def startTagOther(self, token): self.parser.parseError("unexpected-start-tag-after-body", {"name": token["name"]}) self.parser.phase = self.parser.phases["inBody"] return token def endTagHtml(self, name): if self.parser.innerHTML: self.parser.parseError("unexpected-end-tag-after-body-innerhtml") else: self.parser.phase = self.parser.phases["afterAfterBody"] def endTagOther(self, token): self.parser.parseError("unexpected-end-tag-after-body", {"name": token["name"]}) self.parser.phase = self.parser.phases["inBody"] return token class InFramesetPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("frameset", self.startTagFrameset), ("frame", self.startTagFrame), ("noframes", self.startTagNoframes) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ ("frameset", self.endTagFrameset) ]) self.endTagHandler.default = self.endTagOther def processEOF(self): if self.tree.openElements[-1].name != "html": self.parser.parseError("eof-in-frameset") else: assert self.parser.innerHTML def processCharacters(self, token): self.parser.parseError("unexpected-char-in-frameset") def startTagFrameset(self, token): self.tree.insertElement(token) def startTagFrame(self, token): self.tree.insertElement(token) self.tree.openElements.pop() def startTagNoframes(self, token): return self.parser.phases["inBody"].processStartTag(token) def startTagOther(self, token): self.parser.parseError("unexpected-start-tag-in-frameset", {"name": token["name"]}) def endTagFrameset(self, token): if self.tree.openElements[-1].name == "html": # innerHTML case self.parser.parseError("unexpected-frameset-in-frameset-innerhtml") else: self.tree.openElements.pop() if (not self.parser.innerHTML and self.tree.openElements[-1].name != "frameset"): # If we're not in innerHTML mode and the current node is not a # "frameset" element (anymore) then switch. self.parser.phase = self.parser.phases["afterFrameset"] def endTagOther(self, token): self.parser.parseError("unexpected-end-tag-in-frameset", {"name": token["name"]}) class AfterFramesetPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#after3 def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("noframes", self.startTagNoframes) ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = _utils.MethodDispatcher([ ("html", self.endTagHtml) ]) self.endTagHandler.default = self.endTagOther def processEOF(self): # Stop parsing pass def processCharacters(self, token): self.parser.parseError("unexpected-char-after-frameset") def startTagNoframes(self, token): return self.parser.phases["inHead"].processStartTag(token) def startTagOther(self, token): self.parser.parseError("unexpected-start-tag-after-frameset", {"name": token["name"]}) def endTagHtml(self, token): self.parser.phase = self.parser.phases["afterAfterFrameset"] def endTagOther(self, token): self.parser.parseError("unexpected-end-tag-after-frameset", {"name": token["name"]}) class AfterAfterBodyPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml) ]) self.startTagHandler.default = self.startTagOther def processEOF(self): pass def processComment(self, token): self.tree.insertComment(token, self.tree.document) def processSpaceCharacters(self, token): return self.parser.phases["inBody"].processSpaceCharacters(token) def processCharacters(self, token): self.parser.parseError("expected-eof-but-got-char") self.parser.phase = self.parser.phases["inBody"] return token def startTagHtml(self, token): return self.parser.phases["inBody"].processStartTag(token) def startTagOther(self, token): self.parser.parseError("expected-eof-but-got-start-tag", {"name": token["name"]}) self.parser.phase = self.parser.phases["inBody"] return token def processEndTag(self, token): self.parser.parseError("expected-eof-but-got-end-tag", {"name": token["name"]}) self.parser.phase = self.parser.phases["inBody"] return token class AfterAfterFramesetPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = _utils.MethodDispatcher([ ("html", self.startTagHtml), ("noframes", self.startTagNoFrames) ]) self.startTagHandler.default = self.startTagOther def processEOF(self): pass def processComment(self, token): self.tree.insertComment(token, self.tree.document) def processSpaceCharacters(self, token): return self.parser.phases["inBody"].processSpaceCharacters(token) def processCharacters(self, token): self.parser.parseError("expected-eof-but-got-char") def startTagHtml(self, token): return self.parser.phases["inBody"].processStartTag(token) def startTagNoFrames(self, token): return self.parser.phases["inHead"].processStartTag(token) def startTagOther(self, token): self.parser.parseError("expected-eof-but-got-start-tag", {"name": token["name"]}) def processEndTag(self, token): self.parser.parseError("expected-eof-but-got-end-tag", {"name": token["name"]}) # pylint:enable=unused-argument return { "initial": InitialPhase, "beforeHtml": BeforeHtmlPhase, "beforeHead": BeforeHeadPhase, "inHead": InHeadPhase, "inHeadNoscript": InHeadNoscriptPhase, "afterHead": AfterHeadPhase, "inBody": InBodyPhase, "text": TextPhase, "inTable": InTablePhase, "inTableText": InTableTextPhase, "inCaption": InCaptionPhase, "inColumnGroup": InColumnGroupPhase, "inTableBody": InTableBodyPhase, "inRow": InRowPhase, "inCell": InCellPhase, "inSelect": InSelectPhase, "inSelectInTable": InSelectInTablePhase, "inForeignContent": InForeignContentPhase, "afterBody": AfterBodyPhase, "inFrameset": InFramesetPhase, "afterFrameset": AfterFramesetPhase, "afterAfterBody": AfterAfterBodyPhase, "afterAfterFrameset": AfterAfterFramesetPhase, # XXX after after frameset } def adjust_attributes(token, replacements): if PY3 or _utils.PY27: needs_adjustment = viewkeys(token['data']) & viewkeys(replacements) else: needs_adjustment = frozenset(token['data']) & frozenset(replacements) if needs_adjustment: token['data'] = OrderedDict((replacements.get(k, k), v) for k, v in token['data'].items()) def impliedTagToken(name, type="EndTag", attributes=None, selfClosing=False): if attributes is None: attributes = {} return {"type": tokenTypes[type], "name": name, "data": attributes, "selfClosing": selfClosing} class ParseError(Exception): """Error in parsed document""" pass Simpan