Default page

Edit File: html5parser.py

from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import with_metaclass, viewkeys, PY3

import types

try:
    from collections import OrderedDict
except ImportError:
    from pip._vendor.ordereddict import OrderedDict

from . import _inputstream
from . import _tokenizer

from . import treebuilders
from .treebuilders.base import Marker

from . import _utils
from .constants import (
    spaceCharacters, asciiUpper2Lower,
    specialElements, headingElements, cdataElements, rcdataElements,
    tokenTypes, tagTokenTypes,
    namespaces,
    htmlIntegrationPointElements, mathmlTextIntegrationPointElements,
    adjustForeignAttributes as adjustForeignAttributesMap,
    adjustMathMLAttributes, adjustSVGAttributes,
    E,
    ReparseException
)

def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse a string or file-like object into a tree"""
    tb = treebuilders.getTreeBuilder(treebuilder)
    p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
    return p.parse(doc, **kwargs)

def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    tb = treebuilders.getTreeBuilder(treebuilder)
    p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
    return p.parseFragment(doc, container=container, **kwargs)

def method_decorator_metaclass(function):
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            for attributeName, attribute in classDict.items():
                if isinstance(attribute, types.FunctionType):
                    attribute = function(attribute)

classDict[attributeName] = attribute
            return type.__new__(meta, classname, bases, classDict)
    return Decorated

class HTMLParser(object):
    """HTML parser. Generates a tree structure from a stream of (possibly
        malformed) HTML"""

def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
        """
        strict - raise an exception when a parse error is encountered

tree - a treebuilder class controlling the type of tree that will be
        returned. Built in treebuilders can be accessed through
        html5lib.treebuilders.getTreeBuilder(treeType)
        """

# Raise an exception on the first error encountered
        self.strict = strict

if tree is None:
            tree = treebuilders.getTreeBuilder("etree")
        self.tree = tree(namespaceHTMLElements)
        self.errors = []

self.phases = dict([(name, cls(self, self.tree)) for name, cls in
                            getPhases(debug).items()])

def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):

self.innerHTMLMode = innerHTML
        self.container = container
        self.scripting = scripting
        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
        self.reset()

try:
            self.mainLoop()
        except ReparseException:
            self.reset()
            self.mainLoop()

def reset(self):
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # only used with debug mode
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

if self.innerHTMLMode:
            self.innerHTML = self.container.lower()

if self.innerHTML in cdataElements:
                self.tokenizer.state = self.tokenizer.rcdataState
            elif self.innerHTML in rcdataElements:
                self.tokenizer.state = self.tokenizer.rawtextState
            elif self.innerHTML == 'plaintext':
                self.tokenizer.state = self.tokenizer.plaintextState
            else:
                # state already is data state
                # self.tokenizer.state = self.tokenizer.dataState
                pass
            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()
        else:
            self.innerHTML = False  # pylint:disable=redefined-variable-type
            self.phase = self.phases["initial"]

self.lastPhase = None

self.beforeRCDataPhase = None

self.framesetOK = True

@property
    def documentEncoding(self):
        """The name of the character encoding
        that was used to decode the input stream,
        or :obj:`None` if that is not determined yet.

"""
        if not hasattr(self, 'tokenizer'):
            return None
        return self.tokenizer.stream.charEncoding[0].name

def isHTMLIntegrationPoint(self, element):
        if (element.name == "annotation-xml" and
                element.namespace == namespaces["mathml"]):
            return ("encoding" in element.attributes and
                    element.attributes["encoding"].translate(
                        asciiUpper2Lower) in
                    ("text/html", "application/xhtml+xml"))
        else:
            return (element.namespace, element.name) in htmlIntegrationPointElements

def isMathMLTextIntegrationPoint(self, element):
        return (element.namespace, element.name) in mathmlTextIntegrationPointElements

def mainLoop(self):
        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        EndTagToken = tokenTypes["EndTag"]
        CommentToken = tokenTypes["Comment"]
        DoctypeToken = tokenTypes["Doctype"]
        ParseErrorToken = tokenTypes["ParseError"]

for token in self.normalizedTokens():
            prev_token = None
            new_token = token
            while new_token is not None:
                prev_token = new_token
                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

type = new_token["type"]

if type == ParseErrorToken:
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
                    new_token = None
                else:
                    if (len(self.tree.openElements) == 0 or
                        currentNodeNamespace == self.tree.defaultNamespace or
                        (self.isMathMLTextIntegrationPoint(currentNode) and
                         ((type == StartTagToken and
                           token["name"] not in frozenset(["mglyph", "malignmark"])) or
                          type in (CharactersToken, SpaceCharactersToken))) or
                        (currentNodeNamespace == namespaces["mathml"] and
                         currentNodeName == "annotation-xml" and
                         type == StartTagToken and
                         token["name"] == "svg") or
                        (self.isHTMLIntegrationPoint(currentNode) and
                         type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
                        phase = self.phase
                    else:
                        phase = self.phases["inForeignContent"]

if type == CharactersToken:
                        new_token = phase.processCharacters(new_token)
                    elif type == SpaceCharactersToken:
                        new_token = phase.processSpaceCharacters(new_token)
                    elif type == StartTagToken:
                        new_token = phase.processStartTag(new_token)
                    elif type == EndTagToken:
                        new_token = phase.processEndTag(new_token)
                    elif type == CommentToken:
                        new_token = phase.processComment(new_token)
                    elif type == DoctypeToken:
                        new_token = phase.processDoctype(new_token)

if (type == StartTagToken and prev_token["selfClosing"] and
                    not prev_token["selfClosingAcknowledged"]):
                self.parseError("non-void-element-with-trailing-solidus",
                                {"name": prev_token["name"]})

# When the loop finishes it's EOF
        reprocess = True
        phases = []
        while reprocess:
            phases.append(self.phase)
            reprocess = self.phase.processEOF()
            if reprocess:
                assert self.phase not in phases

def normalizedTokens(self):
        for token in self.tokenizer:
            yield self.normalizeToken(token)

def parse(self, stream, *args, **kwargs):
        """Parse a HTML document into a well-formed tree

stream - a filelike object or string containing the HTML to be parsed

The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

scripting - treat noscript elements as if javascript was turned on
        """
        self._parse(stream, False, None, *args, **kwargs)
        return self.tree.getDocument()

def parseFragment(self, stream, *args, **kwargs):
        """Parse a HTML fragment into a well-formed tree fragment

container - name of the element we're setting the innerHTML property
        if set to None, default to 'div'

stream - a filelike object or string containing the HTML to be parsed

scripting - treat noscript elements as if javascript was turned on
        """
        self._parse(stream, True, *args, **kwargs)
        return self.tree.getFragment()

def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        # XXX The idea is to make errorcode mandatory.
        if datavars is None:
            datavars = {}
        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
        if self.strict:
            raise ParseError(E[errorcode] % datavars)

def normalizeToken(self, token):
        """ HTML5 specific normalizations to the token stream """

if token["type"] == tokenTypes["StartTag"]:
            raw = token["data"]
            token["data"] = OrderedDict(raw)
            if len(raw) > len(token["data"]):
                # we had some duplicated attribute, fix so first wins
                token["data"].update(raw[::-1])

return token

def adjustMathMLAttributes(self, token):
        adjust_attributes(token, adjustMathMLAttributes)

def adjustSVGAttributes(self, token):
        adjust_attributes(token, adjustSVGAttributes)

def adjustForeignAttributes(self, token):
        adjust_attributes(token, adjustForeignAttributesMap)

def reparseTokenNormal(self, token):
        # pylint:disable=unused-argument
        self.parser.phase()

def resetInsertionMode(self):
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        last = False
        newModes = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        for node in self.tree.openElements[::-1]:
            nodeName = node.name
            new_phase = None
            if node == self.tree.openElements[0]:
                assert self.innerHTML
                last = True
                nodeName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if nodeName in ("select", "colgroup", "head", "html"):
                assert self.innerHTML

if not last and node.namespace != self.tree.defaultNamespace:
                continue

if nodeName in newModes:
                new_phase = self.phases[newModes[nodeName]]
                break
            elif last:
                new_phase = self.phases["inBody"]
                break

self.phase = new_phase

def parseRCDataRawtext(self, token, contentType):
        """Generic RCDATA/RAWTEXT Parsing algorithm
        contentType - RCDATA or RAWTEXT
        """
        assert contentType in ("RAWTEXT", "RCDATA")

self.tree.insertElement(token)

if contentType == "RAWTEXT":
            self.tokenizer.state = self.tokenizer.rawtextState
        else:
            self.tokenizer.state = self.tokenizer.rcdataState

self.originalPhase = self.phase

self.phase = self.phases["text"]

@_utils.memoize
def getPhases(debug):
    def log(function):
        """Logger that records which phase processes each token"""
        type_names = dict((value, key) for key, value in
                          tokenTypes.items())

def wrapped(self, *args, **kwargs):
            if function.__name__.startswith("process") and len(args) > 0:
                token = args[0]
                try:
                    info = {"type": type_names[token['type']]}
                except:
                    raise
                if token['type'] in tagTokenTypes:
                    info["name"] = token['name']

self.parser.log.append((self.parser.tokenizer.state.__name__,
                                        self.parser.phase.__class__.__name__,
                                        self.__class__.__name__,
                                        function.__name__,
                                        info))
                return function(self, *args, **kwargs)
            else:
                return function(self, *args, **kwargs)
        return wrapped

def getMetaclass(use_metaclass, metaclass_func):
        if use_metaclass:
            return method_decorator_metaclass(metaclass_func)
        else:
            return type

# pylint:disable=unused-argument
    class Phase(with_metaclass(getMetaclass(debug, log))):
        """Base class for helper object that implements each phase of processing
        """

def __init__(self, parser, tree):
            self.parser = parser
            self.tree = tree

def processEOF(self):
            raise NotImplementedError

def processComment(self, token):
            # For most phases the following is correct. Where it's not it will be
            # overridden.
            self.tree.insertComment(token, self.tree.openElements[-1])

def processDoctype(self, token):
            self.parser.parseError("unexpected-doctype")

def processCharacters(self, token):
            self.tree.insertText(token["data"])

def processSpaceCharacters(self, token):
            self.tree.insertText(token["data"])

def processStartTag(self, token):
            return self.startTagHandler[token["name"]](token)

def startTagHtml(self, token):
            if not self.parser.firstStartTag and token["name"] == "html":
                self.parser.parseError("non-html-root")
            # XXX Need a check here to see if the first start tag token emitted is
            # this token... If it's not, invoke self.parser.parseError().
            for attr, value in token["data"].items():
                if attr not in self.tree.openElements[0].attributes:
                    self.tree.openElements[0].attributes[attr] = value
            self.parser.firstStartTag = False

def processEndTag(self, token):
            return self.endTagHandler[token["name"]](token)

class InitialPhase(Phase):
        def processSpaceCharacters(self, token):
            pass

def processComment(self, token):
            self.tree.insertComment(token, self.tree.document)

def processDoctype(self, token):
            name = token["name"]
            publicId = token["publicId"]
            systemId = token["systemId"]
            correct = token["correct"]

if (name != "html" or publicId is not None or
                    systemId is not None and systemId != "about:legacy-compat"):
                self.parser.parseError("unknown-doctype")

if publicId is None:
                publicId = ""

self.tree.insertDoctype(token)

if publicId != "":
                publicId = publicId.translate(asciiUpper2Lower)

if (not correct or token["name"] != "html" or
                    publicId.startswith(
                        ("+//silmaril//dtd html pro v0r11 19970101//",
                         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
                         "-//as//dtd html 3.0 aswedit + extensions//",
                         "-//ietf//dtd html 2.0 level 1//",
                         "-//ietf//dtd html 2.0 level 2//",
                         "-//ietf//dtd html 2.0 strict level 1//",
                         "-//ietf//dtd html 2.0 strict level 2//",
                         "-//ietf//dtd html 2.0 strict//",
                         "-//ietf//dtd html 2.0//",
                         "-//ietf//dtd html 2.1e//",
                         "-//ietf//dtd html 3.0//",
                         "-//ietf//dtd html 3.2 final//",
                         "-//ietf//dtd html 3.2//",
                         "-//ietf//dtd html 3//",
                         "-//ietf//dtd html level 0//",
                         "-//ietf//dtd html level 1//",
                         "-//ietf//dtd html level 2//",
                         "-//ietf//dtd html level 3//",
                         "-//ietf//dtd html strict level 0//",
                         "-//ietf//dtd html strict level 1//",
                         "-//ietf//dtd html strict level 2//",
                         "-//ietf//dtd html strict level 3//",
                         "-//ietf//dtd html strict//",
                         "-//ietf//dtd html//",
                         "-//metrius//dtd metrius presentational//",
                         "-//microsoft//dtd internet explorer 2.0 html strict//",
                         "-//microsoft//dtd internet explorer 2.0 html//",
                         "-//microsoft//dtd internet explorer 2.0 tables//",
                         "-//microsoft//dtd internet explorer 3.0 html strict//",
                         "-//microsoft//dtd internet explorer 3.0 html//",
                         "-//microsoft//dtd internet explorer 3.0 tables//",
                         "-//netscape comm. corp.//dtd html//",
                         "-//netscape comm. corp.//dtd strict html//",
                         "-//o'reilly and associates//dtd html 2.0//",
                         "-//o'reilly and associates//dtd html extended 1.0//",
                         "-//o'reilly and associates//dtd html extended relaxed 1.0//",
                         "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
                         "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
                         "-//spyglass//dtd html 2.0 extended//",
                         "-//sq//dtd html 2.0 hotmetal + extensions//",
                         "-//sun microsystems corp.//dtd hotjava html//",
                         "-//sun microsystems corp.//dtd hotjava strict html//",
                         "-//w3c//dtd html 3 1995-03-24//",
                         "-//w3c//dtd html 3.2 draft//",
                         "-//w3c//dtd html 3.2 final//",
                         "-//w3c//dtd html 3.2//",
                         "-//w3c//dtd html 3.2s draft//",
                         "-//w3c//dtd html 4.0 frameset//",
                         "-//w3c//dtd html 4.0 transitional//",
                         "-//w3c//dtd html experimental 19960712//",
                         "-//w3c//dtd html experimental 970421//",
                         "-//w3c//dtd w3 html//",
                         "-//w3o//dtd w3 html 3.0//",
                         "-//webtechs//dtd mozilla html 2.0//",
                         "-//webtechs//dtd mozilla html//")) or
                    publicId in ("-//w3o//dtd w3 html strict 3.0//en//",
                                 "-/w3c/dtd html 4.0 transitional/en",
                                 "html") or
                    publicId.startswith(
                        ("-//w3c//dtd html 4.01 frameset//",
                         "-//w3c//dtd html 4.01 transitional//")) and
                    systemId is None or
                    systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
                self.parser.compatMode = "quirks"
            elif (publicId.startswith(
                    ("-//w3c//dtd xhtml 1.0 frameset//",
                     "-//w3c//dtd xhtml 1.0 transitional//")) or
                  publicId.startswith(
                      ("-//w3c//dtd html 4.01 frameset//",
                       "-//w3c//dtd html 4.01 transitional//")) and
                  systemId is not None):
                self.parser.compatMode = "limited quirks"

self.parser.phase = self.parser.phases["beforeHtml"]

def anythingElse(self):
            self.parser.compatMode = "quirks"
            self.parser.phase = self.parser.phases["beforeHtml"]

def processCharacters(self, token):
            self.parser.parseError("expected-doctype-but-got-chars")
            self.anythingElse()
            return token

def processStartTag(self, token):
            self.parser.parseError("expected-doctype-but-got-start-tag",
                                   {"name": token["name"]})
            self.anythingElse()
            return token

def processEndTag(self, token):
            self.parser.parseError("expected-doctype-but-got-end-tag",
                                   {"name": token["name"]})
            self.anythingElse()
            return token

def processEOF(self):
            self.parser.parseError("expected-doctype-but-got-eof")
            self.anythingElse()
            return True

class BeforeHtmlPhase(Phase):
        # helper methods
        def insertHtmlElement(self):
            self.tree.insertRoot(impliedTagToken("html", "StartTag"))
            self.parser.phase = self.parser.phases["beforeHead"]

# other
        def processEOF(self):
            self.insertHtmlElement()
            return True

def processComment(self, token):
            self.tree.insertComment(token, self.tree.document)

def processSpaceCharacters(self, token):
            pass

def processCharacters(self, token):
            self.insertHtmlElement()
            return token

def processStartTag(self, token):
            if token["name"] == "html":
                self.parser.firstStartTag = True
            self.insertHtmlElement()
            return token

def processEndTag(self, token):
            if token["name"] not in ("head", "body", "html", "br"):
                self.parser.parseError("unexpected-end-tag-before-html",
                                       {"name": token["name"]})
            else:
                self.insertHtmlElement()
                return token

class BeforeHeadPhase(Phase):
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("head", self.startTagHead)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                (("head", "body", "html", "br"), self.endTagImplyHead)
            ])
            self.endTagHandler.default = self.endTagOther

def processEOF(self):
            self.startTagHead(impliedTagToken("head", "StartTag"))
            return True

def processSpaceCharacters(self, token):
            pass

def processCharacters(self, token):
            self.startTagHead(impliedTagToken("head", "StartTag"))
            return token

def startTagHtml(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

def startTagHead(self, token):
            self.tree.insertElement(token)
            self.tree.headPointer = self.tree.openElements[-1]
            self.parser.phase = self.parser.phases["inHead"]

def startTagOther(self, token):
            self.startTagHead(impliedTagToken("head", "StartTag"))
            return token

def endTagImplyHead(self, token):
            self.startTagHead(impliedTagToken("head", "StartTag"))
            return token

def endTagOther(self, token):
            self.parser.parseError("end-tag-after-implied-root",
                                   {"name": token["name"]})

class InHeadPhase(Phase):
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("title", self.startTagTitle),
                (("noframes", "style"), self.startTagNoFramesStyle),
                ("noscript", self.startTagNoscript),
                ("script", self.startTagScript),
                (("base", "basefont", "bgsound", "command", "link"),
                 self.startTagBaseLinkCommand),
                ("meta", self.startTagMeta),
                ("head", self.startTagHead)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                ("head", self.endTagHead),
                (("br", "html", "body"), self.endTagHtmlBodyBr)
            ])
            self.endTagHandler.default = self.endTagOther

# the real thing
        def processEOF(self):
            self.anythingElse()
            return True

def processCharacters(self, token):
            self.anythingElse()
            return token

def startTagHtml(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

def startTagHead(self, token):
            self.parser.parseError("two-heads-are-not-better-than-one")

def startTagBaseLinkCommand(self, token):
            self.tree.insertElement(token)
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

def startTagMeta(self, token):
            self.tree.insertElement(token)
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

attributes = token["data"]
            if self.parser.tokenizer.stream.charEncoding[1] == "tentative":
                if "charset" in attributes:
                    self.parser.tokenizer.stream.changeEncoding(attributes["charset"])
                elif ("content" in attributes and
                      "http-equiv" in attributes and
                      attributes["http-equiv"].lower() == "content-type"):
                    # Encoding it as UTF-8 here is a hack, as really we should pass
                    # the abstract Unicode string, and just use the
                    # ContentAttrParser on that, but using UTF-8 allows all chars
                    # to be encoded and as a ASCII-superset works.
                    data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
                    parser = _inputstream.ContentAttrParser(data)
                    codec = parser.parse()
                    self.parser.tokenizer.stream.changeEncoding(codec)

def startTagTitle(self, token):
            self.parser.parseRCDataRawtext(token, "RCDATA")

def startTagNoFramesStyle(self, token):
            # Need to decide whether to implement the scripting-disabled case
            self.parser.parseRCDataRawtext(token, "RAWTEXT")

def startTagNoscript(self, token):
            if self.parser.scripting:
                self.parser.parseRCDataRawtext(token, "RAWTEXT")
            else:
                self.tree.insertElement(token)
                self.parser.phase = self.parser.phases["inHeadNoscript"]

def startTagScript(self, token):
            self.tree.insertElement(token)
            self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState
            self.parser.originalPhase = self.parser.phase
            self.parser.phase = self.parser.phases["text"]

def startTagOther(self, token):
            self.anythingElse()
            return token

def endTagHead(self, token):
            node = self.parser.tree.openElements.pop()
            assert node.name == "head", "Expected head got %s" % node.name
            self.parser.phase = self.parser.phases["afterHead"]

def endTagHtmlBodyBr(self, token):
            self.anythingElse()
            return token

def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

def anythingElse(self):
            self.endTagHead(impliedTagToken("head"))

class InHeadNoscriptPhase(Phase):
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand),
                (("head", "noscript"), self.startTagHeadNoscript),
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                ("noscript", self.endTagNoscript),
                ("br", self.endTagBr),
            ])
            self.endTagHandler.default = self.endTagOther

def processEOF(self):
            self.parser.parseError("eof-in-head-noscript")
            self.anythingElse()
            return True

def processComment(self, token):
            return self.parser.phases["inHead"].processComment(token)

def processCharacters(self, token):
            self.parser.parseError("char-in-head-noscript")
            self.anythingElse()
            return token

def processSpaceCharacters(self, token):
            return self.parser.phases["inHead"].processSpaceCharacters(token)

def startTagHtml(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

def startTagBaseLinkCommand(self, token):
            return self.parser.phases["inHead"].processStartTag(token)

def startTagHeadNoscript(self, token):
            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

def startTagOther(self, token):
            self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
            self.anythingElse()
            return token

def endTagNoscript(self, token):
            node = self.parser.tree.openElements.pop()
            assert node.name == "noscript", "Expected noscript got %s" % node.name
            self.parser.phase = self.parser.phases["inHead"]

def endTagBr(self, token):
            self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
            self.anythingElse()
            return token

def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

def anythingElse(self):
            # Caller must raise parse error first!
            self.endTagNoscript(impliedTagToken("noscript"))

class AfterHeadPhase(Phase):
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("body", self.startTagBody),
                ("frameset", self.startTagFrameset),
                (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
                  "style", "title"),
                 self.startTagFromHead),
                ("head", self.startTagHead)
            ])
            self.startTagHandler.default = self.startTagOther
            self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
                                                           self.endTagHtmlBodyBr)])
            self.endTagHandler.default = self.endTagOther

def processEOF(self):
            self.anythingElse()
            return True

def processCharacters(self, token):
            self.anythingElse()
            return token

def startTagHtml(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

def startTagBody(self, token):
            self.parser.framesetOK = False
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inBody"]

def startTagFrameset(self, token):
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inFrameset"]

def startTagFromHead(self, token):
            self.parser.parseError("unexpected-start-tag-out-of-my-head",
                                   {"name": token["name"]})
            self.tree.openElements.append(self.tree.headPointer)
            self.parser.phases["inHead"].processStartTag(token)
            for node in self.tree.openElements[::-1]:
                if node.name == "head":
                    self.tree.openElements.remove(node)
                    break

def startTagHead(self, token):
            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

def startTagOther(self, token):
            self.anythingElse()
            return token

def endTagHtmlBodyBr(self, token):
            self.anythingElse()
            return token

def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

def anythingElse(self):
            self.tree.insertElement(impliedTagToken("body", "StartTag"))
            self.parser.phase = self.parser.phases["inBody"]
            self.parser.framesetOK = True

class InBodyPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
        # the really-really-really-very crazy mode
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

# Set this to the default handler
            self.processSpaceCharacters = self.processSpaceCharactersNonPre

self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("base", "basefont", "bgsound", "command", "link", "meta",
                  "script", "style", "title"),
                 self.startTagProcessInHead),
                ("body", self.startTagBody),
                ("frameset", self.startTagFrameset),
                (("address", "article", "aside", "blockquote", "center", "details",
                  "dir", "div", "dl", "fieldset", "figcaption", "figure",
                  "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
                  "section", "summary", "ul"),
                 self.startTagCloseP),
                (headingElements, self.startTagHeading),
                (("pre", "listing"), self.startTagPreListing),
                ("form", self.startTagForm),
                (("li", "dd", "dt"), self.startTagListItem),
                ("plaintext", self.startTagPlaintext),
                ("a", self.startTagA),
                (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
                  "strong", "tt", "u"), self.startTagFormatting),
                ("nobr", self.startTagNobr),
                ("button", self.startTagButton),
                (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
                ("xmp", self.startTagXmp),
                ("table", self.startTagTable),
                (("area", "br", "embed", "img", "keygen", "wbr"),
                 self.startTagVoidFormatting),
                (("param", "source", "track"), self.startTagParamSource),
                ("input", self.startTagInput),
                ("hr", self.startTagHr),
                ("image", self.startTagImage),
                ("isindex", self.startTagIsIndex),
                ("textarea", self.startTagTextarea),
                ("iframe", self.startTagIFrame),
                ("noscript", self.startTagNoscript),
                (("noembed", "noframes"), self.startTagRawtext),
                ("select", self.startTagSelect),
                (("rp", "rt"), self.startTagRpRt),
                (("option", "optgroup"), self.startTagOpt),
                (("math"), self.startTagMath),
                (("svg"), self.startTagSvg),
                (("caption", "col", "colgroup", "frame", "head",
                  "tbody", "td", "tfoot", "th", "thead",
                  "tr"), self.startTagMisplaced)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                ("body", self.endTagBody),
                ("html", self.endTagHtml),
                (("address", "article", "aside", "blockquote", "button", "center",
                  "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
                  "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
                  "section", "summary", "ul"), self.endTagBlock),
                ("form", self.endTagForm),
                ("p", self.endTagP),
                (("dd", "dt", "li"), self.endTagListItem),
                (headingElements, self.endTagHeading),
                (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
                  "strike", "strong", "tt", "u"), self.endTagFormatting),
                (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
                ("br", self.endTagBr),
            ])
            self.endTagHandler.default = self.endTagOther

def isMatchingFormattingElement(self, node1, node2):
            return (node1.name == node2.name and
                    node1.namespace == node2.namespace and
                    node1.attributes == node2.attributes)

# helper
        def addFormattingElement(self, token):
            self.tree.insertElement(token)
            element = self.tree.openElements[-1]

matchingElements = []
            for node in self.tree.activeFormattingElements[::-1]:
                if node is Marker:
                    break
                elif self.isMatchingFormattingElement(node, element):
                    matchingElements.append(node)

assert len(matchingElements) <= 3
            if len(matchingElements) == 3:
                self.tree.activeFormattingElements.remove(matchingElements[-1])
            self.tree.activeFormattingElements.append(element)

# the real deal
        def processEOF(self):
            allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td",
                                          "tfoot", "th", "thead", "tr", "body",
                                          "html"))
            for node in self.tree.openElements[::-1]:
                if node.name not in allowed_elements:
                    self.parser.parseError("expected-closing-tag-but-got-eof")
                    break
            # Stop parsing

def processSpaceCharactersDropNewline(self, token):
            # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
            # want to drop leading newlines
            data = token["data"]
            self.processSpaceCharacters = self.processSpaceCharactersNonPre
            if (data.startswith("\n") and
                self.tree.openElements[-1].name in ("pre", "listing", "textarea") and
                    not self.tree.openElements[-1].hasContent()):
                data = data[1:]
            if data:
                self.tree.reconstructActiveFormattingElements()
                self.tree.insertText(data)

def processCharacters(self, token):
            if token["data"] == "\u0000":
                # The tokenizer should always emit null on its own
                return
            self.tree.reconstructActiveFormattingElements()
            self.tree.insertText(token["data"])
            # This must be bad for performance
            if (self.parser.framesetOK and
                any([char not in spaceCharacters
                     for char in token["data"]])):
                self.parser.framesetOK = False

def processSpaceCharactersNonPre(self, token):
            self.tree.reconstructActiveFormattingElements()
            self.tree.insertText(token["data"])

def startTagProcessInHead(self, token):
            return self.parser.phases["inHead"].processStartTag(token)

def startTagBody(self, token):
            self.parser.parseError("unexpected-start-tag", {"name": "body"})
            if (len(self.tree.openElements) == 1 or
                    self.tree.openElements[1].name != "body"):
                assert self.parser.innerHTML
            else:
                self.parser.framesetOK = False
                for attr, value in token["data"].items():
                    if attr not in self.tree.openElements[1].attributes:
                        self.tree.openElements[1].attributes[attr] = value

def startTagFrameset(self, token):
            self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
            if (len(self.tree.openElements) == 1 or self.tree.openElements[1].name != "body"):
                assert self.parser.innerHTML
            elif not self.parser.framesetOK:
                pass
            else:
                if self.tree.openElements[1].parent:
                    self.tree.openElements[1].parent.removeChild(self.tree.openElements[1])
                while self.tree.openElements[-1].name != "html":
                    self.tree.openElements.pop()
                self.tree.insertElement(token)
                self.parser.phase = self.parser.phases["inFrameset"]

def startTagCloseP(self, token):
            if self.tree.elementInScope("p", variant="button"):
                self.endTagP(impliedTagToken("p"))
            self.tree.insertElement(token)

def startTagPreListing(self, token):
            if self.tree.elementInScope("p", variant="button"):
                self.endTagP(impliedTagToken("p"))
            self.tree.insertElement(token)
            self.parser.framesetOK = False
            self.processSpaceCharacters = self.processSpaceCharactersDropNewline

def startTagForm(self, token):
            if self.tree.formPointer:
                self.parser.parseError("unexpected-start-tag", {"name": "form"})
            else:
                if self.tree.elementInScope("p", variant="button"):
                    self.endTagP(impliedTagToken("p"))
                self.tree.insertElement(token)
                self.tree.formPointer = self.tree.openElements[-1]

def startTagListItem(self, token):
            self.parser.framesetOK = False

stopNamesMap = {"li": ["li"],
                            "dt": ["dt", "dd"],
                            "dd": ["dt", "dd"]}
            stopNames = stopNamesMap[token["name"]]
            for node in reversed(self.tree.openElements):
                if node.name in stopNames:
                    self.parser.phase.processEndTag(
                        impliedTagToken(node.name, "EndTag"))
                    break
                if (node.nameTuple in specialElements and
                        node.name not in ("address", "div", "p")):
                    break

if self.tree.elementInScope("p", variant="button"):
                self.parser.phase.processEndTag(
                    impliedTagToken("p", "EndTag"))

self.tree.insertElement(token)

def startTagPlaintext(self, token):
            if self.tree.elementInScope("p", variant="button"):
                self.endTagP(impliedTagToken("p"))
            self.tree.insertElement(token)
            self.parser.tokenizer.state = self.parser.tokenizer.plaintextState

def startTagHeading(self, token):
            if self.tree.elementInScope("p", variant="button"):
                self.endTagP(impliedTagToken("p"))
            if self.tree.openElements[-1].name in headingElements:
                self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
                self.tree.openElements.pop()
            self.tree.insertElement(token)

def startTagA(self, token):
            afeAElement = self.tree.elementInActiveFormattingElements("a")
            if afeAElement:
                self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                       {"startName": "a", "endName": "a"})
                self.endTagFormatting(impliedTagToken("a"))
                if afeAElement in self.tree.openElements:
                    self.tree.openElements.remove(afeAElement)
                if afeAElement in self.tree.activeFormattingElements:
                    self.tree.activeFormattingElements.remove(afeAElement)
            self.tree.reconstructActiveFormattingElements()
            self.addFormattingElement(token)

def startTagFormatting(self, token):
            self.tree.reconstructActiveFormattingElements()
            self.addFormattingElement(token)

def startTagNobr(self, token):
            self.tree.reconstructActiveFormattingElements()
            if self.tree.elementInScope("nobr"):
                self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                       {"startName": "nobr", "endName": "nobr"})
                self.processEndTag(impliedTagToken("nobr"))
                # XXX Need tests that trigger the following
                self.tree.reconstructActiveFormattingElements()
            self.addFormattingElement(token)

def startTagButton(self, token):
            if self.tree.elementInScope("button"):
                self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                       {"startName": "button", "endName": "button"})
                self.processEndTag(impliedTagToken("button"))
                return token
            else:
                self.tree.reconstructActiveFormattingElements()
                self.tree.insertElement(token)
                self.parser.framesetOK = False

def startTagAppletMarqueeObject(self, token):
            self.tree.reconstructActiveFormattingElements()
            self.tree.insertElement(token)
            self.tree.activeFormattingElements.append(Marker)
            self.parser.framesetOK = False

def startTagXmp(self, token):
            if self.tree.elementInScope("p", variant="button"):
                self.endTagP(impliedTagToken("p"))
            self.tree.reconstructActiveFormattingElements()
            self.parser.framesetOK = False
            self.parser.parseRCDataRawtext(token, "RAWTEXT")

def startTagTable(self, token):
            if self.parser.compatMode != "quirks":
                if self.tree.elementInScope("p", variant="button"):
                    self.processEndTag(impliedTagToken("p"))
            self.tree.insertElement(token)
            self.parser.framesetOK = False
            self.parser.phase = self.parser.phases["inTable"]

def startTagVoidFormatting(self, token):
            self.tree.reconstructActiveFormattingElements()
            self.tree.insertElement(token)
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True
            self.parser.framesetOK = False

def startTagInput(self, token):
            framesetOK = self.parser.framesetOK
            self.startTagVoidFormatting(token)
            if ("type" in token["data"] and
                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
                # input type=hidden doesn't change framesetOK
                self.parser.framesetOK = framesetOK

def startTagParamSource(self, token):
            self.tree.insertElement(token)
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

def startTagHr(self, token):
            if self.tree.elementInScope("p", variant="button"):
                self.endTagP(impliedTagToken("p"))
            self.tree.insertElement(token)
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True
            self.parser.framesetOK = False

def startTagImage(self, token):
            # No really...
            self.parser.parseError("unexpected-start-tag-treated-as",
                                   {"originalName": "image", "newName": "img"})
            self.processStartTag(impliedTagToken("img", "StartTag",
                                                 attributes=token["data"],
                                                 selfClosing=token["selfClosing"]))

def startTagIsIndex(self, token):
            self.parser.parseError("deprecated-tag", {"name": "isindex"})
            if self.tree.formPointer:
                return
            form_attrs = {}
            if "action" in token["data"]:
                form_attrs["action"] = token["data"]["action"]
            self.processStartTag(impliedTagToken("form", "StartTag",
                                                 attributes=form_attrs))
            self.processStartTag(impliedTagToken("hr", "StartTag"))
            self.processStartTag(impliedTagToken("label", "StartTag"))
            # XXX Localization ...
            if "prompt" in token["data"]:
                prompt = token["data"]["prompt"]
            else:
                prompt = "This is a searchable index. Enter search keywords: "
            self.processCharacters(
                {"type": tokenTypes["Characters"], "data": prompt})
            attributes = token["data"].copy()
            if "action" in attributes:
                del attributes["action"]
            if "prompt" in attributes:
                del attributes["prompt"]
            attributes["name"] = "isindex"
            self.processStartTag(impliedTagToken("input", "StartTag",
                                                 attributes=attributes,
                                                 selfClosing=token["selfClosing"]))
            self.processEndTag(impliedTagToken("label"))
            self.processStartTag(impliedTagToken("hr", "StartTag"))
            self.processEndTag(impliedTagToken("form"))

def startTagTextarea(self, token):
            self.tree.insertElement(token)
            self.parser.tokenizer.state = self.parser.tokenizer.rcdataState
            self.processSpaceCharacters = self.processSpaceCharactersDropNewline
            self.parser.framesetOK = False

def startTagIFrame(self, token):
            self.parser.framesetOK = False
            self.startTagRawtext(token)

def startTagNoscript(self, token):
            if self.parser.scripting:
                self.startTagRawtext(token)
            else:
                self.startTagOther(token)

def startTagRawtext(self, token):
            """iframe, noembed noframes, noscript(if scripting enabled)"""
            self.parser.parseRCDataRawtext(token, "RAWTEXT")

def startTagOpt(self, token):
            if self.tree.openElements[-1].name == "option":
                self.parser.phase.processEndTag(impliedTagToken("option"))
            self.tree.reconstructActiveFormattingElements()
            self.parser.tree.insertElement(token)

def startTagSelect(self, token):
            self.tree.reconstructActiveFormattingElements()
            self.tree.insertElement(token)
            self.parser.framesetOK = False
            if self.parser.phase in (self.parser.phases["inTable"],
                                     self.parser.phases["inCaption"],
                                     self.parser.phases["inColumnGroup"],
                                     self.parser.phases["inTableBody"],
                                     self.parser.phases["inRow"],
                                     self.parser.phases["inCell"]):
                self.parser.phase = self.parser.phases["inSelectInTable"]
            else:
                self.parser.phase = self.parser.phases["inSelect"]

def startTagRpRt(self, token):
            if self.tree.elementInScope("ruby"):
                self.tree.generateImpliedEndTags()
                if self.tree.openElements[-1].name != "ruby":
                    self.parser.parseError()
            self.tree.insertElement(token)

def startTagMath(self, token):
            self.tree.reconstructActiveFormattingElements()
            self.parser.adjustMathMLAttributes(token)
            self.parser.adjustForeignAttributes(token)
            token["namespace"] = namespaces["mathml"]
            self.tree.insertElement(token)
            # Need to get the parse error right for the case where the token
            # has a namespace not equal to the xmlns attribute
            if token["selfClosing"]:
                self.tree.openElements.pop()
                token["selfClosingAcknowledged"] = True

def startTagSvg(self, token):
            self.tree.reconstructActiveFormattingElements()
            self.parser.adjustSVGAttributes(token)
            self.parser.adjustForeignAttributes(token)
            token["namespace"] = namespaces["svg"]
            self.tree.insertElement(token)
            # Need to get the parse error right for the case where the token
            # has a namespace not equal to the xmlns attribute
            if token["selfClosing"]:
                self.tree.openElements.pop()
                token["selfClosingAcknowledged"] = True

def startTagMisplaced(self, token):
            """ Elements that should be children of other elements that have a
            different insertion mode; here they are ignored
            "caption", "col", "colgroup", "frame", "frameset", "head",
            "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
            "tr", "noscript"
            """
            self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]})

def startTagOther(self, token):
            self.tree.reconstructActiveFormattingElements()
            self.tree.insertElement(token)

def endTagP(self, token):
            if not self.tree.elementInScope("p", variant="button"):
                self.startTagCloseP(impliedTagToken("p", "StartTag"))
                self.parser.parseError("unexpected-end-tag", {"name": "p"})
                self.endTagP(impliedTagToken("p", "EndTag"))
            else:
                self.tree.generateImpliedEndTags("p")
                if self.tree.openElements[-1].name != "p":
                    self.parser.parseError("unexpected-end-tag", {"name": "p"})
                node = self.tree.openElements.pop()
                while node.name != "p":
                    node = self.tree.openElements.pop()

def endTagBody(self, token):
            if not self.tree.elementInScope("body"):
                self.parser.parseError()
                return
            elif self.tree.openElements[-1].name != "body":
                for node in self.tree.openElements[2:]:
                    if node.name not in frozenset(("dd", "dt", "li", "optgroup",
                                                   "option", "p", "rp", "rt",
                                                   "tbody", "td", "tfoot",
                                                   "th", "thead", "tr", "body",
                                                   "html")):
                        # Not sure this is the correct name for the parse error
                        self.parser.parseError(
                            "expected-one-end-tag-but-got-another",
                            {"gotName": "body", "expectedName": node.name})
                        break
            self.parser.phase = self.parser.phases["afterBody"]

def endTagHtml(self, token):
            # We repeat the test for the body end tag token being ignored here
            if self.tree.elementInScope("body"):
                self.endTagBody(impliedTagToken("body"))
                return token

def endTagBlock(self, token):
            # Put us back in the right whitespace handling mode
            if token["name"] == "pre":
                self.processSpaceCharacters = self.processSpaceCharactersNonPre
            inScope = self.tree.elementInScope(token["name"])
            if inScope:
                self.tree.generateImpliedEndTags()
            if self.tree.openElements[-1].name != token["name"]:
                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
            if inScope:
                node = self.tree.openElements.pop()
                while node.name != token["name"]:
                    node = self.tree.openElements.pop()

def endTagForm(self, token):
            node = self.tree.formPointer
            self.tree.formPointer = None
            if node is None or not self.tree.elementInScope(node):
                self.parser.parseError("unexpected-end-tag",
                                       {"name": "form"})
            else:
                self.tree.generateImpliedEndTags()
                if self.tree.openElements[-1] != node:
                    self.parser.parseError("end-tag-too-early-ignored",
                                           {"name": "form"})
                self.tree.openElements.remove(node)

def endTagListItem(self, token):
            if token["name"] == "li":
                variant = "list"
            else:
                variant = None
            if not self.tree.elementInScope(token["name"], variant=variant):
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
            else:
                self.tree.generateImpliedEndTags(exclude=token["name"])
                if self.tree.openElements[-1].name != token["name"]:
                    self.parser.parseError(
                        "end-tag-too-early",
                        {"name": token["name"]})
                node = self.tree.openElements.pop()
                while node.name != token["name"]:
                    node = self.tree.openElements.pop()

def endTagHeading(self, token):
            for item in headingElements:
                if self.tree.elementInScope(item):
                    self.tree.generateImpliedEndTags()
                    break
            if self.tree.openElements[-1].name != token["name"]:
                self.parser.parseError("end-tag-too-early", {"name": token["name"]})

for item in headingElements:
                if self.tree.elementInScope(item):
                    item = self.tree.openElements.pop()
                    while item.name not in headingElements:
                        item = self.tree.openElements.pop()
                    break

def endTagFormatting(self, token):
            """The much-feared adoption agency algorithm"""
            # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
            # XXX Better parseError messages appreciated.

# Step 1
            outerLoopCounter = 0

# Step 2
            while outerLoopCounter < 8:

# Step 3
                outerLoopCounter += 1

# Step 4:

# Let the formatting element be the last element in
                # the list of active formatting elements that:
                # - is between the end of the list and the last scope
                # marker in the list, if any, or the start of the list
                # otherwise, and
                # - has the same tag name as the token.
                formattingElement = self.tree.elementInActiveFormattingElements(
                    token["name"])
                if (not formattingElement or
                    (formattingElement in self.tree.openElements and
                     not self.tree.elementInScope(formattingElement.name))):
                    # If there is no such node, then abort these steps
                    # and instead act as described in the "any other
                    # end tag" entry below.
                    self.endTagOther(token)
                    return

# Otherwise, if there is such a node, but that node is
                # not in the stack of open elements, then this is a
                # parse error; remove the element from the list, and
                # abort these steps.
                elif formattingElement not in self.tree.openElements:
                    self.parser.parseError("adoption-agency-1.2", {"name": token["name"]})
                    self.tree.activeFormattingElements.remove(formattingElement)
                    return

# Otherwise, if there is such a node, and that node is
                # also in the stack of open elements, but the element
                # is not in scope, then this is a parse error; ignore
                # the token, and abort these steps.
                elif not self.tree.elementInScope(formattingElement.name):
                    self.parser.parseError("adoption-agency-4.4", {"name": token["name"]})
                    return

# Otherwise, there is a formatting element and that
                # element is in the stack and is in scope. If the
                # element is not the current node, this is a parse
                # error. In any case, proceed with the algorithm as
                # written in the following steps.
                else:
                    if formattingElement != self.tree.openElements[-1]:
                        self.parser.parseError("adoption-agency-1.3", {"name": token["name"]})

# Step 5:

# Let the furthest block be the topmost node in the
                # stack of open elements that is lower in the stack
                # than the formatting element, and is an element in
                # the special category. There might not be one.
                afeIndex = self.tree.openElements.index(formattingElement)
                furthestBlock = None
                for element in self.tree.openElements[afeIndex:]:
                    if element.nameTuple in specialElements:
                        furthestBlock = element
                        break

# Step 6:

# If there is no furthest block, then the UA must
                # first pop all the nodes from the bottom of the stack
                # of open elements, from the current node up to and
                # including the formatting element, then remove the
                # formatting element from the list of active
                # formatting elements, and finally abort these steps.
                if furthestBlock is None:
                    element = self.tree.openElements.pop()
                    while element != formattingElement:
                        element = self.tree.openElements.pop()
                    self.tree.activeFormattingElements.remove(element)
                    return

# Step 7
                commonAncestor = self.tree.openElements[afeIndex - 1]

# Step 8:
                # The bookmark is supposed to help us identify where to reinsert
                # nodes in step 15. We have to ensure that we reinsert nodes after
                # the node before the active formatting element. Note the bookmark
                # can move in step 9.7
                bookmark = self.tree.activeFormattingElements.index(formattingElement)

# Step 9
                lastNode = node = furthestBlock
                innerLoopCounter = 0

index = self.tree.openElements.index(node)
                while innerLoopCounter < 3:
                    innerLoopCounter += 1
                    # Node is element before node in open elements
                    index -= 1
                    node = self.tree.openElements[index]
                    if node not in self.tree.activeFormattingElements:
                        self.tree.openElements.remove(node)
                        continue
                    # Step 9.6
                    if node == formattingElement:
                        break
                    # Step 9.7
                    if lastNode == furthestBlock:
                        bookmark = self.tree.activeFormattingElements.index(node) + 1
                    # Step 9.8
                    clone = node.cloneNode()
                    # Replace node with clone
                    self.tree.activeFormattingElements[
                        self.tree.activeFormattingElements.index(node)] = clone
                    self.tree.openElements[
                        self.tree.openElements.index(node)] = clone
                    node = clone
                    # Step 9.9
                    # Remove lastNode from its parents, if any
                    if lastNode.parent:
                        lastNode.parent.removeChild(lastNode)
                    node.appendChild(lastNode)
                    # Step 9.10
                    lastNode = node

# Step 10
                # Foster parent lastNode if commonAncestor is a
                # table, tbody, tfoot, thead, or tr we need to foster
                # parent the lastNode
                if lastNode.parent:
                    lastNode.parent.removeChild(lastNode)

if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
                    parent, insertBefore = self.tree.getTableMisnestedNodePosition()
                    parent.insertBefore(lastNode, insertBefore)
                else:
                    commonAncestor.appendChild(lastNode)

# Step 11
                clone = formattingElement.cloneNode()

# Step 12
                furthestBlock.reparentChildren(clone)

# Step 13
                furthestBlock.appendChild(clone)

# Step 14
                self.tree.activeFormattingElements.remove(formattingElement)
                self.tree.activeFormattingElements.insert(bookmark, clone)

# Step 15
                self.tree.openElements.remove(formattingElement)
                self.tree.openElements.insert(
                    self.tree.openElements.index(furthestBlock) + 1, clone)

def endTagAppletMarqueeObject(self, token):
            if self.tree.elementInScope(token["name"]):
                self.tree.generateImpliedEndTags()
            if self.tree.openElements[-1].name != token["name"]:
                self.parser.parseError("end-tag-too-early", {"name": token["name"]})

if self.tree.elementInScope(token["name"]):
                element = self.tree.openElements.pop()
                while element.name != token["name"]:
                    element = self.tree.openElements.pop()
                self.tree.clearActiveFormattingElements()

def endTagBr(self, token):
            self.parser.parseError("unexpected-end-tag-treated-as",
                                   {"originalName": "br", "newName": "br element"})
            self.tree.reconstructActiveFormattingElements()
            self.tree.insertElement(impliedTagToken("br", "StartTag"))
            self.tree.openElements.pop()

def endTagOther(self, token):
            for node in self.tree.openElements[::-1]:
                if node.name == token["name"]:
                    self.tree.generateImpliedEndTags(exclude=token["name"])
                    if self.tree.openElements[-1].name != token["name"]:
                        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
                    while self.tree.openElements.pop() != node:
                        pass
                    break
                else:
                    if node.nameTuple in specialElements:
                        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
                        break

class TextPhase(Phase):
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            self.startTagHandler = _utils.MethodDispatcher([])
            self.startTagHandler.default = self.startTagOther
            self.endTagHandler = _utils.MethodDispatcher([
                ("script", self.endTagScript)])
            self.endTagHandler.default = self.endTagOther

def processCharacters(self, token):
            self.tree.insertText(token["data"])

def processEOF(self):
            self.parser.parseError("expected-named-closing-tag-but-got-eof",
                                   {"name": self.tree.openElements[-1].name})
            self.tree.openElements.pop()
            self.parser.phase = self.parser.originalPhase
            return True

def startTagOther(self, token):
            assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']

def endTagScript(self, token):
            node = self.tree.openElements.pop()
            assert node.name == "script"
            self.parser.phase = self.parser.originalPhase
            # The rest of this method is all stuff that only happens if
            # document.write works

def endTagOther(self, token):
            self.tree.openElements.pop()
            self.parser.phase = self.parser.originalPhase

class InTablePhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-table
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("caption", self.startTagCaption),
                ("colgroup", self.startTagColgroup),
                ("col", self.startTagCol),
                (("tbody", "tfoot", "thead"), self.startTagRowGroup),
                (("td", "th", "tr"), self.startTagImplyTbody),
                ("table", self.startTagTable),
                (("style", "script"), self.startTagStyleScript),
                ("input", self.startTagInput),
                ("form", self.startTagForm)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                ("table", self.endTagTable),
                (("body", "caption", "col", "colgroup", "html", "tbody", "td",
                  "tfoot", "th", "thead", "tr"), self.endTagIgnore)
            ])
            self.endTagHandler.default = self.endTagOther

# helper methods
        def clearStackToTableContext(self):
            # "clear the stack back to a table context"
            while self.tree.openElements[-1].name not in ("table", "html"):
                # self.parser.parseError("unexpected-implied-end-tag-in-table",
                #  {"name":  self.tree.openElements[-1].name})
                self.tree.openElements.pop()
            # When the current node is <html> it's an innerHTML case

# processing methods
        def processEOF(self):
            if self.tree.openElements[-1].name != "html":
                self.parser.parseError("eof-in-table")
            else:
                assert self.parser.innerHTML
            # Stop parsing

def processSpaceCharacters(self, token):
            originalPhase = self.parser.phase
            self.parser.phase = self.parser.phases["inTableText"]
            self.parser.phase.originalPhase = originalPhase
            self.parser.phase.processSpaceCharacters(token)

def processCharacters(self, token):
            originalPhase = self.parser.phase
            self.parser.phase = self.parser.phases["inTableText"]
            self.parser.phase.originalPhase = originalPhase
            self.parser.phase.processCharacters(token)

def insertText(self, token):
            # If we get here there must be at least one non-whitespace character
            # Do the table magic!
            self.tree.insertFromTable = True
            self.parser.phases["inBody"].processCharacters(token)
            self.tree.insertFromTable = False

def startTagCaption(self, token):
            self.clearStackToTableContext()
            self.tree.activeFormattingElements.append(Marker)
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inCaption"]

def startTagColgroup(self, token):
            self.clearStackToTableContext()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inColumnGroup"]

def startTagCol(self, token):
            self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))
            return token

def startTagRowGroup(self, token):
            self.clearStackToTableContext()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inTableBody"]

def startTagImplyTbody(self, token):
            self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))
            return token

def startTagTable(self, token):
            self.parser.parseError("unexpected-start-tag-implies-end-tag",
                                   {"startName": "table", "endName": "table"})
            self.parser.phase.processEndTag(impliedTagToken("table"))
            if not self.parser.innerHTML:
                return token

def startTagStyleScript(self, token):
            return self.parser.phases["inHead"].processStartTag(token)

def startTagInput(self, token):
            if ("type" in token["data"] and
                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
                self.parser.parseError("unexpected-hidden-input-in-table")
                self.tree.insertElement(token)
                # XXX associate with form
                self.tree.openElements.pop()
            else:
                self.startTagOther(token)

def startTagForm(self, token):
            self.parser.parseError("unexpected-form-in-table")
            if self.tree.formPointer is None:
                self.tree.insertElement(token)
                self.tree.formPointer = self.tree.openElements[-1]
                self.tree.openElements.pop()

def startTagOther(self, token):
            self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
            # Do the table magic!
            self.tree.insertFromTable = True
            self.parser.phases["inBody"].processStartTag(token)
            self.tree.insertFromTable = False

def endTagTable(self, token):
            if self.tree.elementInScope("table", variant="table"):
                self.tree.generateImpliedEndTags()
                if self.tree.openElements[-1].name != "table":
                    self.parser.parseError("end-tag-too-early-named",
                                           {"gotName": "table",
                                            "expectedName": self.tree.openElements[-1].name})
                while self.tree.openElements[-1].name != "table":
                    self.tree.openElements.pop()
                self.tree.openElements.pop()
                self.parser.resetInsertionMode()
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
            # Do the table magic!
            self.tree.insertFromTable = True
            self.parser.phases["inBody"].processEndTag(token)
            self.tree.insertFromTable = False

class InTableTextPhase(Phase):
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            self.originalPhase = None
            self.characterTokens = []

def flushCharacters(self):
            data = "".join([item["data"] for item in self.characterTokens])
            if any([item not in spaceCharacters for item in data]):
                token = {"type": tokenTypes["Characters"], "data": data}
                self.parser.phases["inTable"].insertText(token)
            elif data:
                self.tree.insertText(data)
            self.characterTokens = []

def processComment(self, token):
            self.flushCharacters()
            self.parser.phase = self.originalPhase
            return token

def processEOF(self):
            self.flushCharacters()
            self.parser.phase = self.originalPhase
            return True

def processCharacters(self, token):
            if token["data"] == "\u0000":
                return
            self.characterTokens.append(token)

def processSpaceCharacters(self, token):
            # pretty sure we should never reach here
            self.characterTokens.append(token)
    #        assert False

def processStartTag(self, token):
            self.flushCharacters()
            self.parser.phase = self.originalPhase
            return token

def processEndTag(self, token):
            self.flushCharacters()
            self.parser.phase = self.originalPhase
            return token

class InCaptionPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
                  "thead", "tr"), self.startTagTableElement)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                ("caption", self.endTagCaption),
                ("table", self.endTagTable),
                (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
                  "thead", "tr"), self.endTagIgnore)
            ])
            self.endTagHandler.default = self.endTagOther

def ignoreEndTagCaption(self):
            return not self.tree.elementInScope("caption", variant="table")

def processEOF(self):
            self.parser.phases["inBody"].processEOF()

def processCharacters(self, token):
            return self.parser.phases["inBody"].processCharacters(token)

def startTagTableElement(self, token):
            self.parser.parseError()
            # XXX Have to duplicate logic here to find out if the tag is ignored
            ignoreEndTag = self.ignoreEndTagCaption()
            self.parser.phase.processEndTag(impliedTagToken("caption"))
            if not ignoreEndTag:
                return token

def startTagOther(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

def endTagCaption(self, token):
            if not self.ignoreEndTagCaption():
                # AT this code is quite similar to endTagTable in "InTable"
                self.tree.generateImpliedEndTags()
                if self.tree.openElements[-1].name != "caption":
                    self.parser.parseError("expected-one-end-tag-but-got-another",
                                           {"gotName": "caption",
                                            "expectedName": self.tree.openElements[-1].name})
                while self.tree.openElements[-1].name != "caption":
                    self.tree.openElements.pop()
                self.tree.openElements.pop()
                self.tree.clearActiveFormattingElements()
                self.parser.phase = self.parser.phases["inTable"]
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

def endTagTable(self, token):
            self.parser.parseError()
            ignoreEndTag = self.ignoreEndTagCaption()
            self.parser.phase.processEndTag(impliedTagToken("caption"))
            if not ignoreEndTag:
                return token

def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

def endTagOther(self, token):
            return self.parser.phases["inBody"].processEndTag(token)

class InColumnGroupPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-column

def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("col", self.startTagCol)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                ("colgroup", self.endTagColgroup),
                ("col", self.endTagCol)
            ])
            self.endTagHandler.default = self.endTagOther

def ignoreEndTagColgroup(self):
            return self.tree.openElements[-1].name == "html"

def processEOF(self):
            if self.tree.openElements[-1].name == "html":
                assert self.parser.innerHTML
                return
            else:
                ignoreEndTag = self.ignoreEndTagColgroup()
                self.endTagColgroup(impliedTagToken("colgroup"))
                if not ignoreEndTag:
                    return True

def processCharacters(self, token):
            ignoreEndTag = self.ignoreEndTagColgroup()
            self.endTagColgroup(impliedTagToken("colgroup"))
            if not ignoreEndTag:
                return token

def startTagCol(self, token):
            self.tree.insertElement(token)
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

def startTagOther(self, token):
            ignoreEndTag = self.ignoreEndTagColgroup()
            self.endTagColgroup(impliedTagToken("colgroup"))
            if not ignoreEndTag:
                return token

def endTagColgroup(self, token):
            if self.ignoreEndTagColgroup():
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()
            else:
                self.tree.openElements.pop()
                self.parser.phase = self.parser.phases["inTable"]

def endTagCol(self, token):
            self.parser.parseError("no-end-tag", {"name": "col"})

def endTagOther(self, token):
            ignoreEndTag = self.ignoreEndTagColgroup()
            self.endTagColgroup(impliedTagToken("colgroup"))
            if not ignoreEndTag:
                return token

class InTableBodyPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("tr", self.startTagTr),
                (("td", "th"), self.startTagTableCell),
                (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
                 self.startTagTableOther)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
                ("table", self.endTagTable),
                (("body", "caption", "col", "colgroup", "html", "td", "th",
                  "tr"), self.endTagIgnore)
            ])
            self.endTagHandler.default = self.endTagOther

# helper methods
        def clearStackToTableBodyContext(self):
            while self.tree.openElements[-1].name not in ("tbody", "tfoot",
                                                          "thead", "html"):
                # self.parser.parseError("unexpected-implied-end-tag-in-table",
                #  {"name": self.tree.openElements[-1].name})
                self.tree.openElements.pop()
            if self.tree.openElements[-1].name == "html":
                assert self.parser.innerHTML

# the rest
        def processEOF(self):
            self.parser.phases["inTable"].processEOF()

def processSpaceCharacters(self, token):
            return self.parser.phases["inTable"].processSpaceCharacters(token)

def processCharacters(self, token):
            return self.parser.phases["inTable"].processCharacters(token)

def startTagTr(self, token):
            self.clearStackToTableBodyContext()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inRow"]

def startTagTableCell(self, token):
            self.parser.parseError("unexpected-cell-in-table-body",
                                   {"name": token["name"]})
            self.startTagTr(impliedTagToken("tr", "StartTag"))
            return token

def startTagTableOther(self, token):
            # XXX AT Any ideas on how to share this with endTagTable?
            if (self.tree.elementInScope("tbody", variant="table") or
                self.tree.elementInScope("thead", variant="table") or
                    self.tree.elementInScope("tfoot", variant="table")):
                self.clearStackToTableBodyContext()
                self.endTagTableRowGroup(
                    impliedTagToken(self.tree.openElements[-1].name))
                return token
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

def startTagOther(self, token):
            return self.parser.phases["inTable"].processStartTag(token)

def endTagTableRowGroup(self, token):
            if self.tree.elementInScope(token["name"], variant="table"):
                self.clearStackToTableBodyContext()
                self.tree.openElements.pop()
                self.parser.phase = self.parser.phases["inTable"]
            else:
                self.parser.parseError("unexpected-end-tag-in-table-body",
                                       {"name": token["name"]})

def endTagTable(self, token):
            if (self.tree.elementInScope("tbody", variant="table") or
                self.tree.elementInScope("thead", variant="table") or
                    self.tree.elementInScope("tfoot", variant="table")):
                self.clearStackToTableBodyContext()
                self.endTagTableRowGroup(
                    impliedTagToken(self.tree.openElements[-1].name))
                return token
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag-in-table-body",
                                   {"name": token["name"]})

def endTagOther(self, token):
            return self.parser.phases["inTable"].processEndTag(token)

class InRowPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-row
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("td", "th"), self.startTagTableCell),
                (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
                  "tr"), self.startTagTableOther)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                ("tr", self.endTagTr),
                ("table", self.endTagTable),
                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
                (("body", "caption", "col", "colgroup", "html", "td", "th"),
                 self.endTagIgnore)
            ])
            self.endTagHandler.default = self.endTagOther

# helper methods (XXX unify this with other table helper methods)
        def clearStackToTableRowContext(self):
            while self.tree.openElements[-1].name not in ("tr", "html"):
                self.parser.parseError("unexpected-implied-end-tag-in-table-row",
                                       {"name": self.tree.openElements[-1].name})
                self.tree.openElements.pop()

def ignoreEndTagTr(self):
            return not self.tree.elementInScope("tr", variant="table")

# the rest
        def processEOF(self):
            self.parser.phases["inTable"].processEOF()

def processSpaceCharacters(self, token):
            return self.parser.phases["inTable"].processSpaceCharacters(token)

def processCharacters(self, token):
            return self.parser.phases["inTable"].processCharacters(token)

def startTagTableCell(self, token):
            self.clearStackToTableRowContext()
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inCell"]
            self.tree.activeFormattingElements.append(Marker)

def startTagTableOther(self, token):
            ignoreEndTag = self.ignoreEndTagTr()
            self.endTagTr(impliedTagToken("tr"))
            # XXX how are we sure it's always ignored in the innerHTML case?
            if not ignoreEndTag:
                return token

def startTagOther(self, token):
            return self.parser.phases["inTable"].processStartTag(token)

def endTagTr(self, token):
            if not self.ignoreEndTagTr():
                self.clearStackToTableRowContext()
                self.tree.openElements.pop()
                self.parser.phase = self.parser.phases["inTableBody"]
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

def endTagTable(self, token):
            ignoreEndTag = self.ignoreEndTagTr()
            self.endTagTr(impliedTagToken("tr"))
            # Reprocess the current tag if the tr end tag was not ignored
            # XXX how are we sure it's always ignored in the innerHTML case?
            if not ignoreEndTag:
                return token

def endTagTableRowGroup(self, token):
            if self.tree.elementInScope(token["name"], variant="table"):
                self.endTagTr(impliedTagToken("tr"))
                return token
            else:
                self.parser.parseError()

def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag-in-table-row",
                                   {"name": token["name"]})

def endTagOther(self, token):
            return self.parser.phases["inTable"].processEndTag(token)

class InCellPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)
            self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
                  "thead", "tr"), self.startTagTableOther)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                (("td", "th"), self.endTagTableCell),
                (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
                (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
            ])
            self.endTagHandler.default = self.endTagOther

# helper
        def closeCell(self):
            if self.tree.elementInScope("td", variant="table"):
                self.endTagTableCell(impliedTagToken("td"))
            elif self.tree.elementInScope("th", variant="table"):
                self.endTagTableCell(impliedTagToken("th"))

# the rest
        def processEOF(self):
            self.parser.phases["inBody"].processEOF()

def processCharacters(self, token):
            return self.parser.phases["inBody"].processCharacters(token)

def startTagTableOther(self, token):
            if (self.tree.elementInScope("td", variant="table") or
                    self.tree.elementInScope("th", variant="table")):
                self.closeCell()
                return token
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

def startTagOther(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

def endTagTableCell(self, token):
            if self.tree.elementInScope(token["name"], variant="table"):
                self.tree.generateImpliedEndTags(token["name"])
                if self.tree.openElements[-1].name != token["name"]:
                    self.parser.parseError("unexpected-cell-end-tag",
                                           {"name": token["name"]})
                    while True:
                        node = self.tree.openElements.pop()
                        if node.name == token["name"]:
                            break
                else:
                    self.tree.openElements.pop()
                self.tree.clearActiveFormattingElements()
                self.parser.phase = self.parser.phases["inRow"]
            else:
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

def endTagIgnore(self, token):
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

def endTagImply(self, token):
            if self.tree.elementInScope(token["name"], variant="table"):
                self.closeCell()
                return token
            else:
                # sometimes innerHTML case
                self.parser.parseError()

def endTagOther(self, token):
            return self.parser.phases["inBody"].processEndTag(token)

class InSelectPhase(Phase):
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("option", self.startTagOption),
                ("optgroup", self.startTagOptgroup),
                ("select", self.startTagSelect),
                (("input", "keygen", "textarea"), self.startTagInput),
                ("script", self.startTagScript)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                ("option", self.endTagOption),
                ("optgroup", self.endTagOptgroup),
                ("select", self.endTagSelect)
            ])
            self.endTagHandler.default = self.endTagOther

# http://www.whatwg.org/specs/web-apps/current-work/#in-select
        def processEOF(self):
            if self.tree.openElements[-1].name != "html":
                self.parser.parseError("eof-in-select")
            else:
                assert self.parser.innerHTML

def processCharacters(self, token):
            if token["data"] == "\u0000":
                return
            self.tree.insertText(token["data"])

def startTagOption(self, token):
            # We need to imply </option> if <option> is the current node.
            if self.tree.openElements[-1].name == "option":
                self.tree.openElements.pop()
            self.tree.insertElement(token)

def startTagOptgroup(self, token):
            if self.tree.openElements[-1].name == "option":
                self.tree.openElements.pop()
            if self.tree.openElements[-1].name == "optgroup":
                self.tree.openElements.pop()
            self.tree.insertElement(token)

def startTagSelect(self, token):
            self.parser.parseError("unexpected-select-in-select")
            self.endTagSelect(impliedTagToken("select"))

def startTagInput(self, token):
            self.parser.parseError("unexpected-input-in-select")
            if self.tree.elementInScope("select", variant="select"):
                self.endTagSelect(impliedTagToken("select"))
                return token
            else:
                assert self.parser.innerHTML

def startTagScript(self, token):
            return self.parser.phases["inHead"].processStartTag(token)

def startTagOther(self, token):
            self.parser.parseError("unexpected-start-tag-in-select",
                                   {"name": token["name"]})

def endTagOption(self, token):
            if self.tree.openElements[-1].name == "option":
                self.tree.openElements.pop()
            else:
                self.parser.parseError("unexpected-end-tag-in-select",
                                       {"name": "option"})

def endTagOptgroup(self, token):
            # </optgroup> implicitly closes <option>
            if (self.tree.openElements[-1].name == "option" and
                    self.tree.openElements[-2].name == "optgroup"):
                self.tree.openElements.pop()
            # It also closes </optgroup>
            if self.tree.openElements[-1].name == "optgroup":
                self.tree.openElements.pop()
            # But nothing else
            else:
                self.parser.parseError("unexpected-end-tag-in-select",
                                       {"name": "optgroup"})

def endTagSelect(self, token):
            if self.tree.elementInScope("select", variant="select"):
                node = self.tree.openElements.pop()
                while node.name != "select":
                    node = self.tree.openElements.pop()
                self.parser.resetInsertionMode()
            else:
                # innerHTML case
                assert self.parser.innerHTML
                self.parser.parseError()

def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": token["name"]})

class InSelectInTablePhase(Phase):
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

self.startTagHandler = _utils.MethodDispatcher([
                (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
                 self.startTagTable)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
                 self.endTagTable)
            ])
            self.endTagHandler.default = self.endTagOther

def processEOF(self):
            self.parser.phases["inSelect"].processEOF()

def processCharacters(self, token):
            return self.parser.phases["inSelect"].processCharacters(token)

def startTagTable(self, token):
            self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": token["name"]})
            self.endTagOther(impliedTagToken("select"))
            return token

def startTagOther(self, token):
            return self.parser.phases["inSelect"].processStartTag(token)

def endTagTable(self, token):
            self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]})
            if self.tree.elementInScope(token["name"], variant="table"):
                self.endTagOther(impliedTagToken("select"))
                return token

def endTagOther(self, token):
            return self.parser.phases["inSelect"].processEndTag(token)

class InForeignContentPhase(Phase):
        breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                      "center", "code", "dd", "div", "dl", "dt",
                                      "em", "embed", "h1", "h2", "h3",
                                      "h4", "h5", "h6", "head", "hr", "i", "img",
                                      "li", "listing", "menu", "meta", "nobr",
                                      "ol", "p", "pre", "ruby", "s", "small",
                                      "span", "strong", "strike", "sub", "sup",
                                      "table", "tt", "u", "ul", "var"])

def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

def adjustSVGTagNames(self, token):
            replacements = {"altglyph": "altGlyph",
                            "altglyphdef": "altGlyphDef",
                            "altglyphitem": "altGlyphItem",
                            "animatecolor": "animateColor",
                            "animatemotion": "animateMotion",
                            "animatetransform": "animateTransform",
                            "clippath": "clipPath",
                            "feblend": "feBlend",
                            "fecolormatrix": "feColorMatrix",
                            "fecomponenttransfer": "feComponentTransfer",
                            "fecomposite": "feComposite",
                            "feconvolvematrix": "feConvolveMatrix",
                            "fediffuselighting": "feDiffuseLighting",
                            "fedisplacementmap": "feDisplacementMap",
                            "fedistantlight": "feDistantLight",
                            "feflood": "feFlood",
                            "fefunca": "feFuncA",
                            "fefuncb": "feFuncB",
                            "fefuncg": "feFuncG",
                            "fefuncr": "feFuncR",
                            "fegaussianblur": "feGaussianBlur",
                            "feimage": "feImage",
                            "femerge": "feMerge",
                            "femergenode": "feMergeNode",
                            "femorphology": "feMorphology",
                            "feoffset": "feOffset",
                            "fepointlight": "fePointLight",
                            "fespecularlighting": "feSpecularLighting",
                            "fespotlight": "feSpotLight",
                            "fetile": "feTile",
                            "feturbulence": "feTurbulence",
                            "foreignobject": "foreignObject",
                            "glyphref": "glyphRef",
                            "lineargradient": "linearGradient",
                            "radialgradient": "radialGradient",
                            "textpath": "textPath"}

if token["name"] in replacements:
                token["name"] = replacements[token["name"]]

def processCharacters(self, token):
            if token["data"] == "\u0000":
                token["data"] = "\uFFFD"
            elif (self.parser.framesetOK and
                  any(char not in spaceCharacters for char in token["data"])):
                self.parser.framesetOK = False
            Phase.processCharacters(self, token)

def processStartTag(self, token):
            currentNode = self.tree.openElements[-1]
            if (token["name"] in self.breakoutElements or
                (token["name"] == "font" and
                 set(token["data"].keys()) & set(["color", "face", "size"]))):
                self.parser.parseError("unexpected-html-element-in-foreign-content",
                                       {"name": token["name"]})
                while (self.tree.openElements[-1].namespace !=
                       self.tree.defaultNamespace and
                       not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
                       not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
                    self.tree.openElements.pop()
                return token

else:
                if currentNode.namespace == namespaces["mathml"]:
                    self.parser.adjustMathMLAttributes(token)
                elif currentNode.namespace == namespaces["svg"]:
                    self.adjustSVGTagNames(token)
                    self.parser.adjustSVGAttributes(token)
                self.parser.adjustForeignAttributes(token)
                token["namespace"] = currentNode.namespace
                self.tree.insertElement(token)
                if token["selfClosing"]:
                    self.tree.openElements.pop()
                    token["selfClosingAcknowledged"] = True

def processEndTag(self, token):
            nodeIndex = len(self.tree.openElements) - 1
            node = self.tree.openElements[-1]
            if node.name.translate(asciiUpper2Lower) != token["name"]:
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

while True:
                if node.name.translate(asciiUpper2Lower) == token["name"]:
                    # XXX this isn't in the spec but it seems necessary
                    if self.parser.phase == self.parser.phases["inTableText"]:
                        self.parser.phase.flushCharacters()
                        self.parser.phase = self.parser.phase.originalPhase
                    while self.tree.openElements.pop() != node:
                        assert self.tree.openElements
                    new_token = None
                    break
                nodeIndex -= 1

node = self.tree.openElements[nodeIndex]
                if node.namespace != self.tree.defaultNamespace:
                    continue
                else:
                    new_token = self.parser.phase.processEndTag(token)
                    break
            return new_token

class AfterBodyPhase(Phase):
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)])
            self.endTagHandler.default = self.endTagOther

def processEOF(self):
            # Stop parsing
            pass

def processComment(self, token):
            # This is needed because data is to be appended to the <html> element
            # here and not to whatever is currently open.
            self.tree.insertComment(token, self.tree.openElements[0])

def processCharacters(self, token):
            self.parser.parseError("unexpected-char-after-body")
            self.parser.phase = self.parser.phases["inBody"]
            return token

def startTagHtml(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

def startTagOther(self, token):
            self.parser.parseError("unexpected-start-tag-after-body",
                                   {"name": token["name"]})
            self.parser.phase = self.parser.phases["inBody"]
            return token

def endTagHtml(self, name):
            if self.parser.innerHTML:
                self.parser.parseError("unexpected-end-tag-after-body-innerhtml")
            else:
                self.parser.phase = self.parser.phases["afterAfterBody"]

def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag-after-body",
                                   {"name": token["name"]})
            self.parser.phase = self.parser.phases["inBody"]
            return token

class InFramesetPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("frameset", self.startTagFrameset),
                ("frame", self.startTagFrame),
                ("noframes", self.startTagNoframes)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                ("frameset", self.endTagFrameset)
            ])
            self.endTagHandler.default = self.endTagOther

def processEOF(self):
            if self.tree.openElements[-1].name != "html":
                self.parser.parseError("eof-in-frameset")
            else:
                assert self.parser.innerHTML

def processCharacters(self, token):
            self.parser.parseError("unexpected-char-in-frameset")

def startTagFrameset(self, token):
            self.tree.insertElement(token)

def startTagFrame(self, token):
            self.tree.insertElement(token)
            self.tree.openElements.pop()

def startTagNoframes(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

def startTagOther(self, token):
            self.parser.parseError("unexpected-start-tag-in-frameset",
                                   {"name": token["name"]})

def endTagFrameset(self, token):
            if self.tree.openElements[-1].name == "html":
                # innerHTML case
                self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")
            else:
                self.tree.openElements.pop()
            if (not self.parser.innerHTML and
                    self.tree.openElements[-1].name != "frameset"):
                # If we're not in innerHTML mode and the current node is not a
                # "frameset" element (anymore) then switch.
                self.parser.phase = self.parser.phases["afterFrameset"]

def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag-in-frameset",
                                   {"name": token["name"]})

class AfterFramesetPhase(Phase):
        # http://www.whatwg.org/specs/web-apps/current-work/#after3
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("noframes", self.startTagNoframes)
            ])
            self.startTagHandler.default = self.startTagOther

self.endTagHandler = _utils.MethodDispatcher([
                ("html", self.endTagHtml)
            ])
            self.endTagHandler.default = self.endTagOther

def processEOF(self):
            # Stop parsing
            pass

def processCharacters(self, token):
            self.parser.parseError("unexpected-char-after-frameset")

def startTagNoframes(self, token):
            return self.parser.phases["inHead"].processStartTag(token)

def startTagOther(self, token):
            self.parser.parseError("unexpected-start-tag-after-frameset",
                                   {"name": token["name"]})

def endTagHtml(self, token):
            self.parser.phase = self.parser.phases["afterAfterFrameset"]

def endTagOther(self, token):
            self.parser.parseError("unexpected-end-tag-after-frameset",
                                   {"name": token["name"]})

class AfterAfterBodyPhase(Phase):
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml)
            ])
            self.startTagHandler.default = self.startTagOther

def processEOF(self):
            pass

def processComment(self, token):
            self.tree.insertComment(token, self.tree.document)

def processSpaceCharacters(self, token):
            return self.parser.phases["inBody"].processSpaceCharacters(token)

def processCharacters(self, token):
            self.parser.parseError("expected-eof-but-got-char")
            self.parser.phase = self.parser.phases["inBody"]
            return token

def startTagHtml(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

def startTagOther(self, token):
            self.parser.parseError("expected-eof-but-got-start-tag",
                                   {"name": token["name"]})
            self.parser.phase = self.parser.phases["inBody"]
            return token

def processEndTag(self, token):
            self.parser.parseError("expected-eof-but-got-end-tag",
                                   {"name": token["name"]})
            self.parser.phase = self.parser.phases["inBody"]
            return token

class AfterAfterFramesetPhase(Phase):
        def __init__(self, parser, tree):
            Phase.__init__(self, parser, tree)

self.startTagHandler = _utils.MethodDispatcher([
                ("html", self.startTagHtml),
                ("noframes", self.startTagNoFrames)
            ])
            self.startTagHandler.default = self.startTagOther

def processEOF(self):
            pass

def processComment(self, token):
            self.tree.insertComment(token, self.tree.document)

def processSpaceCharacters(self, token):
            return self.parser.phases["inBody"].processSpaceCharacters(token)

def processCharacters(self, token):
            self.parser.parseError("expected-eof-but-got-char")

def startTagHtml(self, token):
            return self.parser.phases["inBody"].processStartTag(token)

def startTagNoFrames(self, token):
            return self.parser.phases["inHead"].processStartTag(token)

def startTagOther(self, token):
            self.parser.parseError("expected-eof-but-got-start-tag",
                                   {"name": token["name"]})

def processEndTag(self, token):
            self.parser.parseError("expected-eof-but-got-end-tag",
                                   {"name": token["name"]})
    # pylint:enable=unused-argument

return {
        "initial": InitialPhase,
        "beforeHtml": BeforeHtmlPhase,
        "beforeHead": BeforeHeadPhase,
        "inHead": InHeadPhase,
        "inHeadNoscript": InHeadNoscriptPhase,
        "afterHead": AfterHeadPhase,
        "inBody": InBodyPhase,
        "text": TextPhase,
        "inTable": InTablePhase,
        "inTableText": InTableTextPhase,
        "inCaption": InCaptionPhase,
        "inColumnGroup": InColumnGroupPhase,
        "inTableBody": InTableBodyPhase,
        "inRow": InRowPhase,
        "inCell": InCellPhase,
        "inSelect": InSelectPhase,
        "inSelectInTable": InSelectInTablePhase,
        "inForeignContent": InForeignContentPhase,
        "afterBody": AfterBodyPhase,
        "inFrameset": InFramesetPhase,
        "afterFrameset": AfterFramesetPhase,
        "afterAfterBody": AfterAfterBodyPhase,
        "afterAfterFrameset": AfterAfterFramesetPhase,
        # XXX after after frameset
    }

def adjust_attributes(token, replacements):
    if PY3 or _utils.PY27:
        needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
    else:
        needs_adjustment = frozenset(token['data']) & frozenset(replacements)
    if needs_adjustment:
        token['data'] = OrderedDict((replacements.get(k, k), v)
                                    for k, v in token['data'].items())

def impliedTagToken(name, type="EndTag", attributes=None,
                    selfClosing=False):
    if attributes is None:
        attributes = {}
    return {"type": tokenTypes[type], "name": name, "data": attributes,
            "selfClosing": selfClosing}

class ParseError(Exception):
    """Error in parsed document"""
    pass