Skip to content

AttributeError: 'unicode' object has no attribute 'tag' #115

@aleray

Description

@aleray

Hi,

The following code raises "AttributeError: 'unicode' object has no attribute 'tag'":

import html5lib


# Parse with the lxml tree builder, then walk and serialize the result with
# the matching lxml tree walker.
parser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder("lxml"), namespaceHTMLElements=False)
serializer = html5lib.serializer.HTMLSerializer(omit_optional_tags=False)
walker = html5lib.treewalkers.getTreeWalker("lxml")


# Works: the fragment text is pure ASCII.
src = u"experiences"
tree = parser.parseFragment(src, container="div")
stream = walker(tree)
output = serializer.serialize(stream)
print("\n".join(output))

# Doesn't work: the same steps with a non-ASCII character (\xe9) in the text
# raise AttributeError: 'unicode' object has no attribute 'tag'.
src = u"exp\xe9riences"
tree = parser.parseFragment(src, container="div")
stream = walker(tree)
output = serializer.serialize(stream)
print("\n".join(output))

I think the error lies in how the isstring attribute is computed in the FragmentWrapper class in treewalkers/lxmletree.py

Changing:

def ensure_str(s):
    """Return ``s`` as text, decoding it from UTF-8 if it is not text already.

    ``None`` is passed through unchanged and a ``text_type`` instance is
    returned as-is.  Any other value is decoded as UTF-8 with the "strict"
    error handler, so malformed input raises instead of being replaced.
    """
    if s is None:
        return None
    elif isinstance(s, text_type):
        return s
    else:
        # Not text yet (presumably bytes; text_type is defined outside this
        # snippet).
        return s.decode("utf-8", "strict")


class FragmentWrapper(object):
    # Excerpt only: the rest of the class body is elided in this snippet.
    def __init__(self, fragment_root, obj):
        ...
        # True only for str or bytes instances.
        # NOTE(review): on Python 2 a unicode object matches neither type,
        # which looks like the cause of the reported AttributeError -- confirm.
        self.isstring = isinstance(obj, str) or isinstance(obj, bytes)
        # Support for bytes here is Py2
        if self.isstring:
            # self.obj is presumably assigned in the elided code above.
            self.obj = ensure_str(self.obj)

to

def ensure_str(s):
    """Return ``s`` as text, decoding it from UTF-8 if it is not text already.

    ``None`` is passed through unchanged and a ``text_type`` instance is
    returned as-is.  Any other value is decoded as UTF-8 with the "strict"
    error handler, so malformed input raises instead of being replaced.
    """
    if s is None:
        return None
    elif isinstance(s, text_type):
        return s
    else:
        # Not text yet (presumably bytes; text_type is defined outside this
        # snippet).
        return s.decode("utf-8", "strict")


class FragmentWrapper(object):
    # Excerpt only: the rest of the class body is elided in this snippet.
    def __init__(self, fragment_root, obj):
        ...
        # Also accept text_type, so text that is neither str nor bytes is
        # still flagged as a string (presumably unicode on Python 2;
        # text_type is defined outside this snippet).
        self.isstring = isinstance(obj, str) or isinstance(obj, bytes) or isinstance(obj, text_type)
        # Support for bytes here is Py2
        if self.isstring:
            # self.obj is presumably assigned in the elided code above.
            self.obj = ensure_str(self.obj)

seems to do the job... What do you think?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions