Enhance Parser handling of leading and trailing whitespace

This commit is contained in:
Dan 2019-07-28 15:11:18 +02:00
parent e1c6e6ecc1
commit 8cdcf90b10

View File

@@ -86,7 +86,8 @@ class Parser(HTMLParser):
for entities in self.tag_entities.values():
for entity in entities:
entity.length += len(data)
entity.offset += len(data) - len(data.lstrip()) # Ignore left whitespaces for offsets
entity.length += len(data.strip()) # Ignore all whitespaces (left + right) for lengths
self.text += data