from cofunctions import *
import re, sys
# Tokeniser pattern. The tag alternative must come first: alternation is tried
# left to right, and r"\S+" also matches text that begins with "<", so with the
# word alternative first the tag alternative could never match and a tag that
# contains whitespace (e.g. '<a href="x">') was split into several tokens.
# Group 1 is the tag, group 2 the plain word; run() only reads group(0).
pat = re.compile(r"(<[^>]*>)|(\S+)")

# Sample input tokenised and parsed by run().
text = "<foo> This is a <b> foo file </b> you know. </foo>"
def run():
    """Tokenise the module-level ``text`` and feed the tokens to the parser.

    Each token matched by ``pat`` is echoed and sent into the coroutine
    started from ``parse_items``; a final ``None`` marks end of input.  When
    the parser finishes it raises ``StopIteration`` and the parse result,
    carried on the exception's ``covalue`` attribute, is printed.

    If the parser does not finish after end of input has been signalled, a
    diagnostic is written to stderr instead of returning silently with no
    output at all.
    """
    parser = costart(parse_items)
    # Presumably primes the coroutine up to its first bare ``yield`` so that
    # send() can deliver a token -- confirm against the cofunctions module.
    next(parser)
    try:
        for match in pat.finditer(text):
            token = match.group(0)
            print("Feeding:", repr(token))
            parser.send(token)
        # parse_items treats a falsy token as end of input.
        parser.send(None)
    except StopIteration as finished:
        print(finished.covalue)
    else:
        # No StopIteration means the parser is still waiting for tokens
        # (presumably an element was left unclosed); report it rather than
        # dropping the partial parse without any indication.
        print(
            "Parser was still waiting for input after end of input was signalled",
            file=sys.stderr,
        )
@cofunction
def parse_elem(cocall, opening_tag):
    """Parse one element whose opening tag has already been consumed.

    cocall      -- call helper received as the first argument; used here to
                   invoke ``parse_items`` (presumably injected by the
                   ``@cofunction`` decorator -- confirm against cofunctions).
    opening_tag -- the opening tag token, e.g. ``"<b>"``.

    The result handed to ``coreturn`` is the tuple ``(name, items)``.
    """
    # Strip the first and last character of the token to obtain the name.
    tag_name = opening_tag[1:-1]
    # Collect the element's contents up to the matching closing tag.
    children = yield cocall(parse_items, "</%s>" % tag_name)
    coreturn((tag_name, children))
@cofunction
def parse_items(cocall, closing_tag=None):
    """Collect tokens and nested elements until this level ends.

    cocall      -- call helper received as the first argument; used here to
                   invoke ``parse_elem`` (presumably injected by the
                   ``@cofunction`` decorator -- confirm against cofunctions).
    closing_tag -- token that terminates this level, or ``None`` for none.

    Tokens arrive through a bare ``yield``.  A falsy token (end of input) or
    a token equal to ``closing_tag`` ends the loop.  The list of collected
    items is handed to ``coreturn``.
    """
    collected = []
    while True:
        incoming = yield
        if not incoming:
            # Falsy token: end of input.
            break
        if not is_opening_tag(incoming):
            if incoming == closing_tag:
                break
            # Ordinary token: keep it as-is.
            collected.append(incoming)
            continue
        # Opening tag: delegate to parse_elem and store what it produces.
        child = yield cocall(parse_elem, incoming)
        collected.append(child)
    coreturn(collected)
def is_opening_tag(token: str) -> bool:
    """Return True if *token* is an opening tag such as ``"<b>"``.

    A token qualifies only when it is wrapped in ``<`` and ``>``, has at
    least one character between them, and is not a closing tag (``"</...>"``).
    Tokens that merely begin with ``<`` -- ``"<"``, ``"<3"``, ``"<>"`` -- are
    rejected; treating them as tags would make parse_elem derive an empty or
    garbage element name.
    """
    return (
        len(token) > 2
        and token.startswith("<")
        and token.endswith(">")
        and not token.startswith("</")
    )
# Run the demo when the module is loaded; being a top-level call, this also
# executes if the file is imported rather than run directly.
run()