lxml iterparse fills memory despite on clear



I'm trying to parse xml. First iterparse works correctly, but second starts to fill memory. If remove the first iterparse, then nothing changes. Xml is valid.



def clear_element(e):
e.clear()
while e.getprevious() is not None:
del e.getparent()[0]

def import_xml(request):
f = 'file.xml'
offers = etree.iterparse(f, events=('end',), tag='offer')
for event, offer in offers:
# processing
# works correctly
clear_element(offer)

categories = etree.iterparse(f, events=('end',), tag='category')
for event, category in categories:
# using memory
clear_element(category)


XML:



<shop>
<categories>
<category>name</category>
<category>name</category>
<category>name</category>
~ 1000 categories
</categories>
<offers>
<offer>
<inner_tag>data</inner_tag>
<inner_tag>data</inner_tag>
</offer>
<offer>
<inner_tag>data</inner_tag>
<inner_tag>data</inner_tag>
</offer>
~ 450000 offers
</offers>
</shop>

No comments:

Post a Comment