"""Convert <entry> elements of a blog XML export into Markdown files.

Each converted entry becomes ``<path><slug>.md`` containing a small metadata
header (Title, Authors, Date, Modified, Category, Tags, Slug) followed by the
entry body converted from HTML to Markdown text by html2text.
"""
from datetime import date, datetime
from xml.dom import minidom

from html2text import HTML2Text
from slugify import slugify

h = HTML2Text()
path = "./content/poems/"
author = 'theo'
category = 'poèmes'

# Entries at indices 0..57 of the export are never converted; only entries
# from this index onwards are written out.
FIRST_CONVERTED_INDEX = 58


def _firstChild(poem, tag):
    """Return the first child node of the first <tag> under *poem*, or None.

    None is returned both when the element is absent and when it is empty.
    """
    elements = poem.getElementsByTagName(tag)
    if not elements:
        return None
    return elements[0].firstChild


def getTitle(poem):
    """Return the text of the entry's <title>, or "" when missing/empty."""
    titleNode = _firstChild(poem, "title")
    if titleNode is None:
        return ""
    return titleNode.data


def getContent(poem):
    """Return the entry's <content> converted by html2text, or "" when missing/empty."""
    contentNode = _firstChild(poem, "content")
    if contentNode is None:
        return ""
    return h.handle(contentNode.data)


def getDate(poem):
    """Return the entry's <published> date as 'YYYY-MM-DD', or None when missing.

    Raises:
        ValueError: if the part before 'T' is not a valid %Y-%m-%d date.
    """
    publishedNode = _firstChild(poem, "published")
    if publishedNode is None:
        return None
    # format: 2017-09-03T05:59:00.003-07:00 -- only the calendar date is kept.
    day = publishedNode.data.split('T')[0]
    # Round-trip through datetime so a malformed date fails loudly.
    return datetime.strptime(day, '%Y-%m-%d').strftime("%Y-%m-%d")


def write2file(title, content, date_pub):
    """Write one entry to ``path + slugify(title) + '.md'``.

    Args:
        title: entry title; also the source of the file name and Slug field.
        content: entry body text, written after the metadata header.
        date_pub: publication date string, or None; when None the "Date:"
            header line is omitted instead of raising TypeError.
    """
    slug = slugify(title)
    today_date = date.today().strftime("%Y-%m-%d")

    header = ["Title: " + title, "Authors:" + author]
    if date_pub is not None:
        header.append("Date: " + date_pub)
    header.extend([
        "Modified: " + today_date,
        "Category: " + category,
        "Tags: ",
        "Slug: " + slug,
    ])

    # Explicit UTF-8: the header contains non-ASCII text and the platform
    # default encoding is not guaranteed to handle it. The context manager
    # makes sure the file is flushed and closed.
    with open(path + slug + '.md', 'w', encoding='utf-8') as f:
        for line in header:
            print(line, file=f)
        print(content, file=f)


doc = minidom.parse('blog-05-07-2020.xml')
poems = doc.getElementsByTagName("entry")

for i, poem in enumerate(poems):
    if i < FIRST_CONVERTED_INDEX:
        continue
    title = getTitle(poem)
    content = getContent(poem)
    # The getters return "" (never None) for missing data, so test truthiness;
    # an empty title would otherwise produce a file named just ".md".
    if not title or not content:
        continue
    date_pub = getDate(poem)
    write2file(title, content, date_pub)