58 lines
1.6 KiB
Python
58 lines
1.6 KiB
Python
from xml.dom import minidom
|
|
from html2text import HTML2Text
|
|
from slugify import slugify
|
|
from datetime import date, datetime
|
|
|
|
# Shared converter instance; getContent() passes entry bodies through
# h.handle().
h = HTML2Text()

# Output settings read by write2file(): the directory the generated .md
# files are written to, and the fixed author / category values placed in
# every file's metadata header.
path = "./content/poems/"
author = "theo"
category = "poèmes"
|
|
|
|
def getTitle(poem):
    """Return the text of the first ``<title>`` element found in *poem*.

    Args:
        poem: A DOM element (an ``entry`` node of the parsed export) that
            provides ``getElementsByTagName``.

    Returns:
        The ``data`` of the title element's first child node, or ``""``
        when the entry has no ``<title>`` element or the element is empty.
    """
    titleNodes = poem.getElementsByTagName("title")
    # An entry without any <title> element is treated like an empty title
    # instead of failing with IndexError.
    if not titleNodes:
        return ""

    titleNode = titleNodes[0].firstChild
    if titleNode is None:
        return ""
    return titleNode.data
|
|
|
|
def getContent(poem):
    """Return the body of *poem* after conversion by ``h.handle``.

    Args:
        poem: A DOM element (an ``entry`` node of the parsed export) that
            provides ``getElementsByTagName``.

    Returns:
        The result of passing the ``data`` of the first ``<content>``
        element's first child through the module-level ``HTML2Text``
        instance ``h``, or ``""`` when the entry has no ``<content>``
        element or the element is empty.
    """
    contentNodes = poem.getElementsByTagName("content")
    # An entry without any <content> element is treated like an empty body
    # instead of failing with IndexError.
    if not contentNodes:
        return ""

    contentNode = contentNodes[0].firstChild
    if contentNode is None:
        return ""
    return h.handle(contentNode.data)
|
|
|
|
def getDate(poem):
    """Return the publication date of *poem* as a ``YYYY-MM-DD`` string.

    Args:
        poem: A DOM element (an ``entry`` node of the parsed export) that
            provides ``getElementsByTagName``.

    Returns:
        The calendar-date part of the first ``<published>`` element,
        formatted as ``YYYY-MM-DD``, or ``None`` when the entry has no
        ``<published>`` element or the element is empty.

    Raises:
        ValueError: If the text before the ``T`` separator does not match
            ``%Y-%m-%d``.
    """
    publishedNodes = poem.getElementsByTagName("published")
    # An entry without any <published> element is reported the same way as
    # an empty one (None) instead of failing with IndexError.
    if not publishedNodes:
        return None

    publishedNode = publishedNodes[0].firstChild
    if publishedNode is None:
        return None

    # format: 2017-09-03T05:59:00.003-07:00
    # Only the part before the "T" is used; the time and UTC offset are
    # dropped. Surrounding whitespace is stripped so that a value padded
    # with whitespace still parses.
    day_text = publishedNode.data.strip().split('T')[0]
    # Round-tripping through datetime validates the value and yields a
    # zero-padded date.
    datetime_object = datetime.strptime(day_text, '%Y-%m-%d')
    return datetime_object.strftime("%Y-%m-%d")
|
|
|
|
|
|
def write2file(title, content, date_pub):
    """Write one poem to ``<path><slug>.md`` with a metadata header.

    The slug is ``slugify(title)``. The file is opened in ``'w'`` mode, so
    an existing file with the same slug is overwritten. The header lines
    (Title, Authors, Date, Modified, Category, Tags, Slug) are written
    first, followed by *content*.

    Args:
        title: Title of the poem; also the source of the file name.
        content: Body text written after the header lines.
        date_pub: Publication date string for the ``Date`` line, or
            ``None`` when unknown, in which case no ``Date`` line is
            written.
    """
    slug = slugify(title)
    today_date = date.today().strftime("%Y-%m-%d")

    header = ["Title: " + title, "Authors:" + author]
    # getDate() returns None for entries without a publication date;
    # concatenating None to a str would raise TypeError, so the line is
    # left out instead.
    if date_pub is not None:
        header.append("Date: " + date_pub)
    header.extend([
        "Modified: " + today_date,
        "Category: " + category,
        "Tags: ",
        "Slug: " + slug,
    ])

    # The context manager guarantees the file is flushed and closed. The
    # explicit encoding keeps non-ASCII text (e.g. the category) independent
    # of the platform's default encoding.
    with open(path + slug + '.md', 'w', encoding='utf-8') as f:
        for line in header:
            print(line, file=f)
        print(content, file=f)
|
|
|
|
# Entries with index 0-57 are never written out; conversion starts at
# entry 58.
# NOTE(review): presumably the leading entries of the export are not
# poems -- confirm against the export file.
FIRST_CONVERTED_INDEX = 58

doc = minidom.parse('blog-05-07-2020.xml')
poems = doc.getElementsByTagName("entry")

for i, poem in enumerate(poems):
    # Skip before extracting anything, so no conversion work is spent on
    # entries that would be discarded anyway.
    if i < FIRST_CONVERTED_INDEX:
        continue

    title = getTitle(poem)
    content = getContent(poem)
    date_pub = getDate(poem)

    # getTitle()/getContent() return "" rather than None for empty
    # entries, so this guard only matters if they ever start returning None.
    if title is not None and content is not None:
        write2file(title, content, date_pub)
|