# waoff_parser/parser.py
#
# 57 lines
# 2.1 KiB
# Python

from datetime import datetime, timedelta
from bs4 import BeautifulSoup
from icalendar import Calendar, Event
from pytz import timezone
from os import listdir
# Ask the user for the input directory, the output file and the target
# timezone; an empty answer falls back to the built-in default.
html_dirname = input("In which dir are the html files ? (default : html/)") or "html"
ics_filename = (
    input("To which ICS file should I output ? (default : ./waoff-calendar.ics)")
    or "waoff-calendar.ics"
)
local_tz = timezone(
    input("Which timezone should I place the events to ? (default : Europe/Paris)")
    or "Europe/Paris"
)
# Calendar object that the film events are added to.
cal = Calendar()
# takes an EST timezone and converts it to the local one
def convert_timezone(est_dt):
    """Express ``est_dt`` in the module-level ``local_tz`` zone.

    When ``local_tz`` is the Europe/Paris zone, the converted value is
    additionally moved back by one hour.

    NOTE(review): the one-hour shift is applied for Europe/Paris only;
    confirm whether other target zones need the same correction.

    :param est_dt: datetime to convert (handleFilm passes one tagged
        with the "EST" zone).
    :return: the datetime expressed in ``local_tz``.
    """
    localized = est_dt.astimezone(local_tz)
    if local_tz != timezone("Europe/Paris"):
        return localized
    # if local timezone is Paris, correct for daylight saving time
    return localized - timedelta(hours=1)
# adds calendar event for each film
def handleFilm(p, year=2020):
    """Add one calendar event to ``cal`` for a film element.

    Reads the title (``a.Film_title``), the summary (``div.Film_summary``)
    and the start time (``span.start_time``) from ``p``, converts the start
    time with ``convert_timezone`` and registers an event lasting one hour.

    :param p: BeautifulSoup element for a single film.
    :param year: year the listed dates belong to; the page text only
        carries month and day.
    :raises ValueError: if the start-time text does not match the
        "Starts at HH:MM AM/PM EST on <Month> <day>" pattern.
    :raises AttributeError: if one of the expected elements is missing
        (``p.find`` returned ``None``).
    """
    title = p.find('a', {'class': 'Film_title'})
    summary = p.find('div', {'class': 'Film_summary'})
    start_tag = p.find('span', {'class': 'start_time'})

    # Parse the year together with the date: without it strptime assumes
    # 1900, which rejects "February 29" even when ``year`` is a leap year.
    start = datetime.strptime(
        "{} {}".format(start_tag.string, year),
        "Starts at %I:%M %p EST on %B %d %Y",
    )
    # pytz zones are attached with localize() rather than replace(tzinfo=...).
    start = timezone("EST").localize(start)
    start = convert_timezone(start)

    event = Event()
    event.add('description', summary.string)
    event.add('summary', title.string)
    event.add('dtstart', start)
    # The end time is fixed at one hour after the start.
    event.add('dtend', start + timedelta(hours=1))
    cal.add_component(event)
# read every html file in the html directory
for filename in listdir(html_dirname):
    # Read raw bytes and let BeautifulSoup detect the page encoding, so the
    # result does not depend on the platform's default text encoding.
    # The with-block closes the file once it has been read.
    with open(html_dirname + '/' + filename, 'rb') as html_file:
        page = html_file.read()
    # Name the parser explicitly so the same one is used in every
    # environment (and no "no parser specified" warning is emitted).
    soup = BeautifulSoup(page, "html.parser")
    for p in soup.find_all('div', {'class': 'Film'}):
        handleFilm(p)
# write events to ics file
with open(ics_filename, 'wb') as ics_file:
    ics_file.write(cal.to_ical())
print("Done !")