From d5592be0ccdae16e219a1054a2de4776483ed50f Mon Sep 17 00:00:00 2001 From: theo1 Date: Sat, 30 May 2020 16:46:19 +0200 Subject: [PATCH] First commit of a python parser --- .gitignore | 2 + README.md | 16 +++ index.html | 27 +++++ parser.py | 37 ++++++ requirements.txt | 3 + waoff-calendar.ics | 273 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 358 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 index.html create mode 100644 parser.py create mode 100644 requirements.txt create mode 100644 waoff-calendar.ics diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e5fc6d2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +# ignore local venv +waoffvenv/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..7af54db --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +# We Are One : A Global Film Festival parser + +## What does it do ? + +_This short script parses an HTML page saved from the official WAOFF schedule webpage (http://www.weareoneglobalfestival.com/schedule) and converts it into an iCalendar-formatted file that you can import directly into your calendar app._ + +**Note** : the webpage is JS-based, which means you can't just `wget` it to extract the HTML page. You'll need to browse to the schedule page you want, open the browser's developer console, and type : +```js +console.log(document.getElementsByTagName('html')[0].innerHTML); +``` +, then copy-paste the output into a local HTML file (the script reads `index.html`), which the script will then parse. + +## TODO +- [ ] fix the start time problem in the ICS file +- [ ] handle timezone (the time on the website is EST) +- [ ] interactive choice of HTML page and name of calendar diff --git a/index.html b/index.html new file mode 100644 index 0000000..5f0fc7a --- /dev/null +++ b/index.html @@ -0,0 +1,27 @@ +<--- Sample HTML page from http://www.weareoneglobalfestival.com/schedule ---> + +Festival Schedule | We Are One: A Global Film Festival
COMING TO YOUTUBE MAY 29 -JUNE 7, 2020
We Are One Festival White logo
TYPE OF FILM
GENRE


MayJune



360 VR
Alteration
Starts at 07:00 AM EST on May 29
Program
Annecy Shorts for Families
Starts at 07:00 AM EST on May 29
Short Film
Bilby
Starts at 07:00 AM EST on May 29
Short Film
Bird Karma
Starts at 07:00 AM EST on May 29
360 VR
Bloodless
Starts at 07:00 AM EST on May 29
360 VR
Crow: The Legend
Starts at 07:00 AM EST on May 29
360 VR
Daughters of Chibok
Starts at 07:00 AM EST on May 29
360 VR
Extravaganza
Starts at 07:00 AM EST on May 29
360 VR
Ghost Fleet VR
Starts at 07:00 AM EST on May 29
360 VR
Isle of the Dead
Starts at 07:00 AM EST on May 29
360 VR
Ivory Burn
Starts at 07:00 AM EST on May 29
Web Series
Jaws — Assembling a Top-Tier Team (feat. TierZoo)
Starts at 07:00 AM EST on May 29
Short Film
Marooned
Starts at 07:00 AM EST on May 29
360 VR
Minotaur
Starts at 07:00 AM EST on May 29
360 VR
My Africa
Starts at 07:00 AM EST on May 29
360 VR
On/Off
Starts at 07:00 AM EST on May 29
360 VR
Passenger
Starts at 07:00 AM EST on May 29
Web Series
Sébastien Tellier on Paris’ rooftop | A Take Away Show
Starts at 07:00 AM EST on May 29
360 VR
Step To The Line
Starts at 07:00 AM EST on May 29
Web Series
The Stories That Prepared Us
Starts at 07:00 AM EST on May 29
360 VR
Traveling While Black
Starts at 07:00 AM EST on May 29
360 VR
The Waiting Room
Starts at 07:00 AM EST on May 29
Short Film
And Then The Bear
Starts at 08:00 AM EST on May 29
Short Film
The Distance Between Us And The Sky
Starts at 08:00 AM EST on May 29
Short Film
White Echo
Starts at 08:00 AM EST on May 29
Panel
Cinema Cafe with Jackie Chan
Starts at 09:30 AM EST on May 29
Television
Losing Alice
Starts at 10:45 AM EST on May 29
Feature Film
Electric Swan
Starts at 01:00 PM EST on May 29
Feature Film
Crazy World
Starts at 02:00 PM EST on May 29
Feature Film
Rudeboy: The Story of Trojan Records
Starts at 03:30 PM EST on May 29
Short Film
Cru - Raw
Starts at 05:15 PM EST on May 29
Short Film
Egg
Starts at 05:15 PM EST on May 29
Short Film
The Light Side
Starts at 05:15 PM EST on May 29
Short Film
Motorcycle Drive By
Starts at 05:15 PM EST on May 29
Short Film
No More Wings
Starts at 05:15 PM EST on May 29
Short Film
TOTO
Starts at 05:15 PM EST on May 29
Program
Tribeca 2020 Shorts Program
Starts at 05:15 PM EST on May 29
Short Film
When I Write It
Starts at 05:15 PM EST on May 29
Short Film
Circus Person
Starts at 07:05 PM EST on May 29
# --- Patch context preserved from the original line (not part of the script) ---
# +debugger eval code:1:9 + diff --git a/parser.py b/parser.py new file mode 100644 index 0000000..2c02978 --- /dev/null +++ b/parser.py @@ -0,0 +1,37 @@
"""Convert a saved WAOFF schedule page into an iCalendar file.

Reads ``index.html`` from the current directory, creates one calendar event
per ``div`` with class ``Film`` and writes the result to
``waoff-calendar.ics``.
"""
from datetime import datetime

from bs4 import BeautifulSoup
from icalendar import Calendar, Event
import pytz

# The schedule text only carries month and day, so the year is supplied here.
FESTIVAL_YEAR = 2020
# Zone used to interpret the wall-clock times shown on the page.
# NOTE(review): the page labels times "EST", but US/Eastern observes daylight
# time in late May/June -- confirm which offset the site really means.
SCHEDULE_TZ = pytz.timezone('US/Eastern')
# Matches e.g. "Starts at 05:15 PM EST on May 29" with the year appended.
START_FORMAT = "Starts at %I:%M %p EST on %B %d %Y"

# Saved pages are assumed to be UTF-8 encoded -- TODO confirm for other sources.
with open('index.html', 'r', encoding='utf-8') as file:
    page = file.read()

cal = Calendar()
# PRODID and VERSION are required calendar properties in RFC 5545.
cal.add('prodid', '-//WAOFF schedule parser//')
cal.add('version', '2.0')


def handleFilm(p):
    """Add one event to the module-level calendar ``cal``.

    Args:
        p: BeautifulSoup element for one schedule entry. It is searched for
            an ``a`` with class ``Film_title``, a ``span`` with class
            ``start_time`` and a ``div`` with class ``Film_summary``.

    Entries lacking a title or a start time are reported on stdout and
    skipped. A missing or empty summary simply yields an event without a
    DESCRIPTION instead of the literal text "None".

    Raises:
        ValueError: if the start-time text does not match ``START_FORMAT``.
    """
    title_tag = p.find('a', {'class': 'Film_title'})
    time_tag = p.find('span', {'class': 'start_time'})
    if title_tag is None or time_tag is None:
        print("Skipping schedule entry without a title or start time")
        return

    title = title_tag.get_text().strip()
    start_text = time_tag.get_text().strip()

    # Parse with the year included so the date is complete from the start.
    naive_start = datetime.strptime(
        start_text + " " + str(FESTIVAL_YEAR), START_FORMAT
    )
    # localize() returns a new aware datetime; the original discarded it,
    # which left every DTSTART as a floating (timezone-less) time.
    start = SCHEDULE_TZ.localize(naive_start)
    print(title)
    print(start)

    event = Event()
    event.add('summary', title)
    # Store the start in UTC so importing apps need no timezone definition.
    event.add('dtstart', start.astimezone(pytz.utc))

    summary_tag = p.find('div', {'class': 'Film_summary'})
    description = summary_tag.get_text().strip() if summary_tag is not None else ''
    if description:
        event.add('description', description)

    cal.add_component(event)


# Name the parser explicitly so results do not depend on which optional
# parser libraries happen to be installed.
soup = BeautifulSoup(page, 'html.parser')
for p in soup.find_all('div', {'class': 'Film'}):
    handleFilm(p)

# to_ical() returns bytes, hence binary mode.
with open("waoff-calendar.ics", 'wb') as f:
    f.write(cal.to_ical())

# --- Patch context preserved from the original line (not part of the script) ---
# diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3df2ba3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +beautifulsoup4 +icalendar +pytz diff --git a/waoff-calendar.ics b/waoff-calendar.ics new file mode 100644 index 0000000..a587f25 --- /dev/null +++ b/waoff-calendar.ics @@ -0,0 +1,273 @@ +BEGIN:VCALENDAR +BEGIN:VEVENT +SUMMARY:Alteration +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:When Alexandro volunteered for a dream experiment\, he never i + magined that he would be subjected to a form of Artificial Intelligence wh + o aims to digitize his subconscious. 
+END:VEVENT +BEGIN:VEVENT +SUMMARY:Annecy Shorts for Families +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:None +END:VEVENT +BEGIN:VEVENT +SUMMARY:Bilby +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:This sweet short from DreamWorks Animation Studios finds a lon + esome bilby tangled up with a helpless baby bird in the deadly desert of A + ustralia. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Bird Karma +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:A crafty\, long-legged bird chases a mesmerizing fish through + a foggy pond in this sprightly short\, produced by DreamWorks Animation St + udios. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Bloodless +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:Portraying the final moments of a sex worker murdered by a US + soldier in South Korea\, this piece brings historical atrocities to light + through a concrete personal experience. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Crow: The Legend +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:The carefree animals imagine spring is endless. But when the v + ery first winter arrives\, can Crow (John Legend) make the ultimate sacrif + ice to save his friends?\n\n +END:VEVENT +BEGIN:VEVENT +SUMMARY:Daughters of Chibok +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:This VR experience deals with the aftermath of the kidnapping + of 276 schoolgirls in 2014 in Nigeria\, and explores topical global issues + of gender rights and the right to education.\n\n +END:VEVENT +BEGIN:VEVENT +SUMMARY:Extravaganza +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:A busy executive (Paul Scheer) tests a VR headset that promise + s to be the future of entertainment...but is actually anything but forward + thinking. 
+END:VEVENT +BEGIN:VEVENT +SUMMARY:Ghost Fleet VR +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:This harrowing look at slavery in the Thai fishing industry is + told through the experience of one man's harrowing ordeal to escape a pri + son of water after 10 years at sea.\n\n +END:VEVENT +BEGIN:VEVENT +SUMMARY:Isle of the Dead +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:A timeless voyage from an everyday apartment toward our final + destination\, guided by Charon\, ferryman of the Underworld. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Ivory Burn +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:This film bears witness to the burning of over 100 tons of ele + phant tusks and rhinoceros horn: a symbolic and visceral clarion call to t + he poaching and illegal trade syndicates. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Jaws — Assembling a Top-Tier Team (feat. TierZoo) +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:This video essay by YouTube creators of Lessons From The Scree + nplay analyzes the classic blockbuster Jaws to examine how a screenwriter + can craft a dynamic team of characters. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Marooned +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:In this stylized DreamWorks Animation Studios short set in the + not-too-distant future\, a cantankerous and selfish robot is put to the t + est while stranded on an abandoned lunar outpost.\n\n +END:VEVENT +BEGIN:VEVENT +SUMMARY:Minotaur +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:A mythical passage through the archetypal hero’s journey. Th + rough abstractions\, we experience their corresponding emotional states: c + alm\, love\, joy\, surprise\, fear\, anger/hate\, and death/rebirth\, lead + ing again to calm. 
+END:VEVENT +BEGIN:VEVENT +SUMMARY:My Africa +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:This mixed-reality experience transports viewers to an elephan + t sanctuary in Kenya\, where a community is reknitting the bonds that have + long enabled people and wildlife to coexist.\n\n +END:VEVENT +BEGIN:VEVENT +SUMMARY:On/Off +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:Behind the closed doors of an intensive care unit\, ON/OFF loo + ks at how difficult it is for healthcare professionals to confront and “ + manage” death on a daily basis. \n\n +END:VEVENT +BEGIN:VEVENT +SUMMARY:Passenger +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:This abstracted and dreamlike experience places you in the bac + kseat of a taxi\, and recreates the geographic and visual dislocation of f + inding a new home in a foreign land.\n\n +END:VEVENT +BEGIN:VEVENT +SUMMARY:Sébastien Tellier on Paris’ rooftop | A Take Away Show +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:French artist Sébastien Tellier serenades Paris from one of i + ts highest point of view: the roof of Le Théâtre du Châtelet\, in the h + eart of the city +END:VEVENT +BEGIN:VEVENT +SUMMARY:Step To The Line +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:Shot entirely in a maximum security prison\, this piece shows + how release from incarceration can be just as jarring as intake and how pa + rallel lives diverge when someone serves time.\n\n +END:VEVENT +BEGIN:VEVENT +SUMMARY:The Stories That Prepared Us +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:A telling of the story of Coronavirus through moments in criti + cally-acclaimed film and television. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Traveling While Black +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:Confronting the way we understand and talk about race in Ameri + ca\, Traveling While Black immerses the viewer in the long history of rest + ricted movement for Black Americans. 
\n\n +END:VEVENT +BEGIN:VEVENT +SUMMARY:The Waiting Room +DTSTART;VALUE=DATE-TIME:20200529T070000 +DESCRIPTION:An unflinching record of Victoria Mapplebeck’s journey from + breast cancer diagnosis to recovery\, The Waiting Room considers what we c + an or can’t control when our bodies fail us. \n\n +END:VEVENT +BEGIN:VEVENT +SUMMARY:And Then The Bear +DTSTART;VALUE=DATE-TIME:20200529T080000 +DESCRIPTION:Houses will burn. Men and women will tremble. But the children + will come together and howl as they dance on the ashes like wild bears in + this vivid animation. +END:VEVENT +BEGIN:VEVENT +SUMMARY:The Distance Between Us And The Sky +DTSTART;VALUE=DATE-TIME:20200529T080000 +DESCRIPTION:In this Short Film Palme d’Or winner\, two strangers (Ioko I + oannis Kotidis\, Nikos Zeginogolu) meet one night at a gas station. One is + there to refuel\; the other is stranded. +END:VEVENT +BEGIN:VEVENT +SUMMARY:The Short Film Selection in Competition at the 72nd Festival de Ca + nnes: Program 2 +DTSTART;VALUE=DATE-TIME:20200529T080000 +DESCRIPTION:None +END:VEVENT +BEGIN:VEVENT +SUMMARY:White Echo +DTSTART;VALUE=DATE-TIME:20200529T080000 +DESCRIPTION:Chloë Sevigny’s ethereal séance-story sees a young woman\, + Carla (Kate Lyn Sheil)\, explore and wield her inner power while on vacat + ion with friends. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Cinema Cafe with Jackie Chan +DTSTART;VALUE=DATE-TIME:20200529T093000 +DESCRIPTION:Recorded live from the Sundance Film Festival\, each Cinema Ca + fé invigorates the culture of conversation. Our informal chats round up s + pecial guests for thought-provoking discussions between Festival filmmaker + s and journalists. Cinema Cafe with Jac... +END:VEVENT +BEGIN:VEVENT +SUMMARY:Losing Alice +DTSTART;VALUE=DATE-TIME:20200529T104500 +DESCRIPTION:Fascination spirals into Faustian bargain after an ambitious f + emale film director meets—and obsesses over—a younger femme-fatale scr + eenwriter. 
+END:VEVENT +BEGIN:VEVENT +SUMMARY:Electric Swan +DTSTART;VALUE=DATE-TIME:20200529T130000 +DESCRIPTION:An apartment building in Buenos Aires begins to tremble and pr + ovokes an otherworldly nausea throughout the city in this magical realist + skewering of its class divisions.\n\n +END:VEVENT +BEGIN:VEVENT +SUMMARY:Crazy World +DTSTART;VALUE=DATE-TIME:20200529T140000 +DESCRIPTION:Pint-sized kung fu masters face off with the evil Tiger Mafia + in this action flick from Uganda's no-budget\, gonzo super-studio\, Wakali + wood. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Rudeboy: The Story of Trojan Records +DTSTART;VALUE=DATE-TIME:20200529T153000 +DESCRIPTION:Featuring Jamaican reggae and ska legends like Lee “Scratch + ” Perry and Marcia Griffiths\, Rudeboy chronicles a multicultural revolu + tion on the dancefloors of late ’60s and early ’70s Britain. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Cru - Raw +DTSTART;VALUE=DATE-TIME:20200529T171500 +DESCRIPTION:A young chef (Jeanne Werne) must learn that in this kitchen\, + a lot of blood\, sweat\, and tears go into making every dish. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Egg +DTSTART;VALUE=DATE-TIME:20200529T171500 +DESCRIPTION:An action-packed romance and Americana western adventure about + an egg's epic Hollywood journey from farm to table. +END:VEVENT +BEGIN:VEVENT +SUMMARY:The Light Side +DTSTART;VALUE=DATE-TIME:20200529T171500 +DESCRIPTION:An aging Sith Lord must come to grips with his past and discov + er why humility may be the greatest force in the galaxy. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Motorcycle Drive By +DTSTART;VALUE=DATE-TIME:20200529T171500 +DESCRIPTION:Third Eye Blind cannot finish their new album in time for a ma + ssive tour. Their fans still show\, breaking attendance records\, and high + lighting the importance of the band's deep cuts. 
+END:VEVENT +BEGIN:VEVENT +SUMMARY:No More Wings +DTSTART;VALUE=DATE-TIME:20200529T171500 +DESCRIPTION:At a divergent point in their lives\, two lifelong friends (Iv + anno Jeremiah\, Parys Jordon) meet at their favorite South London fried ch + icken shop. +END:VEVENT +BEGIN:VEVENT +SUMMARY:TOTO +DTSTART;VALUE=DATE-TIME:20200529T171500 +DESCRIPTION:Rosa Forlano\, a 90 year old Nonna\, falls in love with a robo + t while teaching it how to make spaghetti. Unfortunately\, her family reci + pe is erased by a software update. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Tribeca 2020 Shorts Program +DTSTART;VALUE=DATE-TIME:20200529T171500 +DESCRIPTION:None +END:VEVENT +BEGIN:VEVENT +SUMMARY:When I Write It +DTSTART;VALUE=DATE-TIME:20200529T171500 +DESCRIPTION:Two Oakland teens (Leila Mottley\, Ajai Kasim) explore what it + means to be young\, Black and committed to making art in their rapidly ch + anging city. +END:VEVENT +BEGIN:VEVENT +SUMMARY:Circus Person +DTSTART;VALUE=DATE-TIME:20200529T190500 +DESCRIPTION:Left by her fiancé for another woman\, a grieving painter (Br + itt Lower) joins a one-ring circus to reclaim her forgotten wildness. +END:VEVENT +END:VCALENDAR