from bs4 import BeautifulSoup
import urllib.request
import time

# Global tally of base ingredients, keyed by ingredient name.
items = {}

def get_ingredients(text):
    """Parse an item page and return {ingredient_href: quantity} from its 'add_stat_table'."""
    ingredients = {}
    soup = BeautifulSoup(text, 'lxml')

    table = soup.find_all('table', {'class': 'add_stat_table'})[0]
    for child in table.children:
        # Each row's first cell holds a link; the link's second child ends in text like '( xN )'.
        a = tuple(tuple(child.children)[0].children)[0]
        ingredients[a.attrs['href']] = int(tuple(a.children)[1].text.split('( x')[-1][:-2])

    return ingredients

def get_ingredients2(text):
    """Parse an ingredient page and return {name: quantity}; return {} if any quantity is missing."""
    ingredients = {}
    soup = BeautifulSoup(text, 'lxml')

    table = soup.find_all('table', {'class': 'add_stat_table'})[0]
    key = ''
    for child in tuple(tuple(table.children)[0].children)[0].children:
        if child.has_attr('href'):
            key = child.text
            ingredients[key] = None
        elif child.text.startswith('x'):
            # Quantity cell reads like 'x N'; drop the 'x ' prefix.
            ingredients[key] = int(child.text[2:])
    # We skip trees and rocks…
    for i in ingredients:
        if ingredients[i] is None:
            return {}
    return ingredients

def request(url):
    # Throttle to at most one request per second and send a browser User-Agent.
    time.sleep(1)
    r = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    r = urllib.request.urlopen(r)
    return r.read()

def recetter_un_objet(url):
    """Fetch a recipe page, then each ingredient's page, and add the base quantities to `items`."""
    hostname = '/'.join(url.split('/')[:3])
    try:
        ingredients = get_ingredients(request(url))
    except Exception:
        print('Error parsing ', url)
        raise

    for ingredient in ingredients:
        try:
            i2 = get_ingredients2(request(hostname + ingredient))
        except Exception:
            print('Error parsing ', hostname + ingredient)
            raise
        for i in i2:
            if i not in items:
                items[i] = 0
            items[i] += i2[i] * ingredients[ingredient]

if __name__ == '__main__':
    print('Recipe counter. Works with https://genshin.honeyhunterworld.com as of September 2021. Reads the urls.txt file.')
    with open('urls.txt', 'r') as f:
        for line in f:
            line = line.strip()
            print('parsing ', line)
            recetter_un_objet(line)
    print(items)