1
0
mirror of https://github.com/libretro/libretro-database synced 2024-11-23 14:26:39 +00:00
libretro-database/scripts/scraper.py
overkill fef6ba4fc8
Remove unused imports
I removed two unused `import`s to meet recommendations from LGTM (8faf03bb36/files/scripts/scraper.py).
2021-03-21 18:00:19 -04:00

39 lines
1.2 KiB
Python

from lxml import html
import requests
import os
from chtwrite import cheatwriter
#cheat scraper for http://bsfree.shadowflareindustries.com/ navigate to the system and codetype you want, copy url
#run the script with "python scraper.py 'url'"
baseurl = 'http://bsfree.org/'
supported = "Gameboy", "Gameboy Advance", "Sega Game Gear", "Genesis", "Nintendo Entertainment System", "Sega Master System", "Playstation", "Super Nintendo", "Sega Saturn"
page = requests.get(baseurl)
tree = html.fromstring(page.text)
sysurl = tree.xpath('//td[@class="codedescalt"]//a/@href')
system = tree.xpath('//td[@class="codedescalt"]/a[@href]/text()')
supsys = [system.index(sup) for sup in supported]
for idx3 in supsys:
page2 = requests.get(baseurl + sysurl[idx3])
tree2 = html.fromstring(page2.text)
cdtype = tree2.xpath('//td[@class="codedescalt"]//a/@href')
nmtype = tree2.xpath('//td[@class="codedescalt"]/a[@href]/text()')
outdir = system[idx3]
if not os.path.exists(outdir):
os.mkdir(outdir)
for idxnum, chttype in enumerate(cdtype):
contentdir = outdir + "/" + nmtype[idxnum]
if not os.path.exists(contentdir):
os.mkdir(contentdir)
print "created: " + contentdir
cheatwriter( baseurl=baseurl, chttype=chttype, outdir=contentdir )