mirror of
https://github.com/libretro/libretro-database
synced 2024-11-23 14:26:39 +00:00
fef6ba4fc8
I removed two unused `import`s to meet recommendations from LGTM (see 8faf03bb36/files/scripts/scraper.py).
39 lines
1.2 KiB
Python
39 lines
1.2 KiB
Python
from lxml import html
|
|
import requests
|
|
import os
|
|
from chtwrite import cheatwriter
|
|
# Cheat scraper for the code site at baseurl (the original note named
# http://bsfree.shadowflareindustries.com/ as the site being scraped).
#
# The script walks every supported system linked from the index page, then
# every code type linked from that system's page, and hands each code-type
# link to cheatwriter together with an output directory named
# "<system>/<code type>" (created below the current working directory).
#
# Run the script with "python scraper.py".
# NOTE(review): the old usage note asked for a 'url' argument, but nothing in
# this script reads command-line arguments -- confirm before relying on it.

baseurl = 'http://bsfree.org/'
supported = (
    "Gameboy",
    "Gameboy Advance",
    "Sega Game Gear",
    "Genesis",
    "Nintendo Entertainment System",
    "Sega Master System",
    "Playstation",
    "Super Nintendo",
    "Sega Saturn",
)

# Fetch the index page and collect the system links and their link texts.
page = requests.get(baseurl)
tree = html.fromstring(page.text)

# NOTE(review): the two queries are assumed to return parallel lists (same
# order, same length) because they are indexed with the same position below.
sysurl = tree.xpath('//td[@class="codedescalt"]//a/@href')
system = tree.xpath('//td[@class="codedescalt"]/a[@href]/text()')

# list.index() raises ValueError for a name that is not on the page, which
# would abort the whole run; report those names and keep going with the rest.
missing = [sup for sup in supported if sup not in system]
if missing:
    print("not found on index page, skipped: " + ", ".join(missing))
supsys = [system.index(sup) for sup in supported if sup in system]

for idx3 in supsys:
    # Fetch the page of one system and collect its code-type links and names.
    page2 = requests.get(baseurl + sysurl[idx3])
    tree2 = html.fromstring(page2.text)

    cdtype = tree2.xpath('//td[@class="codedescalt"]//a/@href')
    nmtype = tree2.xpath('//td[@class="codedescalt"]/a[@href]/text()')

    # One top-level directory per system, named after its link text.
    outdir = system[idx3]
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    # zip() pairs each link with its name and stops at the shorter list, so a
    # link without a matching name can no longer raise IndexError mid-run.
    for chttype, typename in zip(cdtype, nmtype):
        # The path is built with "/" exactly as before because the string is
        # passed on to cheatwriter unchanged.
        contentdir = outdir + "/" + typename
        if not os.path.exists(contentdir):
            os.mkdir(contentdir)
            # Call form with a single argument prints the same text on
            # Python 2 and Python 3.
            print("created: " + contentdir)
        cheatwriter(baseurl=baseurl, chttype=chttype, outdir=contentdir)
|
|
|