2015-05-03 03:38:33 +02:00
from lxml import html
2015-05-22 01:14:18 +02:00
import sys
2015-05-03 03:38:33 +02:00
import requests
import os
2015-05-22 01:14:18 +02:00
import re
from chtwrite import cheatwriter
2015-05-03 03:38:33 +02:00
#cheat scraper for http://bsfree.org/ (this header previously named http://bsfree.shadowflareindustries.com/)
#run the script with "python scraper.py"; the site url is hard-coded in baseurl and every system listed in supported is scraped
2015-05-22 01:14:18 +02:00
# Root of the cheat site; hrefs scraped from its pages are appended to this.
baseurl = 'http://bsfree.org/'
# Link texts of the systems to scrape, matched against the index page links.
supported = (
    "Gameboy",
    "Gameboy Advance",
    "Sega Game Gear",
    "Genesis",
    "Nintendo Entertainment System",
    "Sega Master System",
    "Playstation",
    "Super Nintendo",
    "Sega Saturn",
)

# Fetch the index page. A failed request is fatal: without the index there is
# nothing to walk, and parsing an error page would only fail less clearly.
page = requests.get(baseurl)
page.raise_for_status()
tree = html.fromstring(page.text)
# hrefs and link texts come from two separate queries and are paired by
# position below.
sysurl = tree.xpath('//td[@class="codedescalt"]//a/@href')
system = tree.xpath('//td[@class="codedescalt"]/a[@href]/text()')

# Positions (within the scraped link lists) of the systems we support.
# A supported system that is absent from the index page is skipped instead of
# aborting the whole run with ValueError.
supsys = [system.index(sup) for sup in supported if sup in system]

for idx3 in supsys:
    page2 = requests.get(baseurl + sysurl[idx3])
    if not page2.ok:
        # One broken system page should not stop the remaining systems.
        print("skipped: " + system[idx3])
        continue
    tree2 = html.fromstring(page2.text)
    # Same link layout as the index page: hrefs of the code types and their
    # display names.
    cdtype = tree2.xpath('//td[@class="codedescalt"]//a/@href')
    nmtype = tree2.xpath('//td[@class="codedescalt"]/a[@href]/text()')

    # One output directory per system, named after its link text.
    outdir = system[idx3]
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    # Pair each code-type href with its name; zip stops at the shorter list,
    # so a mismatch between the two queries cannot raise IndexError.
    for chttype, typename in zip(cdtype, nmtype):
        contentdir = os.path.join(outdir, typename)
        if not os.path.exists(contentdir):
            os.mkdir(contentdir)
            # Parenthesised single-argument print runs on Python 2 and 3.
            print("created: " + contentdir)
        cheatwriter(baseurl=baseurl, chttype=chttype, outdir=contentdir)
2015-05-03 03:38:33 +02:00