# scripts/scraper.py

from lxml import html
import sys
import requests
import os
import re
from chtwrite import cheatwriter
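# Cheat scraper for http://bsfree.shadowflareindustries.com/ (the site is fetched from `baseurl` below).
# It walks every system listed in `supported` and each of that system's code types by itself,
# so there is no URL to copy: run the script with "python scraper.py".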


# Root of the site; every href scraped from its pages is appended to this.
baseurl = 'http://bsfree.org/'
# Link texts (on the front page) of the systems that should be scraped.
supported = (
    "Gameboy", "Gameboy Advance", "Sega Game Gear", "Genesis",
    "Nintendo Entertainment System", "Sega Master System", "Playstation",
    "Super Nintendo", "Sega Saturn",
)


def _list_links(url):
    """Return ``(text, href)`` pairs for the links in the listing cells at *url*.

    Only anchors that are direct children of ``<td class="codedescalt">`` and
    carry both an ``href`` and a leading text node are returned.  Text and
    href are read from the same element, so the two can never get out of
    step (two independent XPath queries can return lists of different
    length).

    Raises ``requests.HTTPError`` when the server answers with an error
    status, rather than parsing the error page as if it were a listing.
    """
    response = requests.get(url)
    response.raise_for_status()
    document = html.fromstring(response.text)
    anchors = document.xpath('//td[@class="codedescalt"]/a[@href]')
    return [(anchor.text, anchor.get('href')) for anchor in anchors if anchor.text]


# Map system name -> relative URL.  setdefault keeps the first occurrence of
# a duplicated name, which is what a list.index() lookup would have picked.
system_links = {}
for system_name, system_href in _list_links(baseurl):
    system_links.setdefault(system_name, system_href)

for outdir in supported:
    system_href = system_links.get(outdir)
    if system_href is None:
        # A system that vanished from the front page should not abort the
        # scrape of all the others; report it and move on.
        sys.stderr.write("skipping (not listed on site): " + outdir + "\n")
        continue

    # Fetch before touching the filesystem so a failed request leaves no
    # empty directory behind.
    code_types = _list_links(baseurl + system_href)

    if not os.path.exists(outdir):
        os.mkdir(outdir)

    for type_name, chttype in code_types:
        contentdir = os.path.join(outdir, type_name)
        if not os.path.exists(contentdir):
            os.mkdir(contentdir)
            # Parenthesised single-argument form prints identically on
            # Python 2 and is valid syntax on Python 3.
            print("created: " + contentdir)
        # cheatwriter comes from the project-local chtwrite module; presumably
        # it downloads the code type's cheats into contentdir -- confirm there.
        cheatwriter(baseurl=baseurl, chttype=chttype, outdir=contentdir)
fixed previous error and added more games 2015-05-03 03:38:33 +02:00			`from lxml import html`
redid all the files from scratch and made the script do all of the work 2015-05-22 01:14:18 +02:00			`import sys`
fixed previous error and added more games 2015-05-03 03:38:33 +02:00			`import requests`
			`import os`
redid all the files from scratch and made the script do all of the work 2015-05-22 01:14:18 +02:00			`import re`
			`from chtwrite import cheatwriter`
fixed previous error and added more games 2015-05-03 03:38:33 +02:00			`#cheat scraper for http://bsfree.shadowflareindustries.com/ navigate to the system and codetype you want, copy url`
			`#run the script with "python scraper.py 'url'"`

redid all the files from scratch and made the script do all of the work 2015-05-22 01:14:18 +02:00
			`baseurl = 'http://bsfree.org/'`
			`supported = "Gameboy", "Gameboy Advance", "Sega Game Gear", "Genesis", "Nintendo Entertainment System", "Sega Master System", "Playstation", "Super Nintendo", "Sega Saturn"`
fixed previous error and added more games 2015-05-03 03:38:33 +02:00

redid all the files from scratch and made the script do all of the work 2015-05-22 01:14:18 +02:00			`page = requests.get(baseurl)`
fixed previous error and added more games 2015-05-03 03:38:33 +02:00			`tree = html.fromstring(page.text)`

redid all the files from scratch and made the script do all of the work 2015-05-22 01:14:18 +02:00			`sysurl = tree.xpath('//td[@class="codedescalt"]//a/@href')`
			`system = tree.xpath('//td[@class="codedescalt"]/a[@href]/text()')`
fixed previous error and added more games 2015-05-03 03:38:33 +02:00
redid all the files from scratch and made the script do all of the work 2015-05-22 01:14:18 +02:00			`supsys = [system.index(sup) for sup in supported]`
fixed previous error and added more games 2015-05-03 03:38:33 +02:00
redid all the files from scratch and made the script do all of the work 2015-05-22 01:14:18 +02:00			`for idx3 in supsys:`
			`page2 = requests.get(baseurl + sysurl[idx3])`

			`tree2 = html.fromstring(page2.text)`

			`cdtype = tree2.xpath('//td[@class="codedescalt"]//a/@href')`
			`nmtype = tree2.xpath('//td[@class="codedescalt"]/a[@href]/text()')`
			`outdir = system[idx3]`
			`if not os.path.exists(outdir):`
			`os.mkdir(outdir)`
			`for idxnum, chttype in enumerate(cdtype):`
fixed previous error and added more games 2015-05-03 03:38:33 +02:00
redid all the files from scratch and made the script do all of the work 2015-05-22 01:14:18 +02:00			`contentdir = outdir + "/" + nmtype[idxnum]`
			`if not os.path.exists(contentdir):`
			`os.mkdir(contentdir)`
			`print "created: " + contentdir`
all filenames scrubbed of all special characters 2015-05-24 04:07:15 +02:00			`cheatwriter( baseurl=baseurl, chttype=chttype, outdir=contentdir )`
fixed previous error and added more games 2015-05-03 03:38:33 +02:00