/* scrapem.js
 *
 * Scrapes mp3s from hypem.com
 * Rabbit Hole / Software Stack:
 * 	nodejs - needed to interface with a db for storage / program exercise
 * 		(not yet implemented)
 * 	spookyjs - needed to drive this crazy train from nodejs
 * 		(not yet implemented)
 * 	casperjs - needed for binary file downloading
 * 		(tested with 1.0.0)
 * 	phantomjs - needed for webkit browser awesomeness
 * 		(tested with 1.8.0)
 *
 * Thanks to http://userscripts.org/scripts/review/112466 for the inspiration
 */
16
17/* TODO
18 *  - add a flat file db to remember songs that have been downloaded before
19 */
20
// Create the CasperJS instance that drives the whole scrape.
// Only messages at "error" level are logged; switch logLevel to "debug" when troubleshooting.
var casper = require('casper').create({
	verbose: true,
	logLevel: "error", // "debug"
	pageSettings: {
		// NOTE(review): presumably disabled so cross-origin requests are permitted - confirm.
		webSecurityEnabled: false
	}
});
28
// Print the command-line synopsis followed by one example invocation.
function usage() {
	var lines = [
		'my-hypem: usage: casperjs scraper.js --url=<target>',
		'\texample: casperjs scraper.js --url=http://hypem.com/popular'
	];
	// Wrap console.log so each call receives the message only (forEach also passes index/array).
	lines.forEach(function (line) {
		console.log(line);
	});
}
34
// Test mode switch; stays false unless --test or -t is given.
var test = false;

// Act on the first command-line switch that applies, in priority order:
// help, then test mode, then the mandatory --url check.
// NOTE(review): casper.exit() may not halt the script immediately - confirm whether
// the statements after this block can still run once an exit has been requested.
(function checkCliOptions(cli) {
	if (cli.has("h") || cli.has("help")) {
		usage();
		casper.exit(0);
		return;
	}
	if (cli.has("test") || cli.has("t")) {
		// NOTE(review): test mode skips the --url requirement - confirm this is intended.
		test = true;
		return;
	}
	if (cli.has("url")) {
		return;
	}
	console.log("Error - url option required!");
	usage();
	casper.exit(1);
}(casper.cli));
49
// Listing page to scrape, taken from the --url option.
var target = casper.cli.get("url");

// TrackList: tracks read from the listing page.
// DataList: the result document that is printed as JSON at the end of the run.
var TrackList = [];
var DataList = {"tracks": []};

// Open the listing page and copy its in-page track list into TrackList.
casper.start(target, function () {
    TrackList = this.evaluate(function () {
        // This function runs inside the page, so it cannot see variables of this script.
        // Guard the lookup: on a page that does not define displayList the bare
        // reference would fail and leave TrackList without a usable array.
        if (typeof displayList === 'undefined' || !displayList.tracks) {
            return [];
        }
        return displayList.tracks;
    });
});
// Visit the page of every track in TrackList and append its id, key, artist,
// song and like count to DataList.tracks. Exits with status 1 when there is
// nothing to visit.
casper.then(function () {
    // Loose `== null` also catches null, which `=== undefined` would let through
    // to the `.length` access and crash with a TypeError.
    if (TrackList == null || TrackList.length < 1) {
        console.log("Tracklist Empty! Exiting!");
        casper.exit(1);
        return;
    }

    casper.each(TrackList, function (self, track) {
        // Declared with `var` so the values no longer leak into the global scope.
        var link = 'http://hypem.com/track/' + track.id;

        self.thenOpen(link, function () {
            var favSelector = {
                type: 'xpath',
                path: '//*[@id="favcount_' + track.id + '"]'
            };
            // getHTML() throws when nothing matches the selector; record null for
            // this track instead of aborting the whole run.
            var likes = this.exists(favSelector) ? this.getHTML(favSelector) : null;

            // Downloading is currently disabled. To re-enable it:
            //   this.download("http://hypem.com/serve/f/509/" + track.id + '/' + track.key,
            //                 track.artist + " - " + track.song + ".mp3");

            DataList.tracks.push({
                "id"     : track.id,
                "key"    : track.key,
                "artist" : track.artist,
                "song"   : track.song,
                "likes"  : likes
            });
        });
    });
});
// Last step: print everything that was collected as a single JSON document, then quit.
casper.then(function () {
	var report = JSON.stringify(DataList);
	this.echo(report);
	this.exit();
});

// Start executing the queued steps.
casper.run();