// master
/* scrapem.js
 *
 * Scrapes MP3s off of hypem.com
 * Rabbit Hole / Software Stack:
 *   nodejs    - needed to interface with a db for storage / program exercise
 *               (not yet implemented)
 *   spookyjs  - needed to drive this crazy train from nodejs
 *               (not yet implemented)
 *   casperjs  - needed for binary file downloading
 *               (tested with 1.0.0)
 *   phantomjs - needed for webkit browser awesomeness
 *               (tested with 1.8.0)
 *
 * Thanks to http://userscripts.org/scripts/review/112466 for the inspiration
 */
16
/* TODO
 * - add a flat file db to remember songs that have been downloaded before
 */
20
// Create the CasperJS instance that drives every step of the scrape.
var casper = require('casper').create({
    pageSettings: {
        // PhantomJS page setting; turned off here.
        // NOTE(review): presumably needed so cross-origin requests for the
        // track files are not blocked -- confirm before re-enabling it.
        webSecurityEnabled: false
    },
    verbose: true,
    logLevel: "error" // use "debug" for detailed Casper logging
});
28
/*
 * Basic informational usage message.
 *
 * Prints two lines to the console: the command-line synopsis and an
 * example invocation. Takes no arguments and returns nothing.
 */
function usage() {
    console.log('my-hypem: usage: casperjs scraper.js --url=<target>');
    console.log('\texample: casperjs scraper.js --url=http://hypem.com/popular');
}
34
// Run in test mode (default false).
// NOTE(review): this flag is set below but not read anywhere in the visible
// code -- confirm whether test mode is still meant to do something.
var test = false;

// Check for help, test mode, or the required url option (first match wins).
if (casper.cli.has("h") || casper.cli.has("help")) {
    usage();
    casper.exit(0);
} else if (casper.cli.has("test") || casper.cli.has("t")) {
    // NOTE(review): because this is an else-if chain, passing --test/-t skips
    // the url check below, so the url option may be absent in test mode.
    test = true;
} else if (!casper.cli.has("url")) {
    console.log("Error - url option required!");
    usage();
    casper.exit(1);
}
49
// Page to scrape, taken from the --url command-line option.
var target = casper.cli.get("url");
// console.log("Opening connection to " + target);

// Track objects copied out of the page's `displayList.tracks`.
var TrackList = [];
// Accumulated result; printed as JSON by the final step.
var DataList = {"tracks": []};

// Step 1: open the target page and read its track list from the page context.
casper.start(target, function() {
    // console.log("Connected to " + target);
    TrackList = this.evaluate(function () {
        return displayList.tracks;
    });
});

// Step 2: queue one page visit per track and record its favourite count.
casper.then(function() {
    // Treat any falsy result (undefined or null) as "no tracks"; reading
    // .length of null would otherwise throw before the message is printed.
    if (!TrackList || TrackList.length < 1) {
        console.log("Tracklist Empty! Exiting!");
        casper.exit(1);
    } else {
        casper.each(TrackList, function(self, track) {
            // Declared with var so these stay local instead of leaking as globals.
            var link = 'http://hypem.com/track/' + track.id;
            self.thenOpen(link, function() {
                // name and url are only consumed by the disabled download call below.
                var name = track.artist + " - " + track.song + ".mp3";
                var url = "http://hypem.com/serve/f/509/" + track.id + '/' + track.key;
                // Markup of the element whose id is "favcount_<track id>" on the track page.
                var likes = this.getHTML({type: 'xpath', path: '//*[@id="favcount_' + track.id + '"]'});
                //this.echo("Downloading: "+ likes + name);
                //this.download(url,name);
                DataList.tracks.push({
                    "id" : track.id,
                    "key" : track.key,
                    "artist" : track.artist,
                    "song" : track.song,
                    "likes" : likes
                });
            });
        });
    }
});

// Step 3: print everything collected as one JSON document, then exit.
casper.then(function() {
    this.echo(JSON.stringify(DataList)).exit();
});

casper.run();