import org.serviio.library.metadata.*
import org.serviio.library.online.*
import org.serviio.util.*
import groovy.json.*

/**
 * Content URL extractor plugin for Hypem audio feeds.
 * http://hypem.com/feed/popular/3day/1/feed.xml
 * The AUTH cookie might expire with time, it will then have to be replaced with a
 * new one (from the browser)
 * cd /usr/bin/serviio-0.6.2/
 *  groovy -cp lib/serviio.jar:lib/slf4j-api.jar:lib/slf4j-log4j12.jar:lib/log4j.jar:lib/org.restlet.jar plugins/Hypem.groovy
 * @author Petr Nejedly and ttguy
 *  ver 3.0 ttguy This version is based on version with the file timestamped  24 Jul 2012 as found in the all plugins pack for serviio 1.0.1 Updated (27/08/12)
 * Changes: 3.0  URLs changed their structure as did content info JSON. Had to fix some regular expression matching strings.
 * Uses org.ccil.cowan.tagsoup.Parser to parse some html now.
 *  Have noticed that each feed item could have more than one audio file to return, e.g. http://hypem.com/track/1pfbz/Alison+Valentine-Peanut+Butter+%28Moon+Boots+Remix%29
 *  but I am not sure the plugin architecture
 * can handle that.
 */
/**
 * Resolves a Hypem feed item (a http://hypem.com/track/&lt;id&gt;/... page link) to the
 * URL of its audio stream.
 *
 * Resolution happens in three steps:
 *  1. take the track id from the item's link,
 *  2. load the track page and read the content key from the JSON embedded in it,
 *  3. ask http://hypem.com/serve/source/&lt;id&gt;/&lt;key&gt; for the JSON that holds the stream URL.
 *
 * Every step that cannot be completed logs the reason and makes extractUrl return null
 * instead of failing with an index/assertion error.
 * NOTE(review): assumes Serviio treats a null result as "skip this item" - confirm.
 */
class Hypem extends FeedItemUrlExtractor {
	final version='3.0'
	final VALID_FEED_URL = '^(?:https?://)?(?:www\\.)?hypem\\.com/feed/.*$'
	final USER_AGENT = 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1'
	// AUTH cookie copied from a browser session; the file header explains that it may expire and need replacing.
	final COOKIES = ['AUTH':'03%3A2c74bfb5c1711f11115de69c8aa345f9%3A1342866525%3A1524063429%3AH9-GB']

	/**
	 * @return the name of this extractor (the class name)
	 */
	String getExtractorName() {
		return getClass().getName()
	}

	/**
	 * Tells whether the given feed URL is a Hypem feed, i.e. matches VALID_FEED_URL.
	 *
	 * @param feedUrl URL of the feed to check
	 * @return true when the URL's string form matches VALID_FEED_URL
	 */
	boolean extractorMatches(URL feedUrl) {
		log("version $version")
		return feedUrl ==~ VALID_FEED_URL
	}

	/**
	 * Resolves the audio stream URL for one feed item.
	 *
	 * @param links            item links; only the 'default' entry (the track page URL) is used,
	 *                         e.g. http://hypem.com/track/1pbh9/Lana+Del+Rey-Summertime+Sadness+%28Hannes+Fischer+Radio+Edit%29
	 * @param requestedQuality not used by this extractor
	 * @return a container of type AUDIO pointing at the stream, or null when the track id,
	 *         the content key or the stream URL cannot be determined
	 */
	ContentURLContainer extractUrl(Map links, PreferredQuality requestedQuality) {
		def linkUrl = links?.get('default')
		if (linkUrl == null) {
			log("No default link supplied for the feed item")
			return null
		}

		// The first path segment after /track/ is the item id (1pbh9 in the example above).
		// find() must be checked explicitly: a Matcher object exists even when nothing matches.
		def matcher = linkUrl =~ '^.+/track/(\\w+)/'
		if (!matcher.find()) {
			log("Could not find a track id in $linkUrl")
			return null
		}
		String itemId = matcher.group(1)

		// load the track web page and pull the content key out of its embedded JSON
		String pageHtml = openURL(linkUrl, USER_AGENT, COOKIES)
		String contentId = GetContentID(pageHtml)
		if (!contentId) {
			log("Could not find a content id on $linkUrl")
			return null
		}

		// The current time in milliseconds is appended as the "_" query parameter.
		// The info URL looks like http://hypem.com/serve/source/1pfsr/1dda19010f3ce6acec8dfda1957e8ab3?_=1346566447204
		long timestamp = System.currentTimeMillis()
		String infoUrl = "http://hypem.com/serve/source/$itemId/$contentId?_=$timestamp"
		log("infoUrl $infoUrl")

		// While this plugin was being debugged the info URL at one point answered 404 with a page
		// listing possible reasons: access to a private area, use of automated tools, or a large
		// number of requests from the host within 72 hours. Whether that also happens in normal
		// operation is unknown - ttguy
		String contentInfo = openURL(new URL(infoUrl), USER_AGENT, COOKIES)

		String contentUrl = extractContentUrl(contentInfo)
		if (!contentUrl) {
			log("No stream url in the content info returned by $infoUrl")
			return null
		}

		return new ContentURLContainer(fileType: MediaFileType.AUDIO, contentUrl: contentUrl)
	}

	/**
	 * Reads the "url" member from the content info JSON, which has one of these forms:
	 *   {"itemid":"1pfsr","url":"http:\/\/t10a.hypem.com\/sec\/...\/495719.mp3","final":true}
	 *   {"itemid":"1pbh9","type":"SC","url":"http:\/\/api.soundcloud.com\/tracks\/57290854\/stream?consumer_key=..."}
	 * Parsing the text as JSON undoes the \/ escaping (and any other JSON escape) in the value.
	 *
	 * @param contentInfo response body of the info URL
	 * @return the stream URL, or null when the text is empty, is not JSON or has no "url"
	 */
	private String extractContentUrl(String contentInfo) {
		if (!contentInfo) {
			return null
		}
		def info
		try {
			info = new JsonSlurper().parseText(contentInfo)
		} catch (JsonException e) {
			log("Content info is not valid JSON: ${e.message}")
			return null
		}
		return (info instanceof Map) ? info.url?.toString() : null
	}

	/**
	 * Extracts the content key of a track from the HTML of its page.
	 *
	 * Instead of using regular expression matching to pull the content id, the page is parsed
	 * with TagSoup (http://www.maclovin.de/2010/02/robust-html-parsing-the-groovy-way/), the text
	 * of the first element whose type attribute is 'application/json' is parsed as JSON, and the
	 * key of the first entry in its "tracks" list is returned. The list sometimes has two
	 * entries, implying more than one audio file per feed item; only the first one is used.
	 *
	 * @param fileContentAsString HTML of the track page
	 * @return the key of the first track, or null when the page is empty, holds no JSON element,
	 *         the JSON cannot be parsed or it lists no tracks
	 */
	String GetContentID(String fileContentAsString)
	{
		if (!fileContentAsString) {
			return null
		}

		// @Grab has to annotate a declaration; it fetches TagSoup when the script is compiled.
		@Grab(group='org.ccil.cowan.tagsoup', module='tagsoup', version='1.2' )
		def tagsoupParser = new org.ccil.cowan.tagsoup.Parser()
		def slurper = new XmlSlurper(tagsoupParser)
		def htmlParser = slurper.parseText(fileContentAsString)

		// '**' walks the whole document depth-first; find stops at the first matching element.
		def jsonNode = htmlParser.'**'.find { it.@type == 'application/json' }
		String jsonData = jsonNode?.text()
		if (!jsonData) {
			log("No application/json element found in the track page")
			return null
		}

		def result
		try {
			result = new JsonSlurper().parseText(jsonData)
		} catch (JsonException e) {
			log("Track page JSON could not be parsed: ${e.message}")
			return null
		}

		def tracks = (result instanceof Map) ? result.tracks : null
		if (!tracks) {
			log("Track page JSON lists no tracks")
			return null
		}
		return tracks[0]?.key?.toString()
	}

	/**
	 * Manual smoke test: checks the feed URL matching and resolves one live track page.
	 * Needs network access to hypem.com.
	 */
	static void main(args) {
		Hypem extractor = new Hypem()

		assert extractor.extractorMatches( new URL("http://hypem.com/feed/popular/3day/1/feed.xml") )
		assert !extractor.extractorMatches( new URL("http://google.com/feeds/api/standardfeeds/top_rated?time=today") )

		// Other track pages that were used for testing:
		//   http://hypem.com/track/1pfsr/GRiZ-Smash+The+Funk
		//   http://hypem.com/track/1pdtm/Elite+Gymnastics-Andreja+4-Ever
		Map videoLinks = ['default': new URL('http://hypem.com/track/1pfbz/Alison+Valentine-Peanut+Butter+%28Moon+Boots+Remix%29')]

		ContentURLContainer result = extractor.extractUrl(videoLinks, PreferredQuality.MEDIUM)
		println "Result: $result"
	}
}