// "Back" -- stray navigation text; appears to be page residue, not part of the script.
import java.util.regex.Matcher
import java.util.regex.Pattern
import java.text.DateFormat
import org.apache.commons.net.ftp.FTPClient

/*
 * Page generator script.
 *
 *   1. Calls one parseXxx method per entry in fNames and stores the returned
 *      text fragment under that name.
 *   2. Replaces every ##Xxx## placeholder of web/index.tpl.html with the
 *      matching fragment and writes the result to web/index.html.
 *   3. Uploads web/index.html to an FTP server.
 *
 * NOTE(review): several string literals in the parse and replace methods below
 * contain nothing but line breaks. They are reproduced exactly as found; if they
 * were meant to carry HTML markup, it has to be restored from the original
 * template design -- confirm before relying on the generated page.
 */

// Fragment names. For each name N the method parseN(htmlSlurp, xmlSlurp) is
// invoked and its result replaces the ##N## placeholder in the template.
def fNames = [
    'Timestamp',
    'GroovyBlogs',
    'GroovyTweets',
    'GroovyMailList',
    'GrailsMailList',
    'GR8Forums'
]
def fragments = [:]

// XmlSlurper for HTML parsing (backed by the NekoHTML SAX parser)
def htmlSlurp = new XmlSlurper(new org.cyberneko.html.parsers.SAXParser())
// XmlSlurper for XML parsing
def xmlSlurp = new XmlSlurper()

/* ===========================================================================================
   1. Obtain information and generate the model.
============================================================================================== */
fNames.each { fName ->
    def t0 = System.currentTimeMillis()
    println "** $fName"
    try {
        // Dynamic dispatch to parseTimestamp, parseGroovyBlogs, ...
        fragments[fName] = "parse$fName"(htmlSlurp, xmlSlurp)
    } catch (Exception x) {
        // A failing source must not abort the page: its fragment becomes the
        // error text. Exceptions without a message fall back to toString() so
        // the fragment is never null.
        fragments[fName] = x.message ?: x.toString()
    }
    printTS(t0)
}

/* ===========================================================================================
   2. Parse the template and generate index.html
============================================================================================== */
def idx = new File("web/index.html")
def tpl = new File("web/index.tpl.html").text
idx.delete()
fragments.each { k, v ->
    // Literal (non-regex) substitution: neither the placeholder nor the
    // fragment text is interpreted as a pattern or as a group reference.
    tpl = tpl.replace("##${k}##".toString(), v == null ? '' : v.toString())
}
idx.write(tpl)

/* ===========================================================================================
   3. Upload index.html to web server.
============================================================================================== */
def ftpServer = "ftp.host.name"
def ftpUser = "ftp.user.name"
def ftpPass = "ftp.user.password"

def ftpClient = new FTPClient()
try {
    ftpClient.connect(ftpServer)
    ftpClient.login(ftpUser, ftpPass)
    println "Connected to $ftpServer. ${ftpClient.replyString}"
    ftpClient.changeWorkingDirectory('./httpdocs')
    println "Directory changed. \n${ftpClient.replyString}"
    ftpClient.enterLocalPassiveMode()
    idx.withInputStream { fis ->
        ftpClient.storeFile(idx.name, fis)
    }
    println "Upload completed: ${ftpClient.replyString}"
    ftpClient.logout()
} finally {
    // Release the control connection even when a step above throws.
    if (ftpClient.isConnected()) {
        ftpClient.disconnect()
    }
}

/* ===========================================================================================
   ++ Auxiliary functions to parse each individual information source ++
============================================================================================== */

/*
 * parse GR8Forums (html).
 * Collects the trimmed text of every A element whose class is 'topictitle'.
 * Returns the concatenated fragment as a String.
 */
def parseGR8Forums(htmlSlurp, xmlSlurp) {
    def u = 'http://gr8forums.org/search.php?search_id=active_topics'
    println "\tProcesando $u"
    def html = htmlSlurp.parse(u)
    def elements = html.'**'.findAll { it.name() == 'A' && it.@class.toString().equals('topictitle') }
    println "\tEnlaces encontrados: ${elements.size()}"
    def result = new StringBuilder('\n')
    elements.each { link ->
        result << "\n${link.text().trim()}\n"
    }
    result << '\n'
    return result.toString()
}

/* parse Groovy mail list (RSS Feed) */
def parseGroovyMailList(htmlSlurp, xmlSlurp) {
    return renderFeedTitles(xmlSlurp, 'http://www.nabble.com/codehaus---Groovy-ft11866.xml')
}

/* parse Grails mail list (RSS Feed) */
def parseGrailsMailList(htmlSlurp, xmlSlurp) {
    return renderFeedTitles(xmlSlurp, 'http://www.nabble.com/grails---user-ft11861.xml')
}

/*
 * Shared implementation of the two mail-list parsers: parses the feed at u
 * with xmlSlurp and concatenates the title of every 'entry' element.
 * Returns the fragment as a String.
 */
def renderFeedTitles(xmlSlurp, u) {
    println "\tProcesando $u"
    // Declared locally so the parsed feed does not leak into the script binding.
    def feed = xmlSlurp.parse(u)
    println "\tLeyendo ${feed.entry.size()} elementos..."
    def result = new StringBuilder('\n')
    feed.entry.each { entry ->
        result << "\n${entry.title}\n"
    }
    result << '\n'
    return result.toString()
}

/* Small hack to generate Timestamp as an item in the model. */
def parseTimestamp(htmlSlurp, xmlSlurp) {
    new Date().toString()
}

/*
 * parse GroovyBlogs.org (html).
 * Collects the trimmed text of every A element whose href contains '/jump/'
 * and whose text is not empty. Returns the fragment as a String.
 */
def parseGroovyBlogs(htmlSlurp, xmlSlurp) {
    def u = 'http://groovyblogs.org'
    println "\tProcesando $u"
    def html = htmlSlurp.parse(u)
    def elements = html.'**'.findAll { it.name() == 'A' && it.@href.toString().contains('/jump/') && it.text() }
    println "\tEnlaces encontrados: ${elements.size()}"
    def result = new StringBuilder('\n')
    elements.each { link ->
        result << "\n${link.text().trim()}\n"
    }
    result << '\n'
    return result.toString()
}

/*
 * parse GroovyTweets.org (html).
 * For every DIV whose id starts with 'tweet', emits the screen name (A element
 * with class 'screenNameLink') followed by the status text (SPAN element with
 * class 'statusText') after URLs, #searches and @users have been wrapped.
 * Returns the fragment as a String.
 */
def parseGroovyTweets(htmlSlurp, xmlSlurp) {
    def u = 'http://groovytweets.org'
    println "\tProcesando $u"
    def html = htmlSlurp.parse(u)
    def tweets = html.'**'.findAll { it.name() == 'DIV' && it.@id.toString().startsWith('tweet') }
    println "\tTweets encontrados: ${tweets.size()}"
    def result = new StringBuilder('\n')
    tweets.each { tweet ->
        def nameNodes = tweet.'**'.findAll { it.name() == 'A' && it.@class == 'screenNameLink' }
        def statusNodes = tweet.'**'.findAll { it.name() == 'SPAN' && it.@class == 'statusText' }
        if (!nameNodes || !statusNodes) {
            // Skip a tweet without name or status instead of failing the whole fragment.
            return
        }
        def name = "\n${nameNodes[0].text().trim()}\n"
        def statusNode = statusNodes[0]
        /* GroovyTweets uses EasyThumb to embed thumbs in every link.
           If the tweet has no links we can just take the status text.
           If it has one or more links, the nested 'thumb' spans are emptied
           first so their content does not end up in the text. */
        def thumbs = statusNode.'**'.findAll { it.name() == 'SPAN' && it.@class == 'thumb' }
        thumbs.each { thumb ->
            thumb.replaceBody("")
        }
        def status = replaceLinks(statusNode.text())
        status = replaceTwitterSearches(status)
        status = replaceTwitterUsers(status)
        result << "\n${name}:\n${status}\n"
    }
    result << '\n'
    return result.toString()
}

/* ===========================================================================================
   ++ Utilities ++
============================================================================================== */

/* Prints the milliseconds elapsed since t0 (a System.currentTimeMillis() value). */
def printTS(t0) {
    def t = System.currentTimeMillis() - t0
    println "$t ms."
}

/* Wraps every match of the Twitter search (#tag) pattern found in input. */
def replaceTwitterSearches(input) {
    return wrapMatches(twitterSearchPattern(), input)
}

/* Wraps every match of the Twitter user (@name) pattern found in input. */
def replaceTwitterUsers(input) {
    return wrapMatches(twitterUserPattern(), input)
}

/* Wraps every URL found in input. */
def replaceLinks(input) {
    return wrapMatches(urlPattern(), input)
}

/*
 * Replaces each match of pat in text by the match surrounded with line breaks,
 * in a single pass over the text. The matched text is never re-used as a regular
 * expression and the replacement is quoted, so characters such as '?', '.', ')',
 * '$' or '\' inside a match are handled literally, and a match occurring more
 * than once is wrapped exactly once per occurrence.
 */
def wrapMatches(pat, text) {
    Matcher m = pat.matcher(text)
    StringBuffer out = new StringBuffer()
    while (m.find()) {
        m.appendReplacement(out, Matcher.quoteReplacement("\n${m.group()}\n".toString()))
    }
    m.appendTail(out)
    return out.toString()
}

/* Pattern for http, https, ftp and file URLs (case-insensitive). */
def urlPattern() {
    return Pattern.compile("\\b(https?|ftp|file)://[-A-Z0-9+&@#/%?=~_|!:,.;]*[-A-Z0-9+&@#/%=~_|]", Pattern.CASE_INSENSITIVE)
}

/*
 * Pattern for '#word' followed by one non-word character.
 * NOTE(review): the trailing character is part of the match, and a '#word' at
 * the very end of the text does not match -- confirm this is intended.
 */
def twitterSearchPattern() {
    return Pattern.compile("\\#\\w+\\W", Pattern.CASE_INSENSITIVE)
}

/*
 * Pattern for '@word' followed by one whitespace character.
 * NOTE(review): the trailing whitespace is part of the match, and an '@word' at
 * the very end of the text does not match -- confirm this is intended.
 */
def twitterUserPattern() {
    return Pattern.compile("@\\w+\\s", Pattern.CASE_INSENSITIVE)
}

/* Returns the list of URLs found in text. */
def findUrls(text) {
    return applyPattern(urlPattern(), text)
}

/* Returns the list of '#word' matches (including the trailing character) found in text. */
def findTwitterSearches(text) {
    return applyPattern(twitterSearchPattern(), text)
}

/* Returns the list of '@word' matches (including the trailing whitespace) found in text. */
def findTwitterUsers(text) {
    return applyPattern(twitterUserPattern(), text)
}

/* Returns every match of pat in text, in order of appearance. */
def applyPattern(pat, text) {
    Matcher m = pat.matcher(text)
    def items = []
    while (m.find()) {
        items.add(m.group())
    }
    return items
}