Wednesday, September 28, 2005

[Tcl] RSS Feed

#! /usr/local/bin/tclsh


# At least one feed URL must be supplied on the command line; otherwise
# print a usage line to stderr and stop with a non-zero exit status.
if { $argc < 1 } {
    puts stderr "Usage: $argv0 url \[url\] ..."
    exit 1
}


package require http
package require tdom
package require uri


# Configure the headers sent with every request made through the http
# package: a browser-style User-Agent and an Accept header that prefers XML.
# NOTE(review): presumably the browser User-Agent is there so that servers
# answer as they would for a browser -- confirm it is still needed.
#
# Accept syntax: media ranges are separated by ",", and a quality value is
# attached to its media range with ";q=".
http::config \
    -useragent {Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv: 1.7.2) Gecko/20040804 Netscape/7.2 (ax)} \
    -accept {text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5}


# rss2dom --
#   Fetch the document at $url and parse it as XML.
#
# Arguments:
#   url   Address of the feed to download.
#
# Results:
#   The document element (root node) of the parsed XML.
#
# Errors:
#   Raises a Tcl error whose message starts with "Error (http)" when the
#   transfer does not complete or the HTTP status code is not 200, and with
#   "Error (dom)" when the body cannot be parsed.  Errors raised by
#   http::geturl itself propagate unchanged.
proc rss2dom { url } {
    set timeout 8000; # in milliseconds
    set token [http::geturl $url -timeout $timeout]

    # Copy everything needed out of the transaction, then release it exactly
    # once so that no later branch has to remember to clean up.
    set status [http::status $token]
    set ncode  [http::ncode $token]
    set data   [http::data $token]
    http::cleanup $token

    # A timeout or dropped connection leaves no usable status code, so report
    # the transfer status instead of an empty "Code=".
    if { ![string equal $status ok] } {
        return -code error "Error (http) - Status=$status"
    }
    if { $ncode != 200 } {
        return -code error "Error (http) - Code=$ncode"
    }

    # The message is passed as the result of [return]; handing it to
    # -errorinfo would leave the error message seen by [catch] empty.
    if { [catch {dom parse $data} root] } {
        return -code error "Error (dom) -  $root"
    }
    return [$root documentElement]
}


# _rssChildText --
#   Return the trimmed value of the first non-blank text node found beneath
#   the child elements of $node whose local name is $name, or "" when there
#   is none.  Matching on the local name ignores namespace prefixes.
proc _rssChildText { node name } {
    set xpath [format {*[local-name()='%s']/text()} $name]
    foreach textNode [$node selectNodes $xpath] {
        set value [string trim [$textNode nodeValue]]
        if { [string length $value] } {
            return $value
        }
    }
    return ""
}

# rssExplore --
#   Extract the channel header and the first items of a parsed feed.
#
# Arguments:
#   doc   Node command of the parsed feed, as returned by rss2dom.
#
# Results:
#   A two-element list {header items}:
#     header  {channelTitle channelLink}
#     items   flat list {title link title link ...}, at most 15 pairs.
#   A missing channel, title or link yields an empty string rather than an
#   error.  Item links without a scheme are made absolute against the channel
#   link when that link is an http(s) URL; otherwise they are left untouched.
proc rssExplore { doc } {
    set max 15

    set rssTitle ""
    set rssLink  ""
    set channel [lindex [$doc selectNodes {//*[local-name()='channel']}] 0]
    if { [string length $channel] } {
        set rssTitle [_rssChildText $channel title]
        set rssLink  [_rssChildText $channel link]
    }
    set rc1 [list $rssTitle $rssLink]

    # Loop-invariant pieces of the channel link used to absolutise item links:
    #   origin  scheme://host[:port]   (keeps https and omits an absent port)
    #   scheme  http or https
    #   base    channel link without trailing slashes (avoids "//" on join)
    set origin ""
    set scheme ""
    regexp -nocase {^(https?)://[^/?#]+} $rssLink origin scheme
    set base [string trimright $rssLink /]

    set rc2 {}
    set rssItem [$doc selectNodes {//*[local-name()='item']}]
    foreach i [lrange $rssItem 0 [expr {$max-1}]] {
        set t [_rssChildText $i title]
        set l [_rssChildText $i link]

        if { ![string length $l] } {
            # No usable link on the item: fall back to the channel itself.
            set l $rssLink
        } elseif { ![regexp {^[A-Za-z][A-Za-z0-9+.-]*:} $l] && [string length $origin] } {
            # The link has no scheme of its own, so it is relative.
            if { [string match {//*} $l] } {
                # protocol-relative: //host/path
                set l "$scheme:$l"
            } elseif { [string match {/*} $l] } {
                # host-relative: /path
                set l "$origin$l"
            } else {
                # path-relative: appended to the channel link
                set l "$base/$l"
            }
        }

        lappend rc2 $t $l
    }
    return [list $rc1 $rc2]
}



# htmlEscape --
#   Return $text with the characters that are special in HTML text and
#   attribute values replaced by entities.  Feed content is external input
#   and must not be written into the page verbatim.
proc htmlEscape { text } {
    return [string map [list & &amp; < &lt; > &gt; \" &quot;] $text]
}

# Write POPUP.html: for every feed URL on the command line, one heading that
# links to the channel (plus links to the raw feed and to the top of the page)
# followed by a bulleted list of item links.
#
# NOTE(review): the markup in the published listing was stripped by the blog
# renderer.  The tags below are reconstructed from the surviving format
# arguments ($l $t $url for the header, $l $t for each item) -- adjust them
# to taste.
set fp [open POPUP.html w]
# put in your own html->body open and close tags
puts $fp "<html>"
puts $fp "<body>"
puts $fp "<a name=\"top\"></a>"
foreach url $argv {
    # A feed that cannot be fetched or parsed is reported and skipped so that
    # one bad URL does not abort the whole page.
    if { [catch {rssExplore [rss2dom $url]} result] } {
        puts stderr "$url: $result"
        continue
    }
    foreach { header items } $result {}
    foreach { t l } $header {}
    puts $fp [format {<p><b><a href="%s">%s</a></b> [<a href="%s">rss</a>] [<a href="#top">top</a>]</p>} \
        [htmlEscape $l] [htmlEscape $t] [htmlEscape $url]]
    puts $fp "<ul>"
    foreach { t l } $items {
        puts $fp [format {<li><a href="%s">%s</a></li>} [htmlEscape $l] [htmlEscape $t]]
    }
    puts $fp "</ul>"
}
puts $fp "</body>"
puts $fp "</html>"
# Flush and release the file before the script ends.
close $fp

Calling the above rss.tcl in a UNIX script

#! /bin/sh

# Regenerate POPUP.html from the feeds listed below and, only when rss.tcl
# exits successfully, upload the result with curl.
if ./rss.tcl \
    'http://www.sun.com/rss/news-rss.xml' \
    'http://www.sun.com/rss/events-rss.xml' \
    'http://www.sun.com/rss/books-just-published-rss.xml' \
    'http://developers.sun.com/rss/solaris.xml' \
    'http://www.forbes.com/technology/index.xml' \
    'http://www.oreillynet.com/meerkat/?_fl=rss10&t=ALL&c=916' \
    'http://www.cnet.com/4914-6022_1-0.xml?author=Wood:Molly&maxhits=5' \
    'http://lwn.net/headlines/newrss' \
    'http://www.linuxjournal.com/node/feed' \
    'http://rssnewsapps.ziffdavis.com/eweeklinux.xml' \
    'http://www.utilitycomputing.com/news/recent10.xml' \
    'http://rssnewsapps.ziffdavis.com/eweeksecurity.xml' \
    'http://www.sans.org/rr/rss/' \
    'http://www.sans.org/newsletters/newsbites/rss/' \
    'http://www.sans.org/newsletters/risk/rss/' \
    'http://www.nwfusion.com/rss/datacenter.xml' \
    'http://www.nwfusion.com/rss/utility.xml' \
    'http://rssnewsapps.ziffdavis.com/eweek_infrastructure.xml'
then
    /usr/local/bin/curl \
        --silent \
        --output /dev/null \
        --user web:master \
        -F upfile=@POPUP.html \
        -F uri=/ \
        http://192.168.2.3/upfile2uri/index_add2.cgi
fi


1 Comment:

Blogger Azhar Hussein said...

Hi :)

I absolutely love your blog! I'll definitely bookmark it, keep up the great work.

Please check out my site if you get some time: DollarBuddy

10:41 AM  

Post a Comment

<< Home