Wednesday, September 28, 2005

[Tcl] Download Accelerator in Tcl

#! /usr/local/bin/tclsh
#
# Download Accelerator in Tcl with the Thread package
# - inspired by Download Accelerator Plus (www.speedbit.com)
# - output mimics 'wget' (too lazy to craft a GUI)
#
#
# Written by: Chan Chi Hung
# Date: 4 Nov 2004
# History:
# 1.0 - without resume capability
# 1.1 - with resume capability
#             intermediate files are hidden
# 1.2 - check header Accept-Ranges before setting the nthread
#     - in resume, the nthread has to follow the previous setting
#     - http progress, similar to wget (see below)
# 1.3 - fix the status bar during resume
#     - fix nthreads when thread has not reached max and terminated
#
# Intermediate files (hidden):
#  .[urlBasename $url]-$seq      one slice per thread
#  .[urlBasename $url]#nthreads  thread count saved for resume
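#
# For reference, the trick underneath: each worker asks for one slice of
# the file via an HTTP/1.1 Range header. A minimal standalone sketch
# (hypothetical URL; http::geturl and -headers are from the standard
# http package):
#
#   package require http
#   set tok [http::geturl http://example.com/file.bin \
#               -headers {Range bytes=0-499} -binary 1]
#   puts [http::ncode $tok]   ;# a cooperating server answers 206 Partial Content
#   http::cleanup $tok
#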


#
# simulate wget output
#
# wget http://www.abc123.com/abc/video/abc.mpg
# --16:04:19--  http://www.abc123.com/abc/video/abc.mpg
#            => `abc.mpg'
# Resolving www.abc123.com... 195.222.13.16
# Connecting to www.abc123.com[195.222.13.16]:80... connected.
# HTTP request sent, awaiting response... 200 OK
# Length: 546,140 [video/mpeg]
# 
# 100%[====================================>] 546,140      6.51K/s  ETA 00:00 
# 16:06:36 (3.91 KB/s) - `abc.mpg' saved [546140/546140]


if { $argc < 1 || $argc > 2 } {
 puts stderr "Usage: $argv0  \[#threads\]"
 puts stderr "       default #threads is 4"
 exit 1
}



proc comma {num {sep ,}} {
 while {[regsub {^([-+]?\d+)(\d\d\d)} $num "\\1$sep\\2" num]} {}
 return $num
}
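
# example: [comma 546140] => 546,140 and [comma 1234567] => 1,234,567;
# used below to print the wget-style "Length:" line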


proc now {} {
 return [clock format [clock seconds] -format {%H:%M:%S}]
}


proc lremove { l v } {
 foreach i $v {
  set ind [lsearch $l $i]
  if { $ind == -1 } { continue }
  set indm1 [expr {$ind-1}]
  set indp1 [expr {$ind+1}]
  set l [concat [lrange $l 0 $indm1] [lrange $l $indp1 end]]
 }
 return $l
}
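
# example: [lremove {j1 j2 j3} {j2}] => {j1 j3}; used below to drop
# finished jobs from the thread pool job list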


proc urlSize { url } {
 global validate

 if { [info exists validate] == 0 } {
  set validate [http::geturl $url -validate 1]
 }
 set code [http::ncode $validate]
 if { $code != 200 } {
  puts stderr "Error. http return code=$code"
  exit 2
 }
 set size [set ${validate}(totalsize)]

 return $size
}



proc urlType { url } {
 global validate

 if { [info exists validate] == 0 } {
  set validate [http::geturl $url -validate 1]
 }
 return [set ${validate}(type)]
}


proc isAcceptRanges { url } {
 global validate

 if { [info exists validate] == 0 } {
  set validate [http::geturl $url -validate 1]
 }
 array set www [set ${validate}(meta)]
 if { [info exists www(Accept-Ranges)] == 1 } {
  return 1
 } else {
  return 0
 }
}
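
# a server that can serve partial content advertises it with the
# "Accept-Ranges: bytes" response header, which shows up in the
# token's meta list checked above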


#
# get basename of url
#
proc urlBasename { url } {
 array set www [uri::split $url]
 set fname [lindex [split $www(path) /] end]
 return $fname
}


#
# work out the byte range
#
proc byteRanges { size nthreads } {
 set step [expr {$size/$nthreads}]
 set p1 -1
 set br {}
 for { set i 0 } { $i < $nthreads } { incr i } {
  set p0 [expr {$p1 + 1}]
  if { $i == $nthreads-1 } {
   set p1 $size
  } else {
   set p1 [expr {$p0 + $step}]
  }
  lappend br $p0
  lappend br $p1
 }
 return $br
}
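
# example: [byteRanges 1000 4] => {0 250 251 501 502 752 753 1000},
# i.e. four inclusive slices; the last thread absorbs the remainder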


#
# fix up nthreads
# if server does not support accept-range, nthreads=1
# if '#nthreads' file exists, get from there
#
proc fixNthreads { url nthreads } {
 set rc $nthreads

 # if server cannot support byte range, nthreads=1
 if { [isAcceptRanges $url] == 0 } {
  set rc 1
 }

 # in resume mode, nthreads must match the value used by the previous run
 set fname [urlBasename $url]
 set ntFilename ".${fname}#nthreads"
 if { [file exists $ntFilename] } {
  set fp [open $ntFilename r]
  set rc [string trim [read $fp]]   ;# trim the trailing newline
  close $fp
 } else {
  set fp [open $ntFilename w]
  puts $fp $nthreads
  close $fp
  set rc $nthreads
 }
 return $rc
}




# MAIN PROGRAM STARTS HERE


package require Thread
package require http
package require uri


set url [lindex $argv 0]
set nthreads 4
if { $argc == 2 } {
 set nthreads [lindex $argv 1]
}
tsv::set dap url $url
tsv::set dap t0 [clock seconds]


puts "--[now]-- $url"
puts "\t=> [urlBasename $url]"


#
# if resume is needed, set resumeSize to sum of file size
#
set resume [glob -nocomplain [format {.%s-*} [urlBasename $url]]]
if { [llength $resume] > 0 } {
 set rs 0
 foreach i $resume {
  incr rs [file size $i]
 }
 tsv::set dap resumeSize $rs
} else {
 tsv::set dap resumeSize 0
}


set nthreads [fixNthreads $url $nthreads]


#
# create and initialise thread pool
#
puts -nonewline "Setting up thread pool of $nthreads threads ... "
set tpool [tpool::create -minworkers $nthreads -maxworkers $nthreads \
 -idletime 20 -initcmd {
 package require http
 package require uri

 proc dl { seq p0 p1 } {
  set url [tsv::get dap url]
  array set www [uri::split $url]
  set fname [lindex [split $www(path) /] end]
  set fname [format {.%s-%d} $fname $seq]

  # resume
  if { [file exists $fname] == 1 } {
   set size [file size $fname]
   if { $size >= [expr $p1-$p0+1] } {
    return
   }
   set p0 [expr $p0+$size]
  }

  set fpi [open $fname a]
  fconfigure $fpi -translation binary
  set s [http::geturl $url -channel $fpi -binary 1 \
   -progress httpProgress \
   -headers [list Range bytes=$p0-$p1]]
  close $fpi
 }
 proc httpProgress { token total current } {
  upvar #0 $token state

  tsv::set dap thread[thread::id] $current

  # calculate
  set max [tsv::get dap size]
  set sum [tsv::get dap resumeSize]
  foreach t [thread::names] {
   # count only threads that have recorded a byte count so far
   if { ![tsv::exists dap thread$t] } { continue }
   incr sum [tsv::get dap thread$t]
  }

  # progress status
  set t0 [tsv::get dap t0]
  set percent [expr {100*$sum/$max}]
  set elapse [expr {[clock seconds] - $t0}]
  if { $elapse == 0 } { set elapse 1 }   ;# avoid divide-by-zero in the first second
  set kbps [expr {$sum*8.0/(1024.0*$elapse)}]
  # ETA is the outstanding bytes divided by the average rate so far
  set eta [expr {$sum > 0 ? ($max-$sum)*$elapse/$sum : 0}]
  set etam [expr {$eta/60}]
  set etas [expr {$eta-$etam*60}]
  set status [format {%3d%%[%-51s] %6.2fKbps  ETA %02d:%02d} \
   $percent \
   "[string repeat = [expr {$percent/2}]]>" \
   $kbps $etam $etas]
  puts -nonewline "$status\r"
  flush stdout
 }
}]
puts "Done"


#
# submit job to thread pool, work out the byte range for each thread
#
puts -nonewline "Submitting jobs to all threads ... "
set joblist {}
set seq 1
set size [urlSize $url]
tsv::set dap size $size
foreach { p0 p1 } [byteRanges $size $nthreads] {
 lappend joblist [tpool::post $tpool [list dl $seq $p0 $p1]] 
 incr seq 
}
puts "Done"


puts "Length: [comma $size] \[[urlType $url]\]"



#
# monitor thread pool til completion
#
while 1 {
 set f [tpool::wait $tpool $joblist]
 set joblist [lremove $joblist $f]
 if { [llength $joblist] == 0 } { break }
 after 100
}


#
# consolidation
#
puts "\n"
puts -nonewline "Download completed. Consolidating ... "
set fnameo [urlBasename $url]
set fpo [open $fnameo w]
fconfigure $fpo -translation binary
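# stitch the per-thread slices back together in sequence order; fcopy
# streams channel to channel, so the whole file never sits in memory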
for { set seq 1 } { $seq <= $nthreads } { incr seq } {
 set fnamei [format {.%s-%d} $fnameo $seq]
 set fpi [open $fnamei r]
 fconfigure $fpi -translation binary
 fcopy $fpi $fpo -size [file size $fnamei]
 close $fpi
}
close $fpo
puts "Done"


#
# cleanup
#
foreach i [glob -nocomplain ".${fnameo}*"] {
 file delete -force $i
}


puts ""
puts "--[now]-- $fnameo ([file size $fnameo]/$size)"

