#!/bin/bash
# xkcd
# Usage: xkcd [range START] [range END]
# Downloads and assembles 'webcomics' by Randall Munroe from xkcd.com
# (http://xkcd.com) in batches (from START to END).
# (See Body below)
# G O Free =:)
#
# [email protected]
# Friday, January 29, 2010
# [all xkcd comics by Randall Munroe (http://xkcd.com/about/)]
# [see also: http://en.wikipedia.org/wiki/Xkcd]
# [Thank you]
#======================================================================
#
# Functions [5]: (get_current_number, get_range, read_tag, get_image,
# assemble_comic)
#
#----------------------------------------------------------------------
# 1) get_current_number: Called from main body, gets the number of the
# latest xkcd comic
#----------------------------------------------------------------------
#
function get_current_number
{
  # Fetch the xkcd front page and derive the number of the latest comic
  # from its "<h3>Permanent link to this comic: .../NNN/</h3>" line.
  # Globals written: CURRENT_LINE   (the permalink text, tags removed)
  #                  CURRENT_NUMBER (last path component of that link)
  # Exits the script with status 1 when no numeric comic number could be
  # extracted (download failed or the page layout is not as expected).

  # Signal function start
  echo "[fn: get_current_number]"
  # Read current webpage and keep only the permalink line
  CURRENT_LINE=$(curl -s http://xkcd.com | grep "<h3>Permanent link to this comic:")
  # Drop a trailing carriage return (CRLF pages), the closing "/</h3>"
  # and the opening "<h3>"
  CURRENT_LINE=${CURRENT_LINE%$'\r'}
  CURRENT_LINE=${CURRENT_LINE%/</h3>}
  CURRENT_LINE=${CURRENT_LINE#<h3>}
  # Whatever follows the last "/" is the comic number
  CURRENT_NUMBER=${CURRENT_LINE##*/}
  # Display link and number of current comic
  echo "CURRENT_LINE: $CURRENT_LINE"
  echo "CURRENT_NUMBER: $CURRENT_NUMBER"
  echo
  # Later range checks compare against CURRENT_NUMBER numerically, so
  # stop here rather than continue with an empty or non-numeric value.
  if [[ ! $CURRENT_NUMBER =~ ^[0-9]+$ ]]; then
    echo "ERROR: could not determine the current comic number from http://xkcd.com" >&2
    exit 1
  fi
}
#
#----------------------------------------------------------------------
# 2) get_range: Called from main body, checks that given command
# arguments give a valid range (START to END)
#----------------------------------------------------------------------
#
function get_range
{
  # Validate the command-line range and publish it for the other functions.
  # Arguments: $1 - START of range (required)
  #            $2 - END of range (optional; defaults to START)
  #            any further arguments are reported and ignored
  # Globals read:    CURRENT_NUMBER (number of the latest comic)
  # Globals written: START, END, NUMBER (count of comics in the range)
  # Exits the script with status 1 on a missing or invalid range.

  # Signal function start
  echo "[fn: get_range]"
  # Define standard error message
  local arg_error="Usage: xkcd [range START] [range END]"
  # Check that at least one argument was given
  local arg_numb=$#
  if (( arg_numb == 0 )); then echo "NO RANGE: $arg_error"; exit 1; fi
  # Set start and end of range BEFORE discarding surplus arguments, so
  # that it is really the extras (not the range itself) that get ignored
  START=$1
  END=${2:-$START}
  if (( arg_numb > 2 )); then
    echo "TOO MANY ARGUMENTS: $arg_error"
    shift 2
    echo "IGNORING: $*"
  fi
  # Check that range is valid: integers only (at least one digit)
  if [[ ! $START =~ ^[0-9]+$ || ! $END =~ ^[0-9]+$ ]]; then
    echo "INTEGERS ONLY: $arg_error"; exit 1
  fi
  # Force base 10 so that leading zeros (e.g. 08) are not read as octal
  START=$(( 10#$START ))
  END=$(( 10#$END ))
  # Not less than one
  if (( START < 1 )); then echo "START must be greater than zero: $arg_error"; exit 1; fi
  # Not greater than the current number (clamp instead of failing)
  if [[ $CURRENT_NUMBER =~ ^[0-9]+$ ]]; then
    if (( START > 10#$CURRENT_NUMBER )); then
      echo "START of range larger then CURRENT... Setting START to CURRENT: $CURRENT_NUMBER"
      START=$CURRENT_NUMBER
    fi
    if (( END > 10#$CURRENT_NUMBER )); then
      echo "END of range larger then CURRENT... Setting END to CURRENT: $CURRENT_NUMBER"
      END=$CURRENT_NUMBER
    fi
  else
    echo "WARNING: CURRENT_NUMBER is not set to a number; range not checked against the latest comic" >&2
  fi
  # END must not be smaller than START
  if (( START > END )); then echo "START must be less than END: $arg_error"; exit 1; fi
  NUMBER=$(( END - START + 1 ))
  # Display range
  echo "Start: $START End: $END Number: $NUMBER"
  echo
}
#
#----------------------------------------------------------------------
# 3) read_tag: Called from main body, for each comic in range, read the
# web page ('curl', 'grep'), parse the image tag for descriptors (image
# URL, file name, extension, title, and alt), and stores the details in
# a text file (DETAILS_FILE).
#----------------------------------------------------------------------
#
function read_tag
{
  # For every comic number from START to END: download its web page,
  # pick the line holding the comic's <img> tag, split that tag into its
  # descriptors and write them, one per line, to a details file.
  # Globals read:    START, END, XKCD_parts
  # Globals written: COMIC_NUMBER, WEBPAGE, DETAILS_FILE, IMG_TAG, IMG_URL,
  #                  FILE_ORIG, FILENAME, EXT, TITLE, ALT (last comic read)
  # Details file layout ($XKCD_parts/<number>.txt), one value per line:
  #   number, image URL, original file name, name without extension,
  #   extension, title attribute, alt attribute

  # Signal function start
  echo "[fn: read_tag]"
  echo "Reading xkcd webpage(s)... Comics: $START to $END..."; echo ">>>"
  # Count comics processed
  local count=0
  for (( COMIC_NUMBER = START; COMIC_NUMBER <= END; COMIC_NUMBER++ )); do
    count=$(( count + 1 ))
    # Set target URL and location of details text file (DETAILS_FILE)
    WEBPAGE="http://xkcd.com/$COMIC_NUMBER/"
    DETAILS_FILE="$XKCD_parts/$COMIC_NUMBER.txt"
    # Display target URL
    echo "$count) [$COMIC_NUMBER] Reading WEBPAGE: $WEBPAGE"
    # Read webpage and extract image tag. The pattern is quoted and matched
    # as a fixed string so the shell cannot glob it and grep does not treat
    # any of its characters as regex operators.
    IMG_TAG=$(curl -s "$WEBPAGE" | grep -F '<img src="http://imgs.xkcd.com/comics/')
    if [[ -z $IMG_TAG ]]; then
      echo "WARNING: no comic image tag found on $WEBPAGE" >&2
    fi
    # Parse image tag for descriptors
    IMG_TAG=${IMG_TAG%<br/>}
    IMG_URL=${IMG_TAG##*'<img src="'}
    IMG_URL=${IMG_URL%%\"*}
    FILE_ORIG=${IMG_URL#http://imgs.xkcd.com/comics/}
    # Split at the LAST dot for both parts, so "version_1.0.png" yields
    # FILENAME "version_1.0" and EXT "png"
    FILENAME=${FILE_ORIG%.*}
    EXT=${FILE_ORIG##*.}
    TITLE=${IMG_TAG##*title=\"}
    TITLE=${TITLE%%\"*}
    ALT=${IMG_TAG##*alt=\"}
    ALT=${ALT%%\"*}
    # Display details of comic
    echo "IMG_TAG: $IMG_TAG"
    echo "IMG_URL: $IMG_URL"
    echo "FILE_ORIG: $FILE_ORIG"
    echo "FILENAME: $FILENAME"
    echo "EXT: $EXT"
    echo "TITLE: $TITLE"
    echo "ALT: $ALT"
    # Store details in a text file. printf '%s\n' writes the scraped text
    # verbatim; backslashes in a title must not be expanded into control
    # characters or extra lines.
    echo "Creating DETAILS_FILE: $DETAILS_FILE"
    printf '%s\n' "$COMIC_NUMBER" "$IMG_URL" "$FILE_ORIG" "$FILENAME" "$EXT" "$TITLE" "$ALT" > "$DETAILS_FILE"
    echo
  done
}
#
#----------------------------------------------------------------------
# 4) get_image: Called from main body, this function reads the URL for
# each comic from the details file, downloads the image ('wget'), and
# saves it with the details file in $XKCD/parts (i.e. $XKCD_parts).
#----------------------------------------------------------------------
#
function get_image
{
  # For every comic number from START to END: read the image URL and the
  # extension from the comic's details file and download the image with
  # 'wget' to $XKCD_parts/<number>.<extension>.
  # Globals read:    START, END, XKCD_parts
  # Globals written: COMIC_NUMBER, DETAILS_FILE, IMG_URL, EXT, IMG_FILE
  # A comic whose details file is unreadable, whose URL is empty, or whose
  # download fails is reported on stderr and skipped; the loop continues.

  # Signal function start
  echo "[fn: get_image]"
  echo "Getting picture(s)... Comics: $START to $END..."; echo ">>>"
  # Count comics processed
  local count=0
  local line
  local -a details
  for (( COMIC_NUMBER = START; COMIC_NUMBER <= END; COMIC_NUMBER++ )); do
    count=$(( count + 1 ))
    # Get location of details file
    DETAILS_FILE="$XKCD_parts/$COMIC_NUMBER.txt"
    # Display target details file for each comic
    echo "$count) [$COMIC_NUMBER] Reading DETAILS_FILE: $DETAILS_FILE"
    if [[ ! -r $DETAILS_FILE ]]; then
      echo "WARNING: cannot read $DETAILS_FILE; skipping comic $COMIC_NUMBER" >&2
      continue
    fi
    # Read lines of details file into an array, one element per line.
    # Reading line by line keeps empty lines in place (so indexes stay
    # aligned), never glob-expands file content, and avoids clobbering
    # bash's own LINES variable.
    details=()
    while IFS= read -r line || [[ -n $line ]]; do
      details+=("$line")
    done < "$DETAILS_FILE"
    # Index: 0 number, 1 image URL, 2 original file name, 3 file name,
    #        4 extension, 5 title, 6 alt
    IMG_URL=${details[1]}
    EXT=${details[4]}
    # Display target image URL and original image extension
    echo "IMG_URL: $IMG_URL"
    echo "EXT: $EXT"
    # Set destination file
    IMG_FILE="$XKCD_parts/$COMIC_NUMBER.$EXT"
    # Signal attempt to get image
    echo "Getting Image: $IMG_URL"
    echo "Saving To: $IMG_FILE"
    if [[ -z $IMG_URL ]]; then
      echo "WARNING: no image URL recorded for comic $COMIC_NUMBER; skipping" >&2
      continue
    fi
    # Get image from the internet and save it to the destination file.
    # 'wget -O' creates the output file even when the download fails, so
    # remove the leftover to keep later steps from using a broken image.
    if ! wget -q -O "$IMG_FILE" "$IMG_URL"; then
      echo "WARNING: download failed for comic $COMIC_NUMBER: $IMG_URL" >&2
      rm -f -- "$IMG_FILE"
    fi
  done
  return 0
}
#
#----------------------------------------------------------------------
# 5) assemble_comic: Called from main body, this function runs the
# 'convert' program to assemble the image with a title and caption and
# saves it in the main directory ($XKCD) with a number, original
# filename and (possibly altered) extension (.png).
#----------------------------------------------------------------------
#
function assemble_comic
{
  # For every comic number from START to END: read file name, extension,
  # title and alt text from the comic's details file and run 'convert'
  # twice: first to put the alt text as a label above the image, then to
  # append the title text as a caption below it. The result is written to
  # $XKCD/<number>--<filename>.png.
  # Globals read:    START, END, XKCD, XKCD_parts
  # Globals written: COMIC_NUMBER, DETAILS_FILE, FILENAME, EXT, IMG_FILE,
  #                  TITLE, ALT, COMIC_FILE
  # A comic whose details file is unreadable or whose assembly fails is
  # reported on stderr and skipped; the loop continues.

  # Signal function start
  echo; echo "[fn: assemble_comic]"
  echo "Assembling comic(s) (image+title+alt)... Comics: $START to $END... "; echo ">>>"
  # Count comics processed
  local count=0
  local line
  local -a details
  for (( COMIC_NUMBER = START; COMIC_NUMBER <= END; COMIC_NUMBER++ )); do
    count=$(( count + 1 ))
    # Get location of details file
    DETAILS_FILE="$XKCD_parts/$COMIC_NUMBER.txt"
    # Display target details file for each comic
    echo "$count) [$COMIC_NUMBER] Reading DETAILS_FILE: $DETAILS_FILE"
    if [[ ! -r $DETAILS_FILE ]]; then
      echo "WARNING: cannot read $DETAILS_FILE; skipping comic $COMIC_NUMBER" >&2
      echo
      continue
    fi
    # Read lines of details file into an array, one element per line.
    # Empty lines are kept, so an empty title cannot shift the alt text
    # into the title's slot, and file content is never glob-expanded.
    details=()
    while IFS= read -r line || [[ -n $line ]]; do
      details+=("$line")
    done < "$DETAILS_FILE"
    # Index: 3 file name, 4 extension, 5 title, 6 alt
    FILENAME=${details[3]}
    EXT=${details[4]}
    IMG_FILE="$XKCD_parts/$COMIC_NUMBER.$EXT"
    TITLE=${details[5]}
    ALT=${details[6]}
    # Display parts to assemble
    echo "Using IMG_FILE: $IMG_FILE"
    echo "Using TITLE: $TITLE"
    echo "Using ALT: $ALT"
    # Set filename for finished comic with its number and original filename
    COMIC_FILE="$XKCD/$COMIC_NUMBER--$FILENAME.png"
    # Signal attempt to assemble comic
    echo "Assembling to COMIC_FILE: $COMIC_FILE"
    # Assemble comic: apply label, then caption. The second step runs only
    # if the first succeeded, so a stale COMIC_FILE from an earlier run is
    # never captioned a second time.
    if convert "$IMG_FILE" -background White -pointsize 20 label:"$ALT\n" +swap -gravity Center -append "$COMIC_FILE" &&
       convert "$COMIC_FILE" -background White -pointsize 16 -size 420x caption:"\n\n\n$TITLE" -gravity Center -append "$COMIC_FILE"; then
      # Signal assembly completed
      echo "[convert] $IMG_FILE --> $COMIC_FILE"
    else
      echo "WARNING: could not assemble comic $COMIC_NUMBER from $IMG_FILE" >&2
    fi
    echo
  done
  return 0
}
#
#======================================================================
# Body: This is the main body of the program. It first sets the proper
# directories. It then runs the functions: 'get_current_number',
# 'get_range', 'read_tag', 'get_image', and 'assemble_comic'.
#======================================================================
#
# Print title
echo; echo "[xkcd: Get xkcd Comics]"
# Set directories: XKCD receives the finished comics, XKCD_parts the
# per-comic details files and downloaded images. Either may be preset in
# the environment; otherwise the original locations are used.
XKCD="${XKCD:-/home/geoffrey/Comics/xkcd}"
XKCD_parts="${XKCD_parts:-$XKCD/parts}"
echo "Destination Directory: $XKCD"
echo "Components Directory: $XKCD_parts"
echo
# Run function to get the number of
# current (latest) xkcd comic
get_current_number
# Check that arguments and range are
# valid (for xkcd archives); the script's
# own arguments are passed through as-is
get_range "$@"
# Make sure both output directories exist
# before anything is written to them
mkdir -p -- "$XKCD" "$XKCD_parts" || { echo "ERROR: cannot create $XKCD and/or $XKCD_parts" >&2; exit 1; }
# Read the webpage for each xkcd comic
# and extract details from its html tag
read_tag
# Get the image for each xkcd comic
get_image
# Use 'imagemagick' ('convert') to
# assemble xkcd comic (add title and
# caption to image)
assemble_comic
# Signal end of program
echo "$0" "$@"; echo "done"; echo
#[End of file]