#!/bin/bash
#+FIXME /bin/bash is needed for the array in get_next_manga_animea_net() and in
#+the preparations part for a test with [[ ]]
#
# Walks an online manga reader page by page. For each page URL a host-specific
# get_next_<host> function sets IMG (image URL), FILE (local file name) and
# NEXT (URL of the following page). Depending on JOB the image is downloaded
# and/or the pair "URL IMG" is appended to the logfile.

# variables: ##################################################################
# JOB can be "all", "preload" or "img"
LOGFILE=manga.log
RESUME=false
JOB=all
# Downloader options are arrays so that an option value containing spaces
# (curl's --write-out format) is passed as one single argument.
WGET_OPTION=(--no-verbose)
CURL_OPTION=(--write-out 'Done: %{url_effective}\n')
QUIET=false
START="Started '$0${*:+ $*}' at $(date "+%F %H:%M:%S")"
MANGADIR=${MANGADIR:-~/manga}

# functions: ##################################################################

# Print the usage text to stdout.
help_function () {
  local PROG
  PROG=$(basename "$0")
  echo "usage: $PROG [ -xq ] [ -d dir | -f log ] URL"
  echo " $PROG -r [ -d dir | -f log ]"
  echo " $PROG [ -f log ] -c arcive"
  echo "dir defaults to \`.' and log defaults to \`manga.log'"
  echo "normal behavior is to load the images starting with the one embedged in URL"
  echo "-r will resume from a logfile (for example if new chapters where published)"
  echo "-c will cat the logfile inside an archive (not very sufisticated)"
}

# Pick the first available download tool and define two helpers on top of it:
#   load_to_file URL FILE  - store the resource in FILE
#   load_to_pipe URL       - write the resource to stdout
# `command -v` is used instead of `which -s` because -s is not understood by
# every `which` implementation.
if command -v wget >/dev/null 2>&1; then
  load_to_file () { wget "${WGET_OPTION[@]}" --output-document="$2" "$1"; }
  load_to_pipe () { wget --quiet --output-document=- "$1"; }
elif command -v curl >/dev/null 2>&1; then
  load_to_file () { curl --silent "${CURL_OPTION[@]}" --output "$2" "$1"; }
  load_to_pipe () { curl --silent --output - "$1"; }
elif command -v elinks >/dev/null 2>&1; then
  load_to_file () { elinks -source "$1" > "$2"; }
  load_to_pipe () { elinks -source "$1"; }
else
  echo "Don't know how to load data (wget/curl/elinks not found)" >&2
  exit 1
fi

# a function for every known host to fetch the next URL, IMG and FILENAME.
# Each one reads the global URL, sets the globals IMG, FILE and NEXT and
# returns non-zero when there is nothing (more) to load.

get_next_www_mangafox_com () {
  # An empty URL marks the end of the chain; return (instead of exiting the
  # whole script) so the caller can still wait for running downloads.
  if [ -z "$URL" ]; then return 1; fi
  NEXT=$(load_to_pipe "$URL" | sed -n '/return enlarge/{s/.*href="\([^"]*\)".*src="\([^"]*\)".*/\1 \2/p;}')
  # Nothing scraped: stop rather than building a bogus NEXT from URL alone.
  if [ -z "$NEXT" ]; then return 1; fi
  IMG=${NEXT#* }
  FILE=${URL%.html}.${IMG##*.}
  FILE=${FILE##*/}
  if [[ ${NEXT%% *} = "javascript:void(0);" ]]; then
    #TODO
    # A void link means there is no following page.
    NEXT=
  else
    #TODO
    NEXT=${URL%/*.html}/${NEXT%% *}
  fi
  #TODO
  return 0
}

get_next_www_mangareader_net () {
  # sed emits: <img src>"<href>"<alt>, with '"' as field separator.
  NEXT=$(load_to_pipe "$URL" | sed -n '/imgholder/{N;s/.*href="\([^"]*\)".*src="\([^"]*\)".*alt="\([^"]*\)".*/\2"\1"\3/p;q;}')
  IMG=${NEXT%%\"*}
  FILE="${NEXT##*\"}.${IMG##*.}"
  NEXT=${NEXT#*\"}
  NEXT=http://www.mangareader.net${NEXT%\"*}
  if [ "$IMG" ]; then return 0; else return 1; fi
}

get_next_manga_animea_net () {
  #TODO test w/o array
  local scraped
  local -a LINE=()
  scraped=$(load_to_pipe "$URL" | sed -n '/imagelink/{s/ /%20/g;s/.*href="\([^"]*\)".*src="\([^"]*\)".*/\1 \2/;p;}')
  # Split on whitespace without eval: the text comes from a remote server and
  # must never be re-parsed as shell code. read returns non-zero at EOF here,
  # which is expected and ignored.
  read -r -d '' -a LINE <<<"$scraped"
  IMG=${LINE[1]}
  #IMG=${LINE#* }
  FILE="${URL%%.html}.${IMG##*.}"
  FILE=${FILE##*/}
  NEXT=${LINE[0]}
  #NEXT=${LINE% *}
  # Exactly two fields (next page, image) are expected.
  if [[ ${#LINE[@]} -eq 2 ]]; then return 0; else return 1; fi
}

get_next_read_homeunix_com () {
  if [ -z "$URL" ]; then return 1; fi
  local LINE
  LINE=$(load_to_pipe "$URL" | grep "document.write.*\(IMG ALT\)\|\(NEXT CHAPTER\)")
  IMG=$(sed -n '/SRC/{;s/.*SRC="\([^"]*\).*/\1/;s/ /%20/g;p;}' <<<"$LINE")
  if [[ "$LINE" = *NEXT\ CHAPTER* ]]; then
    NEXT=$(sed -n '/NEXT CHAPTER/{s#.*href ="\([^"]*\).*#http://read.homeunix.com/onlinereading/\1#;s/ /%20/g;p;}' <<<"$LINE")
  else
    NEXT=$(sed -n '/IMG ALT/{s/.*href ="\([^"]*\)".*/\1/;s/ /%20/g;p;}' <<<"$LINE")
  fi
  #FIXME: What filename to use?
  # i is a global counter so numbering continues across calls.
  FILE=$((++i)).${IMG##*.}
  return 0
}

# Placeholder for an interrupt handler; currently does nothing.
cleanup_interrupt () {
  :
}

# options: ####################################################################
while getopts c:d:f:hj:rxq FLAG; do
  case $FLAG in
    c) tar --wildcards -xOf "$OPTARG" "*/$LOGFILE"; exit;;
    d) DIR=$OPTARG;;
    f) LOGFILE=$(basename "${OPTARG}"); DIR=$(dirname "${OPTARG}");;
    h) help_function; exit 1;;
    j) JOB=$OPTARG;;
    r) RESUME=true;;
    x) set -x;;
    q) WGET_OPTION=(--quiet)
       CURL_OPTION=(--silent)
       QUIET=true;;
    \?) exit 43;;
  esac
done
shift $((OPTIND-1))

# preparation: ################################################################
if [ "$DIR" ]; then
  # A bare name (no slash) is taken relative to MANGADIR.
  if [[ $DIR != */* ]]; then DIR=$MANGADIR/$DIR; fi
  mkdir -p -- "$DIR" || exit 1
  # Never continue in the wrong directory if cd fails.
  cd -- "$DIR" || exit 1
  $QUIET || echo "Working in $PWD"
fi

if $RESUME; then
  # The first field of the last log line is the last page URL handled.
  URL=$(tail -n 1 "$LOGFILE")
  URL=${URL%% *}
else
  URL="$1"
fi

if [ -z "$URL" ]; then
  echo "No URL given. Try '${0##*/} -h' for help." >&2
  exit 1
fi

# Derive the handler name from the host part: www.example.com ->
# get_next_www_example_com
GET_NEXT=${URL#http://}
GET_NEXT=${GET_NEXT#https://}
GET_NEXT=${GET_NEXT%%/*}
GET_NEXT=get_next_${GET_NEXT//./_}

# The handler is only needed when pages are actually fetched.
if $RESUME || [[ $JOB == all || $JOB == preload ]]; then
  if [[ $(type -t "$GET_NEXT") != function ]]; then
    echo "Don't know how to handle the host of '$URL'." >&2
    exit 1
  fi
fi

if $RESUME; then
  # The logged page was already handled: step to its successor so it is not
  # downloaded and logged a second time.
  if "$GET_NEXT"; then URL=$NEXT; else URL=; fi
  unset NEXT
fi

# work: #######################################################################
case $JOB in
  all)
    while "$GET_NEXT"; do
      # Downloads run in the background; they are collected by `wait` below.
      load_to_file "$IMG" "$FILE" &
      #wget $WGET_OPTION --output-document="$FILE" "$IMG" &
      echo "$URL $IMG" >> "$LOGFILE"
      URL=$NEXT
      unset NEXT
    done
    ;;
  preload)
    while "$GET_NEXT"; do
      echo "$URL $IMG" >> "$LOGFILE"
      echo "Currently loading $URL"
      URL=$NEXT
      unset NEXT
    done
    ;;
  img)
    while read -r URL IMG; do
      #TODO
      #echo "Not implemented yet!" >&2
      #exit -1
      echo "URL=$URL"
      echo "IMG=$IMG"
    done < "$LOGFILE"
    ;;
  *)
    echo "Not a valid job name!" >&2
    exit 1;;
esac

wait

if ! $QUIET; then
  echo "$START"
  date +"%F %H:%M:%S Images loaded. Exiting ..."
fi