Posted by luc on Thu 27th Oct 07:05 (modification of post by view diff)
View followups from Anonymous | download | new post
#!/bin/bash
# FIXME: bash (not plain sh) is required; the script relies on bash-only
# features such as [[ ]] tests and <<< here-strings.

# variables: ##################################################################
# Name of the log file; every processed page is appended as "URL IMG".
LOGFILE=manga.log
# true when the start URL is taken from the last line of the log (-r).
RESUME=false
# JOB can be "all", "preload" or "img"
JOB=all
# Extra option handed to wget when an image is saved to a file.
WGET_OPTION=--no-verbose
# Extra options handed to curl when an image is saved to a file. The value
# contains no quote characters and no space inside an argument, so it survives
# an unquoted (word-split) expansion as exactly two words.
CURL_OPTION='--write-out Done:%{url_effective}\n'
# true suppresses the progress and summary messages (-q).
QUIET=false
# Start banner, printed again at the very end of a non-quiet run.
START="Started '$0${*:+ $*}' at $(date "+%F %H:%M:%S")"
# Base directory for target directories given without a slash.
MANGADIR=${MANGADIR:-~/manga}
# functions: ##################################################################

# Print the usage summary to stdout.
# Reads $0 for the program name; takes no arguments. Exiting is left to the
# caller.
help_function () {
  local prog=${0##*/}
  echo "usage: $prog [ -xq ] [ -j job ] [ -d dir | -f log ] URL"
  echo "       $prog -r [ -xq ] [ -j job ] [ -d dir | -f log ]"
  echo "       $prog [ -f log ] -c archive"
  echo "       $prog -h"
  echo "dir defaults to \`.' and log defaults to \`manga.log'"
  echo "normal behavior is to load the images starting with the one embedded in URL"
  echo "-r will resume from a logfile (for example if new chapters were published)"
  echo "-c will cat the logfile inside an archive (not very sophisticated)"
  echo "-j selects the job: \`all' (default), \`preload' or \`img'"
  echo "-x traces the script (set -x), -q suppresses progress output"
  echo "-h shows this help"
}
# Pick the first available downloader (wget, curl, elinks) and define two
# wrappers around it:
#   load_to_file URL FILE   save the resource at URL into FILE
#   load_to_pipe URL        write the resource at URL to stdout
# `command -v` is used for detection because it is a shell builtin with a
# defined exit status, whereas the -s flag of `which` is not available in
# every implementation.
if command -v wget >/dev/null 2>&1; then
  # WGET_OPTION is expanded unquoted on purpose so it may hold several options.
  # shellcheck disable=SC2086
  load_to_file () { wget $WGET_OPTION --output-document="$2" "$1"; }
  load_to_pipe () { wget --quiet --output-document=- "$1"; }
elif command -v curl >/dev/null 2>&1; then
  # The curl options are spelled out here instead of word-splitting
  # $CURL_OPTION, so a value with embedded quotes or spaces cannot corrupt the
  # command line. In quiet mode nothing but errors is reported.
  load_to_file () {
    if ${QUIET:-false}; then
      curl --silent --output "$2" "$1"
    else
      curl --silent --write-out 'Done: %{url_effective}\n' --output "$2" "$1"
    fi
  }
  load_to_pipe () { curl --silent --output - "$1"; }
elif command -v elinks >/dev/null 2>&1; then
  load_to_file () { elinks -source "$1" > "$2"; }
  load_to_pipe () { elinks -source "$1"; }
else
  echo "Don't know how to load data (wget/curl/elinks not found)" >&2
  exit 1
fi
# A function for every known host to fetch the next URL, IMG and FILENAME.

# Handler for www.mangafox.com.
# Globals:  URL (read)  - page to inspect
#           IMG (set)   - image URL found on the page
#           FILE (set)  - local file name: page name with the image extension
#           NEXT (set)  - URL of the following page; empty when the page's
#                         link is "javascript:void(0);"
# Returns:  0 when an image was found; 1 when URL is empty or the page holds
#           no matching line (IMG, FILE and NEXT are cleared in that case).
# The function returns instead of exiting so the caller can still wait for
# running downloads and print its summary.
get_next_www_mangafox_com () {
  local pair link
  if [ -z "$URL" ]; then return 1; fi
  pair=$(load_to_pipe "$URL" | sed -n '/return enlarge/{s/.*href="\([^"]*\)".*src="\([^"]*\)".*/\1 \2/p;}')
  if [ -z "$pair" ]; then
    # Nothing recognisable on the page: report failure rather than building
    # a bogus NEXT that would keep the caller looping.
    IMG=
    FILE=
    NEXT=
    return 1
  fi
  link=${pair%% *}
  IMG=${pair#* }
  FILE=${URL%.html}.${IMG##*.}
  FILE=${FILE##*/}
  if [[ $link = "javascript:void(0);" ]]; then
    NEXT=
  else
    # The link is relative to the directory of the current page.
    NEXT=${URL%/*.html}/$link
  fi
  return 0
}
# Handler for www.mangareader.net.
# Globals:  URL (read)  - page to inspect
#           IMG (set)   - image URL (src attribute)
#           FILE (set)  - alt text plus the image extension, with every "/"
#                         replaced by "_"
#           NEXT (set)  - http://www.mangareader.net followed by the href
# Returns:  0 when an image URL was found, 1 otherwise.
get_next_www_mangareader_net () {
  local triple link
  # sed joins the "imgholder" line with the following one and prints
  #   src"href"alt
  triple=$(load_to_pipe "$URL" | sed -n '/imgholder/{N;s/.*href="\([^"]*\)".*src="\([^"]*\)".*alt="\([^"]*\)".*/\2"\1"\3/p;q;}')
  IMG=${triple%%\"*}
  FILE="${triple##*\"}.${IMG##*.}"
  # The name comes from the remote page; neutralise path separators so it can
  # only ever name a file inside the current directory.
  FILE=${FILE//\//_}
  link=${triple#*\"}
  NEXT=http://www.mangareader.net${link%\"*}
  if [ -n "$IMG" ]; then return 0; else return 1; fi
}
# Handler for manga.animea.net.
# Globals:  URL (read)  - page to inspect
#           NEXT (set)  - href of the "imagelink" line
#           IMG (set)   - src of the "imagelink" line
#           FILE (set)  - page name with ".html" replaced by the image extension
# Returns:  0 when exactly two words (link and image) were extracted,
#           1 otherwise.
# The page content is split into words with `read`; it is never passed to
# eval, so shell syntax embedded in the page stays literal text.
get_next_manga_animea_net () {
  local raw
  local -a fields=()
  # Blanks inside the line are turned into %20 first, so after the
  # substitution the only blank left separates link and image.
  raw=$(load_to_pipe "$URL" | sed -n '/imagelink/{s/ /%20/g;s/.*href="\([^"]*\)".*src="\([^"]*\)".*/\1 \2/;p;}')
  # -d '' reads the whole text (all lines); read reports EOF with a non-zero
  # status even though the array was filled, hence the "|| true".
  read -r -d '' -a fields <<<"$raw" || true
  NEXT=${fields[0]}
  IMG=${fields[1]}
  FILE="${URL%%.html}.${IMG##*.}"
  FILE=${FILE##*/}
  if [[ ${#fields[@]} -eq 2 ]]; then return 0; else return 1; fi
}
# Handler for read.homeunix.com.
# Globals:  URL (read)     - page to inspect
#           IMG (set)      - image URL (SRC attribute), blanks as %20
#           NEXT (set)     - link of the "NEXT CHAPTER" line (prefixed with
#                            http://read.homeunix.com/onlinereading/) when the
#                            page has one, else the href of the "IMG ALT" line
#           FILE (set)     - running number plus the image extension
#           i (read/write) - the running number used for FILE
# Returns:  0 on success; 1 when URL is empty or neither an image nor a link
#           could be extracted (the counter is left untouched then).
get_next_read_homeunix_com () {
  local lines
  if [ -z "$URL" ]; then return 1; fi
  lines=$(load_to_pipe "$URL" | grep "document.write.*\(IMG ALT\)\|\(NEXT CHAPTER\)")
  IMG=$(sed -n '/SRC/{;s/.*SRC="\([^"]*\).*/\1/;s/ /%20/g;p;}' <<<"$lines")
  if [[ "$lines" = *NEXT\ CHAPTER* ]]; then
    NEXT=$(sed -n '/NEXT CHAPTER/{s#.*href ="\([^"]*\).*#http://read.homeunix.com/onlinereading/\1#;s/ /%20/g;p;}' <<<"$lines")
  else
    NEXT=$(sed -n '/IMG ALT/{s/.*href ="\([^"]*\)".*/\1/;s/ /%20/g;p;}' <<<"$lines")
  fi
  # A failed download or an unrecognised page yields nothing at all; report
  # that instead of claiming success with an empty image.
  if [ -z "$IMG" ] && [ -z "$NEXT" ]; then return 1; fi
  #FIXME: What filename to use?
  FILE=$((++i)).${IMG##*.}
  return 0
}
# Placeholder for an interrupt handler: does nothing and reports success.
# NOTE(review): no trap in the visible script installs it.
cleanup_interrupt () {
  true
}
# options: ####################################################################
# Walk over the command-line flags; the operands that remain afterwards are
# left in "$@".
#   -c archive  print the log file stored inside a tar archive, then exit
#   -d dir      target directory
#   -f log      log file; its directory part becomes the target directory
#   -h          print the usage text and exit with status 1
#   -j job      job to run
#   -r          resume from the log file
#   -x          enable shell tracing
#   -q          quiet mode
while getopts c:d:f:hj:rxq FLAG; do
  case "$FLAG" in
    c)
      tar --wildcards -xOf "$OPTARG" "*/$LOGFILE"
      exit
      ;;
    d)
      DIR=$OPTARG
      ;;
    f)
      LOGFILE=$(basename "${OPTARG}")
      DIR=$(dirname "${OPTARG}")
      ;;
    h)
      help_function
      exit 1
      ;;
    j)
      JOB=$OPTARG
      ;;
    r)
      RESUME=true
      ;;
    x)
      set -x
      ;;
    q)
      WGET_OPTION=--quiet
      CURL_OPTION=--silent
      QUIET=true
      ;;
    \?)
      # getopts has already printed its own diagnostic.
      exit 43
      ;;
  esac
done
shift "$((OPTIND-1))"
# preparation: ################################################################
# Enter the target directory, determine the start URL and select the handler
# function that belongs to the host of that URL.
if [ -n "$DIR" ]; then
  # A directory name without any slash is placed below $MANGADIR.
  if [[ $DIR != */* ]]; then DIR=$MANGADIR/$DIR; fi
  mkdir -p -- "$DIR" || exit 1
  cd -- "$DIR" || exit 1
  $QUIET || echo "Working in $PWD"
fi
if $RESUME; then
  # Each log line is "URL IMG"; continue from the last page recorded.
  URL=$(tail -n 1 "$LOGFILE")
  URL=${URL%% *}
else
  URL=$1
fi
if [ -z "$URL" ]; then
  echo "No URL given. Try '${0##*/} -h' for help." >&2
  exit 1
fi
# Handler name: get_next_<host with dots replaced by underscores>.
GET_NEXT=${URL#http://}
GET_NEXT=${GET_NEXT#https://}
GET_NEXT=${GET_NEXT%%/*}
GET_NEXT=get_next_${GET_NEXT//./_}
if ! declare -F "$GET_NEXT" >/dev/null; then
  echo "Don't know how to handle '$URL' (no function $GET_NEXT)." >&2
  exit 1
fi
if $RESUME; then
  # The logged page has been processed already: look up its successor and
  # start there, so the page is neither fetched nor logged a second time.
  $GET_NEXT
  URL=$NEXT
  unset NEXT
fi
# work: #######################################################################
# Run the selected job. $GET_NEXT inspects $URL and sets IMG, FILE and NEXT;
# a loop ends as soon as it reports failure.
case "$JOB" in
  all)
    # Download every image in the background and record each page in the log.
    while $GET_NEXT; do
      load_to_file "$IMG" "$FILE" &
      echo "$URL $IMG" >> "$LOGFILE"
      URL=$NEXT
      unset NEXT
    done
    ;;
  preload)
    # Only record the "URL IMG" pairs; nothing is downloaded.
    while $GET_NEXT; do
      echo "$URL $IMG" >> "$LOGFILE"
      echo "Currently loading $URL"
      URL=$NEXT
      unset NEXT
    done
    ;;
  img)
    # TODO: not implemented yet; for now the recorded pairs are only printed.
    # -r keeps backslashes in the logged URLs intact.
    while read -r URL IMG; do
      echo "URL=$URL"
      echo "IMG=$IMG"
    done < "$LOGFILE"
    ;;
  *)
    echo "Not a valid job name!" >&2
    exit 1
    ;;
esac
# Block until all background downloads have finished.
wait
if ! $QUIET; then
  echo "$START"
  date +"%F %H:%M:%S Images loaded. Exiting ..."
fi
Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.