
rapidshare download script




Posted: Fri Jul 24, 2009 4:08 am
I wrote a bash script to download multiple files from the one-click hoster rapidshare.com with a premium account.
Just store your cookie in ~/.cookie like this:

Code:
echo enc=7FBC35EFA19morestuff > ~/.cookie


Then start the script with "download" and paste your links on stdin (press Ctrl-D when you're finished),
or copy them into a file and start it with "download file-with-links".
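For example (the link and password below are made up, just to show the input format the script accepts):

Code:
$ download
http://rapidshare.com/files/123456789/example.part1.rar
pass: secret123
^D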

After the download, the script tries to extract the files with unrar.

It uses the download tools "axel" and "wget".
Add "save_state_interval = 1" to your ~/.axelrc so axel writes its .st state file often enough for the progress display.


Code:
#!/bin/bash


# copy at most the first 16 KiB from stdin to stdout, binary-safe
# ("print d" would append newlines and corrupt the data, so we use sys.stdout.write)
function mydd() {
    SOURCE=$(tempfile)
    cat > $SOURCE << EOF
import sys
x = 16 * 1024
while x:
    d = sys.stdin.read( x )
    if not d: break
    sys.stdout.write( d )
    x -= len( d )
EOF

    python $SOURCE
    rm -f $SOURCE
}


# trim leading/trailing blanks around single-token lines
function strip() {
    sed 's/^ *\([^ ]*\) *$/\1/g'
}


function log() {
    echo "$@" >> log
}


function input_get_file_or_stdin() {
    test -f "$1" && cat "$1" || cat
}


function input_extract_urls() {
    # normalize the '×' character to 'x' first, then grep for rapidshare file links
    echo $* | tr -s '×' 'x' | egrep -oi 'http://(www\.)?rapidshare.com/files/[0-9]+/[a-z0-9_.-]+\.[a-z0-9]{3,}' | strip
}
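
# e.g. (hypothetical link, to show what the regex accepts):
#   input_extract_urls "get http://rapidshare.com/files/123456789/example.part1.rar here"
#   yields http://rapidshare.com/files/123456789/example.part1.rar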


function input_extract_passwords() {
    echo $* | egrep -io "(pw|pass|passwd|password) *[:=] *[^ ]{3,}" | cut -d: -f2- | strip
}
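
# e.g. (hypothetical input): "pass: secret123" yields "secret123"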


function count() {
    echo $* | wc -w
}

function linify() {
    echo $* | tr ' ' '\n'
}


# check if a proxy server is already running.
# if not, start one

#if ! netcat -z localhost 1580 ; then
#    echo -n "starting proxy"
#    proxy > /tmp/proxy.log 2> /tmp/proxy.err &
#    PROXY_PID=$!
#   
#    for i in $(seq 15) ; do
#        echo -n .
#        sleep 0.5
#        test -e /proc/$PROXY_PID || break
#        netcat -z localhost 1580 && break
#    done
#   
#    echo
#   
#    # still no proxy? quit!
#    if ! netcat -z localhost 1580 ; then
#        kill $PROXY_PID 2> /dev/null
#        echo "could not start proxy instance - insecure!"
#        exit 1
#    fi
#fi

# set the proxy-server
#export http_proxy=http://localhost:1580


# prepare
log "reading links and passwords"
INPUT=$( input_get_file_or_stdin "$1" | strip | sort | uniq )

log "extracting rapidshare links"
LINKS=$( input_extract_urls $INPUT )

log "extracting passwords"
PASSWORDS=$( input_extract_passwords $INPUT )

log "found $(count $LINKS) links and $(count $PASSWORDS) passwords"



# check rapidshare links
function rapidshare_get_fileids() {
    linify $* | cut -d/ -f 5
}


function rapidshare_get_filenames() {
    linify $* | cut -d/ -f6-
}


function rapidshare_build_check_url() {
    local LINKS=$*
    local FILE_IDS=$( rapidshare_get_fileids $LINKS | paste -s -d, )
    local FILE_NAMES=$( rapidshare_get_filenames $LINKS | paste -s -d, )
   
    echo "http://api.rapidshare.com/cgi-bin/rsapi.cgi?sub=checkfiles_v1&files=$FILE_IDS&filenames=$FILE_NAMES"
}


function rapidshare_check() {
    # query the rapidshare API and cache the sorted reply in a temp file;
    # the file name is echoed and passed around as a handle
    local CHECK_URL=$( rapidshare_build_check_url $* )
    local HANDLE=$(tempfile)

    wget -q -O - $CHECK_URL | sort -t, -k2 > $HANDLE
    echo $HANDLE
}
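
# the checkfiles_v1 reply has one comma-separated line per file; the getters
# below assume the field order id,name,size,server, so a reply line might
# look like this (hypothetical values): 123456789,example.part1.rar,104857600,42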


function rapidshare_check_free() {
    rm -f $1
}


function rapidshare_check_get_ids() {
    cat $1 | cut -d, -f1
}


function rapidshare_check_get_size() {
    cat $1 | egrep "^$2," | cut -d, -f3
}


function rapidshare_check_get_name() {
    cat $1 | egrep "^$2," | cut -d, -f2
}


function rapidshare_check_get_server() {
    cat $1 | egrep "^$2," | cut -d, -f4
}


function rapidshare_check_is_valid() {
    # a file counts as valid if the API reports a non-zero size and server
    local SIZE=$( rapidshare_check_get_size $1 $2 )
    local SERVER=$( rapidshare_check_get_server $1 $2 )

    [ "$SIZE" != 0 ] && [ "$SERVER" != 0 ]
    return $?
}


ALL_VALID=true
CHECK=$(rapidshare_check $LINKS)
for ID in $( rapidshare_check_get_ids $CHECK ) ; do
    rapidshare_check_is_valid $CHECK $ID || ALL_VALID=false
done

# oops, not all links are valid
if [ $ALL_VALID == false ] ; then
   
    # create the checklist
    ITEMS=$( for ID in $( rapidshare_check_get_ids $CHECK ) ; do
        echo $ID
        echo $(rapidshare_check_get_name $CHECK $ID )
        rapidshare_check_is_valid $CHECK $ID && echo on || echo off
    done )
   
    RESULT_FILE=$( tempfile )
    dialog --clear --msgbox "Ooops, you've added invalid links.\nPlease select the files you want to download from the next checklist." 0 0 \
        --and-widget --separate-output --checklist "Mark files to download" 0 70 12 \
        $ITEMS 2> $RESULT_FILE
   
    if [ $? -eq 0 ] ; then
        VALID_IDS=$( cat $RESULT_FILE )
        rm -f $RESULT_FILE
       
        # no files selected, just quit
        if [ $(count $VALID_IDS) -eq 0 ] ; then
            exit 0
        fi
    else
        log "you canceled the dialog"
        rm -f $RESULT_FILE
        exit 1
    fi
else
    VALID_IDS=$( rapidshare_check_get_ids $CHECK )
fi


log "checking $(count $VALID_IDS) files for .exe..."


function rapidshare_download() {
    local URL=$1
    local TARGET=${2:-"-"}
   
    wget --header "Cookie: "$(<$HOME/.cookie) \
        --tries=1 -c \
        -q -O $TARGET \
        $URL
}


function rapidshare_has_exe_file() {
    # fetch only the first 16 KiB of the archive via mydd; that is enough
    # for "unrar vb" to list the leading entries and spot .exe files
    local URL=$1
    local TEMP=$(tempfile)

    rapidshare_download $URL | mydd > $TEMP 2> /dev/null
    unrar vb -p- $TEMP 2> /dev/null | egrep -i '\.exe$' > /dev/null
    local RESULT=$?

    rm -f $TEMP
    return $RESULT
}


function rapidshare_check_get_url() {
    local NAME=$( rapidshare_check_get_name $1 $2 )
    echo http://rapidshare.com/files/$2/$NAME
}

function make_hash() {
    echo $* | md5sum | cut -b1-32
}

HASH_DIR=$HOME/.exetest/
mkdir -p "$HASH_DIR"

EXE_FAIL_RESULT=$(tempfile)
{
    CURRENT=0
    TOTAL_COUNT=$( count $VALID_IDS )
    for ID in $VALID_IDS ; do
        NAME=$( rapidshare_check_get_name $CHECK $ID )
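
        # dialog's --gauge protocol: lines between two XXX markers replace
        # the prompt text, a bare number updates the percentage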
       
        echo XXX
        echo checking ${NAME:0:50}...
        echo XXX
       
        URL=$( rapidshare_check_get_url $CHECK $ID )
        HASH=$( make_hash $URL )
       
        if ! [ -f "$HASH_DIR"/$HASH ] ; then
            TEMP=$(tempfile)
            rapidshare_has_exe_file $URL && echo $ID >> $TEMP
            mv $TEMP "$HASH_DIR"/$HASH
        fi
        cat "$HASH_DIR"/$HASH >> $EXE_FAIL_RESULT
       
        CURRENT=$(( CURRENT + 1 ))
        echo $(( CURRENT * 100 / TOTAL_COUNT ))
       
        # sleep rulz
        sleep 0.1
    done
} | dialog --gauge "checking for .exe files..." 0 70 || exit 1

log "ids that failed the exe test:" $(< $EXE_FAIL_RESULT)

# let the user decide, what to do!
FAILED=$(< $EXE_FAIL_RESULT)
rm -f $EXE_FAIL_RESULT

if [ $(count $FAILED) -ne 0 ] ; then
    # create the checklist
    ITEMS=$( for ID in $VALID_IDS; do
        echo $ID
        echo $(rapidshare_check_get_name $CHECK $ID )
        (echo $FAILED | egrep -qw $ID) && echo off || echo on
    done )
   
    RESULT_FILE=$( tempfile )
    dialog --clear --msgbox "Ooops, there are .exe files in your links.\nPlease select the files you want to download from the next checklist." 0 0 \
        --and-widget --separate-output --checklist "Mark files to download" 0 70 12 \
        $ITEMS 2> $RESULT_FILE
   
    # check the return code
    if [ $? -eq 0 ] ; then
        VALID_IDS=$( cat $RESULT_FILE )
        rm -f $RESULT_FILE
   
        # no files selected, just quit
        if [ $(count $VALID_IDS) -eq 0 ] ; then
            exit 0
        fi
    else
        log "you canceled the dialog"
        rm -f $RESULT_FILE
        exit 1
    fi
fi

log "downloading $(count $VALID_IDS) files..."

function file_size() {
    stat -c %s "$1"
}


# check for html files
for ID in $VALID_IDS ; do
    NAME=$( rapidshare_check_get_name $CHECK $ID )
   
    [ -f $NAME ] || continue
   
    MIMETYPE=$( file -i $NAME )
    if echo $MIMETYPE | grep -q html ; then
        dialog --yesno "The existing file $NAME is of type text/html.\nIt probably shouldn't be!\nShould I delete it (yes) or cancel the download (no)?" 0 0
        [ $? -eq 0 ] && rm -f $NAME || exit 1
    fi
done

STATE_FILE=$(tempfile)
echo "okay" > $STATE_FILE


function axel_file_size() {
    cat $1.st | python -c "import struct, sys; print struct.unpack( '<LQ', sys.stdin.read(12) )[1]"
}
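
# assumption about axel's .st state file: it seems to start with a 32-bit
# field followed by a little-endian 64-bit byte counter; the "<LQ" unpack
# above relies on exactly that and returns the bytes downloaded so far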


COUNT=0
TOTAL=$( count $VALID_IDS )
for ID in $VALID_IDS ; do
    COUNT=$(( COUNT+1 ))
    NAME=$( rapidshare_check_get_name $CHECK $ID )
    URL=$( rapidshare_check_get_url $CHECK $ID )
   
    # tell them what we are doing
    echo XXX
    echo "loading [$COUNT/$TOTAL]..."
    echo "name: ${NAME:0:55}"
    echo XXX
   
    # test if the file already exists
    TOTAL_SIZE=$( rapidshare_check_get_size $CHECK $ID )
    if [ -f $NAME ] && [ $(file_size $NAME) -eq $TOTAL_SIZE ] ; then
        # then skip this one
        log $NAME is fully loaded, skip...
        continue
    fi
   
    log "will download $URL now"
   
    # we download manually here, so we have better control
    # over axel and its progress
    axel -H "Cookie: $(<$HOME/.cookie)" -n 4 -q -o $NAME.part $URL &
    PID=$!

    # kill axel and clean up, if we quit
    trap "kill $PID; rm -f $STATE_FILE; exit 1" INT TERM EXIT
   
    # lets set it all up and go
    while ! test -f $NAME.part.st ; do sleep 0.1 ; done
    LAST_SIZE=$( axel_file_size $NAME.part )
    LAST_TIME=$( date +%s )
   
    # loop while axel is still alive (kill -0 just tests for the pid)
    while kill -0 $PID 2> /dev/null ; do
        TIME=$( date +%s )
        SIZE=$(axel_file_size $NAME.part)
       
        # give a speed update every few seconds
        TIME_DIFF=$(( TIME - LAST_TIME ))
        if [ $TIME_DIFF -ge 3 ] ; then
            SIZE_DIFF=$(( SIZE - LAST_SIZE ))
            SPEED=$(( (SIZE_DIFF / 1000) / TIME_DIFF ))
           
            echo XXX
            echo "loading [$COUNT/$TOTAL] with ${SPEED}kb/s, size: ("$(( TOTAL_SIZE / (1024 * 1024) ))"mb)"
            echo "name: ${NAME:0:55}"
            echo XXX
           
            LAST_TIME=$TIME
            LAST_SIZE=$SIZE
        fi
       
        echo $(( SIZE * 100 / TOTAL_SIZE ))
       
        # dont kill the cpu
        sleep 1
    done
   
    # test if the download was not successful
    if [ -f $NAME.part.st ] || ! [ -f $NAME.part ] ; then
        log $NAME was not successfully downloaded
        echo error > $STATE_FILE
        break
    fi
   
    mv $NAME.part $NAME
   
    # be nice
    sleep 0.1
done 2> /dev/null | dialog --gauge "starting download..." 7 70 0 || echo error > $STATE_FILE

STATE=$(< $STATE_FILE)
rm -f $STATE_FILE

# oops, not all files were loaded properly
if [ "$STATE" == "error" ] ; then
    ITEMS=$( for ID in $VALID_IDS ; do
        NAME=$( rapidshare_check_get_name $CHECK $ID )
        SIZE=$( rapidshare_check_get_size $CHECK $ID )
       
        echo $ID $NAME
        ([ -f $NAME ] && [ $(file_size $NAME) -eq $SIZE ]) && echo on || echo off
    done )
   
    # filter the valid ids...
    VALID_IDS=$( for ID in $VALID_IDS ; do
        NAME=$( rapidshare_check_get_name $CHECK $ID )
        SIZE=$( rapidshare_check_get_size $CHECK $ID )
        ([ -f $NAME ] && [ $(file_size $NAME) -eq $SIZE ]) && echo $ID
    done )
   
    dialog --no-cancel --checklist "checked files were downloaded successfully" 0 70 12 $ITEMS 2> /dev/null
else
    dialog --timeout 5 --msgbox "All $(count $VALID_IDS) files were downloaded successfully!\nPress Okay to extract the files." 0 0
fi

function extract_get_extractable() {
    # all files which end in part01.rar
    # + all files which end not in part??.rar
    {
        linify $* | egrep -i 'part0*1\.rar(\.html)?$'
        linify $* | egrep -vi 'part[0-9]+\.rar(\.html)?$' | egrep -i '\.rar(\.html)?$'
    } | sort | uniq
}
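
# e.g. (hypothetical names): from "a.part01.rar a.part02.rar b.rar c.zip"
# it keeps a.part01.rar and b.rar -- one entry point per archive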


# get the names of all valid files
FILES=$( for ID in $VALID_IDS ; do
    echo $(rapidshare_check_get_name $CHECK $ID)
done )

# free the rapidshare context... we dont need it anymore!
rapidshare_check_free $CHECK

# should we unrar it?
if ! ( echo $INPUT | egrep -q '(no|dont) (extract|unrar)' ) ; then

    FILES=$(extract_get_extractable $FILES)
    log "files to extract: $FILES"

    PASSWORD_COUNT=$(count $PASSWORDS)
    FILES_COUNT=$(count $FILES )

    # convert files to an array
    FILES=( $FILES )

    #clear the screen
    clear

    for (( INDEX = 0; INDEX < ${#FILES[@]}; INDEX++ )) ; do
        log index $INDEX with files ${FILES[@]}
       
        # try with passwords and without password
        for PASS in "" $PASSWORDS ; do
            FILE=${FILES[$INDEX]}
           
            # try to extract the file
            if [ -z "$PASS" ] ; then
                log trying to unpack $FILE without pass
                echo trying to unpack $FILE without pass
                unrar x -y -inul -c- -p- $FILE || continue
               
                MORE_FILES=$( extract_get_extractable $( unrar vb -p- -v $FILE 2> /dev/null ) )
            else
                log trying to unpack $FILE with pass $PASS
                echo trying to unpack $FILE with pass $PASS
                unrar x -y -inul -c- -p$PASS $FILE || continue
               
                MORE_FILES=$( extract_get_extractable $( unrar vb -p$PASS -v $FILE 2> /dev/null ) )
            fi
           
            # info to the user!
            echo "success!"
            echo
           
            # we were successful
            if [ $(count $MORE_FILES) -ne 0 ] ; then
                MORE_FILES=($MORE_FILES)
                FILES=(${FILES[@]} ${MORE_FILES[@]})
            fi
           
            # dont try this file again
            break
        done
    done
fi

echo finished

