#!/bin/bash

# Start-up banner: credits, the tool versions the script was built against and
# the open TODO list. One printf call emits every line verbatim (the curl
# version line keeps its trailing blank).
printf '%s\n' \
  '=======================================================================================' \
  'admin@krisweston.com added and cleaned up some of my code but hes dropped off the face of the planet so i have to pickup where he left off' \
  'rmccurdy.com if you have any issues with any of the script not working ...' \
  '=======================================================================================' \
  'NOTES:' \
  '* Build with Ubuntu 10.04.3 LTS' \
  '* GNU sed version 4.2.1' \
  '* curl 7.19.7 (i486-pc-linux-gnu) libcurl/7.19.7 OpenSSL/0.9.8k zlib/1.2.3.3 libidn/1.15 ' \
  'TODO:' \
  '* error checking max pages zero then bail report error ..' \
  '* setup vars for config max timeout and test urls ..' \
  '* add more checks from freeproxylists.com proxies ssl etc' \
  '* add support to check TEST urls before we start or auto detect and set net TEST url if blocked etc ..' \
  '* check output files for IP:PORT and wc to determining if site ripp worked ...' \
  '* add file uploader site check' \
  '======================================================================================='

# Notes kept from the original author (candidate checks, never executed):
# curl -s -A "$varagent" -x "$proxyip" --url http://www.filesonic.com/file/537557874/T-64AOCP.rar --connect-timeout $TIMEOUT -m 10 | grep -ci 'suspicious'
# IP http://proxy.parser.by/check_proxy.php

# Leave the banner on screen for a moment before any work starts.
sleep 1
# Ctrl-C runs quit(); the function is defined further down, which is fine
# because the handler name is only resolved when the signal arrives.
trap quit INT

# Settings shared by every function below.
OUTPUT_DIR="output"   # root of the result tree
TIMEOUT=12            # handed to curl --connect-timeout by the proxy checks
varagent='Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)'   # User-Agent sent while scraping

# Create the result tree on first use: one directory per proxy family.
if [ ! -d "$OUTPUT_DIR" ]; then
  mkdir "$OUTPUT_DIR"
fi
if [ ! -d "$OUTPUT_DIR"/socks ]; then
  mkdir "$OUTPUT_DIR"/socks/
fi
if [ ! -d "$OUTPUT_DIR"/http ]; then
  mkdir "$OUTPUT_DIR"/http/
fi

function quit {
# SIGINT handler: stop outstanding downloads, remove the scratch files the
# scrapers leave in the working directory and end the script.

# NOTE: this hits every curl process the user may signal, not only the ones
# started by this script.
killall -9 curl

local scratch
for scratch in cookie objs result
do
  if [ -f "$scratch" ]; then
    rm "$scratch"
  fi
done

# Finish the partially written progress line before leaving.
echo
exit

}

function get_freeproxylists {
# Scrape the "elite" lists of freeproxylists.com and append the entries to
# $OUTPUT_DIR/http/freeproxylist.txt.
#
# elite.php is reduced to the identifiers found in its "elite/<id>.html"
# links. Each identifier becomes a load_elite_<id>.html URL whose
# HTML-escaped table is rewritten into one IP:PORT entry per line.
#
# Globals: varagent (read), OUTPUT_DIR (read)

echo Ripping ELITE freeproxylists.com
echo They do not provide anyone with your IP address and effectively hide any information about you and your reading interests

local list_url

#ugly
for list_url in $(curl -A "$varagent"  -s http://www.freeproxylists.com/elite.php \
    | grep "elite " \
    | grep "elite/" \
    | sed -e 's/.*elite\///g' \
          -e 's/\.html.*//g' \
          -e 's/^/http:\/\/www.freeproxylists.com\/load_elite_/g' \
          -e 's/$/\.html/g')
do
  echo $list_url
  # Escaped row breaks become newlines, escaped cell breaks become ':'; what
  # is left of the markup, the site's advert lines, indented lines and
  # carriage returns are then dropped.
  curl -A "$varagent"  -s $list_url \
    | awk '{gsub("&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;","\n"); gsub("&lt;/td&gt;&lt;td&gt;",":"); print}' \
    | sed 's/&lt.*//g' \
    | grep -v "<" \
    | sed -e '/Try our\|You/d' -e '/^[ \t]/d' \
    | tr -d '\r' >> "$OUTPUT_DIR"/http/freeproxylist.txt
done

}

function get_sakura {
# Scrape proxylist.sakura.ne.jp page by page and append the entries to
# $OUTPUT_DIR/http/sakura.ne.jp.txt.
#
# The highest "Page NN" label on the front page decides how many index pages
# are requested. Every proxy(...) call found on a page is rewritten from its
# quoted arguments into IP:PORT form.
#
# Globals: varagent (read), OUTPUT_DIR (read)
# Files:   ./tmp is used as per-page scratch space

if [ -f tmp ]; then
  rm tmp
fi

local last_page
last_page=$( curl -s "http://proxylist.sakura.ne.jp/" \
  | grep -o "Page .[0-9]" \
  | sed 's/Page //' \
  | sort -n \
  | tail -1 )

echo "Ripping proxylist.sakura.ne.jp (${last_page} Pages)"
sleep 1

local page
# $last_page stays unquoted on purpose: when nothing was detected, seq is
# called with a single argument and still yields page 1.
for page in $(seq 1 $last_page)
do
  curl -s -A "$varagent"  "http://proxylist.sakura.ne.jp/index.htm?pages=${page}" \
    | grep 'proxy([1-4]' >> tmp
  # parse output: keep the quoted octets plus the trailing port, join the
  # octets with dots, turn the last separator into ':' and drop ");".
  cut -d "'" -f 2,4,6,8,9 < tmp \
    | sed -e "s/'/./g" -e "s/.,/:/" -e 's/);//' -e '/^[ \t]/d' \
    | tr -d '\r' >> "$OUTPUT_DIR"/http/sakura.ne.jp.txt
  rm tmp
done
}

# total crap 1 http and one socks .. function get_multiproxy {

# total crap 1 http and one socks .. echo "Ripping multiproxy.org"
# total crap 1 http and one socks .. lynx -connect_timeout=3 -width=999 -dump -nolist "http://www.multiproxy.org/cgi-bin/search-proxy.pl" | sed 's/ //'g | grep ':' | sed '/Disclaimer\|Total\|USEMAP\|All\|Non-anon/d' | sed '/^[ \t]/d' | tr -d '\r' >> "$OUTPUT_DIR"/socks/multiproxy.txt

# total crap 1 http and one socks .. }

function get_nntime {
# Scrape nntime.com list pages 01..17 and append the decoded entries to
# $OUTPUT_DIR/http/nntime.txt.
#
# The site prints ports through inline javascript, so the relevant script
# lines are rewritten into print(...) statements, collected in ./tmp and run
# through the command-line `js` interpreter.
#
# Globals: OUTPUT_DIR (read)
# Returns: 1 without touching anything when `js` is not available.

local mobile_agent='SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)'
local n
local page

if ! command -v js >/dev/null 2>&1; then
  echo "get_nntime: the 'js' interpreter is required to decode nntime.com pages" >&2
  return 1
fi

rm -f tmp
echo "Ripping nntime.com"
# pages is wrong try division of total proxy on main page

# The old "for i in seq {01..17}" also looped over the literal word "seq" and
# so requested a non-existent proxy-list-seq.htm page first.
for (( n = 1; n <= 17; n++ ))
do
  printf -v page '%02d' "$n"   # page names are zero padded
  curl -s "http://nntime.com/proxy-list-${page}.htm" -A "$mobile_agent" \
    | grep -E '(document.write| = )|;<\/script>' \
    | sed -e 's/.*<td>/print("/g' -e 's/<script type="text\/javascript">document.write(//g' -e 's/":/:/g' -e 's/<\/script>.*/;/g' \
    | sed '/^[ \t]/d' \
    | tr -d '\r' >> tmp
done
js tmp >> "$OUTPUT_DIR"/http/nntime.txt
rm tmp

}

function get_myproxy {
# Scrape the my-proxy.com lists:
#   anonymous pages s1..s3   -> $OUTPUT_DIR/http/my-proxy-s.txt
#   socks4 and socks5 pages  -> $OUTPUT_DIR/socks/socks4.txt
#   http pages 1..10         -> $OUTPUT_DIR/http/my-proxy.txt
# All output files are appended to.
#
# Globals: varagent (read), OUTPUT_DIR (read)
# Files:   ./cookie (curl cookie jar, left behind), ./tmp (scratch)
# fix cookie

local page

echo "Ripping www.my-proxy.com"

rm -f tmp cookie

# no cookie needed anymore ??
curl -L -A "$varagent" -s  -c cookie -b cookie http://www.my-proxy.com/list/proxy.php > /dev/null
# Change a trailing 0 on any cookie-jar line to 2 (reason not documented).
# Skipped when curl produced no jar at all.
if [ -f cookie ]; then
  sed 's/0$/2/' cookie > tmp && mv tmp cookie
fi

echo "ripping Anonymous Proxy 3 pages"
for page in s1 s2 s3
do
  _myproxy_rip "$page" "$OUTPUT_DIR"/http/my-proxy-s.txt
done

echo "ripping Socks 4 and 5 Proxy 2 pages"
# NOTE(review): the socks5 page is stored in socks4.txt as well, exactly as
# before; check_alive merges everything under socks/ anyway.
for page in socks4 socks5
do
  _myproxy_rip "$page" "$OUTPUT_DIR"/socks/socks4.txt
done

echo "ripping http 10 pages"
# The old "for i in seq {1..10}" also requested proxy-list-seq.html.
for (( page = 1; page <= 10; page++ ))
do
  _myproxy_rip "$page" "$OUTPUT_DIR"/http/my-proxy.txt
  sleep 1
done
}

function _myproxy_rip {
# Download one proxies.my-proxy.com list page and append its entries.
#   $1 - page name, the part between "proxy-list-" and ".html"
#   $2 - file the IP:PORT lines are appended to
local page_name=$1
local dest=$2

# The referer used to be two URLs glued together
# ('http://www.m.com/list/verify.php'http://www.my-proxy.com/list/verify.php);
# only the real site's verify page is sent now.
curl -A "$varagent" -s -c cookie -b cookie \
    "http://proxies.my-proxy.com/proxy-list-${page_name}.html" \
    -e 'http://www.my-proxy.com/list/verify.php' \
  | grep br \
  | awk '{gsub("<br>","\n"); print}' \
  | grep "[0-9]\.[0-9]" \
  | sed '/^[ \t]/d' \
  | tr -d '\r' >> "$dest"
}


function get_proxylistsnet {
# Download the three plain-text lists published by proxylists.net, strip the
# carriage returns and store each one (overwriting) under $OUTPUT_DIR.
#
# Globals: varagent (read), OUTPUT_DIR (read)

echo "Ripping proxylists.net"

#lynx -connect_timeout=3 -width=999 -dump -nolist 'http://www.proxylists.net/http_highanon.txt' >> "$OUTPUT_DIR"/proxylistsnet.txt

# Each entry is "<remote file>:<path below $OUTPUT_DIR>".
local job remote target
for job in \
    socks4.txt:socks/plnsocks4.txt \
    socks5.txt:socks/plnsocks5.txt \
    http_highanon.txt:http/plnhighanon.txt
do
  remote=${job%%:*}
  target=${job#*:}
  wget -q -U "$varagent" http://www.proxylists.net/"$remote" -O - | tr -d '\r' > "$OUTPUT_DIR"/"$target"
done

}

function get_shroomery {
# Dump the shroomery.org plain-text proxy list with lynx and append it,
# minus indented lines and carriage returns, to
# $OUTPUT_DIR/http/shroomery.txt.
#
# Globals: OUTPUT_DIR (read)

local list_url='http://www.shroomery.org/ythan/proxylist.txt'

echo "Ripping www.shroomery.org"
# OLD lynx -connect_timeout=3 -width=999 -dump -nolist 'http://www.shroomery.org/ythan/proxylist.php' | sed '/^[ \t]/d' | tr -d '\r' >> "$OUTPUT_DIR"/shroomery.txt
lynx -connect_timeout=3 -width=999 -dump -nolist "$list_url" \
  | sed '/^[ \t]/d' \
  | tr -d '\r' \
  >> "$OUTPUT_DIR"/http/shroomery.txt

}

function get_samair {
# Scrape samair.ru in two passes:
#   1. HTTP list pages proxy-01.htm .. proxy-NN.htm -> $OUTPUT_DIR/http/samair.txt
#   2. socks.htm plus socks1.htm .. socksN.htm      -> $OUTPUT_DIR/socks/samairsocks.txt
# Both result files are replaced (mv), not appended to.
#
# Globals: varagent (read), OUTPUT_DIR (read)
# Files:   ./tmp collects the matches of each pass; ./proxy-NN.htm is the
#          page wget saved, removed again after parsing.
# Note:    the loop variables i and a are not declared local.
[ -f tmp ] && rm tmp
# Page count is taken from the "total pages: NN" text. grep matches it
# case-insensitively, but the sed that strips the label only removes the
# exact spelling "Total pages: ".
local MAX_PAGE=$( curl -A "$varagent" -s http://www.samair.ru/proxy/ | grep -io "total pages: .[0-9]" | sed 's/Total pages: //' )

echo "Ripping www.samair.ru ("$MAX_PAGE" Pages)"

for i in $(seq 1 "$MAX_PAGE")
do
a=$(printf "%02d" "$i") # leading zero
wget -q http://www.samair.ru/proxy/proxy-"$a".htm
# throttle
sleep 1
# parse: five digit groups, each separated by one arbitrary character (the
# dots in the pattern are unescaped, so they match any character)
cat ./proxy-$a.htm | grep -Eo '[0-9]+.[0-9]+.[0-9]+.[0-9]+.[0-9]+' | sed '/^[ \t]/d' | tr -d '\r' >> tmp
rm ./proxy-$a.htm
done

mv tmp "$OUTPUT_DIR"/http/samair.txt

# samair socks proxies - todo - arrange socks proxies into seperate files according to SOCKS 4 or 5 support

# Page count for the SOCKS pass: the largest two-digit number following
# "socks" anywhere in socks.htm.
MAX_PAGE=$( curl -A "$varagent" -s http://www.samair.ru/proxy/socks.htm | grep -io "socks[0-9][0-9]" | sed 's/socks//' | sort -n | tail -1 )
echo "Ripping www.samair.ru SOCKS proxies("$MAX_PAGE" Pages)"

# socks.htm itself is parsed first and starts a fresh tmp (">"). Only the
# first separator of this pattern is a literal dot.
curl -A "$varagent" -s http://www.samair.ru/proxy/socks.htm | grep -Eo '[0-9]+*\.[0-9]+.[0-9]+.[0-9]+.[0-9]+' | sed '/^[ \t]/d' | tr -d '\r' > tmp

# The numbered SOCKS pages are requested without zero padding (socks1.htm ...).
for i in $(seq 1 "$MAX_PAGE")
do
curl -A "$varagent" -s http://www.samair.ru/proxy/socks"$i".htm | grep -Eo '[0-9]+*\.[0-9]+.[0-9]+.[0-9]+.[0-9]+' | sed '/^[ \t]/d' | tr -d '\r' >> tmp
# throttle
sleep 1
done

mv tmp "$OUTPUT_DIR"/socks/samairsocks.txt
}

function check_conn {
# Connectivity probe: send a single one-byte ping to www.google.com and end
# the script with a message when it fails.

if ! ping -s1 -c1 www.google.com >/dev/null
then
  echo problem with connection....
  exit
fi

}

function rosiget {
# Download one page of the rosinstrument.com CONNECT proxy list (t=2) and
# leave its embedded javascript, rewritten for the command-line `js`
# interpreter (document.write -> print), in ./objs.
#
# Arguments: $1 - page number placed in the query string
# Globals:   varagent (read) - User-Agent sent with the request
# Files:     ./cookie (curl cookie jar), ./objs (overwritten)
# Returns:   0 as soon as a download succeeded. After $maxloops failed
#            downloads the whole script is ended with status 1.

local maxloops=5
local pagenum=$1
local attempt
local page

for (( attempt = 1; attempt <= maxloops; attempt++ ))
do
  echo "Ripping page $pagenum of rosinstrument"
  sleep 1

  # The page is captured before it is filtered so that the status tested
  # here is curl's own. Previously $? belonged to the last sed of the
  # pipeline and a failed download was never noticed. $varagent is
  # double-quoted: inside single quotes the literal text "$varagent" was
  # sent as User-Agent.
  if page=$(curl -s -b cookie -c cookie -A "$varagent" "http://rosinstrument.com/raw_free_db.htm?${pagenum}&t=2")
  then
    # Keep the script block (matching line plus 15 following lines) and
    # drop its first two lines.
    printf '%s\n' "$page" \
      | grep -A 15 '<script language="javascript" type="text/javascript">' \
      | sed 's/document.write/print/' \
      | sed '1,2d' > objs
    return 0
  fi

  echo "Problem with connection.."
  if (( attempt < maxloops )); then
    echo "Trying again"
    sleep 1
  fi
done

echo "attempts exceeded $maxloops: failed..."
echo "try changing ip"
exit 1
}


function get_rosinstrument {
# Scrape every page of the rosinstrument.com CONNECT proxy list into
# $OUTPUT_DIR/http/rosinstrument (the file is rewritten on each run).
#
# Page 0 is fetched first. Its decoded javascript provides the first entries
# and the number of the last page, which bounds the loop over the remaining
# pages. Pages are downloaded by rosiget and decoded with `js`.
#
# Globals: varagent, OUTPUT_DIR (read); TOTAL_CONNECT (written)
# Files:   ./cookie, ./objs, ./result - removed again on success
# Returns: 1 when `js` is missing. Ends the script with status 1 when no
#          last-page number can be found.
#http://rosinstrument.com/raw_free_db.htm?t=2
# how bloody annoying...

local MAX_PAGE
local i

if ! command -v js >/dev/null 2>&1; then
  echo "get_rosinstrument: the 'js' interpreter is required to decode rosinstrument.com pages" >&2
  return 1
fi

touch cookie
echo setting cookie
# Prime the cookie jar. The response is discarded (it used to be printed to
# the terminal), the real User-Agent is sent (single quotes used to send the
# literal text "$varagent") and the URL no longer embeds $pagenum, which is
# not defined in this function.
curl -s -b cookie -c cookie -A "$varagent" "http://rosinstrument.com/raw_free_db.htm?t=2" > /dev/null
touch objs
touch result
touch "$OUTPUT_DIR"/http/rosinstrument

# connect proxies (t=2)
# get first page to determine max page nums
rosiget 0
js objs > result
grep -o 'stat">[a-z,0-9.:-]*' result | sed 's/stat">//' > "$OUTPUT_DIR"/http/rosinstrument

# MAX_PAGE  var was not getting set or something because the cookie was stale or something .. so after touch cookie I got new cookie

# The "to last page" link may occur more than once on a page, so only the
# largest number is kept. Anything that is not a plain number counts as a
# failed download.
MAX_PAGE=$( tr -d "'" < result \
  | grep -o '?*[0-9,=tamp;& ]*title=to last page' \
  | sed 's/&amp;t=2 title=to last page//' \
  | sed 's/?//' \
  | sort -n \
  | tail -n 1 )
case "$MAX_PAGE" in
  ''|*[!0-9]*)
    echo "connection problem, exiting... http://rosinstrument.com/raw_free_db.htm?t=2 to unblock etc .."
    exit 1
    ;;
esac
echo "Ripping www.rosinstrument.com CONNECT proxies($MAX_PAGE Pages)"

for i in $(seq 1 "$MAX_PAGE")
do
  rosiget "$i"
  js objs > result
  grep -o 'stat">[a-z,0-9.:-]*' result | sed 's/stat">//' >> "$OUTPUT_DIR"/http/rosinstrument
  # The arithmetic expansion strips any padding wc may add.
  TOTAL_CONNECT=$(( $(wc -l < "$OUTPUT_DIR"/http/rosinstrument) ))
  echo -ne "Got $TOTAL_CONNECT proxies   \\r"
done
echo
rm cookie objs result
}


function parse {
# Convert the list of verified proxies into proxychains syntax
# ("http <ip><TAB><port>") in $OUTPUT_DIR/CONNECT_proxychains.
#
# Source list: $OUTPUT_DIR/CONNECT_good when present, otherwise
# $OUTPUT_DIR/http/good.txt, the file the -c check writes. Nothing in this
# script writes CONNECT_good, so without the fallback the "please run -c"
# hint could never be satisfied.
#
# Globals: OUTPUT_DIR (read)
# Returns: 0 when CONNECT_proxychains was written, 1 when no verified list exists.

local checked="$OUTPUT_DIR"/CONNECT_good
local chains="$OUTPUT_DIR"/CONNECT_proxychains
local tab
tab=$(printf '\t')   # literal TAB; "\t" in a sed replacement is a GNU extension

echo "Parsing..."
# remove files if there already
rm -f -- "$chains"
#[ -f "$OUTPUT_DIR"/socks/ALL ] && rm "$OUTPUT_DIR"/socks/ALL
#
if [ ! -f "$checked" ] && [ -f "$OUTPUT_DIR"/http/good.txt ] ; then
  checked="$OUTPUT_DIR"/http/good.txt
fi

if [ ! -f "$checked" ] ; then
  echo "IP's not checked, please run $0 -c "
  return 1
fi

# Prefix every entry with the proxy type and turn the first ':' into a TAB.
sed -e 's/^/http /' -e "s/:/${tab}/" "$checked" > "$chains"

## combine proxy files
#for blah in $(find $OUTPUT_DIR -type f )
#do
#cat $blah >> "$OUTPUT_DIR"/ALL
#done
#
#cat "$OUTPUT_DIR"/ALL | sort -u > tmp
#mv tmp "$OUTPUT_DIR"/ALL
#TOTAL_HTTP=`cat "$OUTPUT_DIR"/ALL | wc -l`
#echo "Got "$TOTAL_HTTP" proxies"
#
## combine socks proxy files
#for blah in $(find "$OUTPUT_DIR"/socks -type f )
#do
#cat $blah >> "$OUTPUT_DIR"/socks/ALL
#done
#
#cat "$OUTPUT_DIR"/socks/ALL | sort -u > tmp
#mv tmp "$OUTPUT_DIR"/socks/ALL
#TOTAL_SOCKS=`cat "$OUTPUT_DIR"/socks/ALL | wc -l`
#echo "Got "$TOTAL_SOCKS" SOCKS proxies"

}

function ping_http {
# Test one HTTP proxy by fetching http://whatismyip.org through it. The proxy
# is appended to $OUTPUT_DIR/http/good.txt only when two consecutive fetches
# each return a line that consists of a dotted quad.
#
# Arguments: $1 - proxy as IP:PORT
# Globals:   varagent, TIMEOUT, OUTPUT_DIR (read); count (read, set by the
#            caller, shown in the progress line)

local proxyip=$1
local ip_line='^[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}$'
# $TIMEOUT is left unquoted, as in every other curl call of this script.
local -a probe=(curl -s -A "$varagent" -x "$proxyip" --url http://whatismyip.org -L --connect-timeout $TIMEOUT -m 10)
local http_check_var

http_check_var=$("${probe[@]}" | grep "$ip_line")

if [[ -n "$http_check_var" ]]
then
  # First check passed; the proxy must pass a second one to be recorded.
  http_check_var=$("${probe[@]}" | grep "$ip_line")
  if [[ -n "$http_check_var" ]]
  then
    echo "$proxyip" >> "$OUTPUT_DIR"/http/good.txt
  fi
  echo -ne "Found $(cat "$OUTPUT_DIR"/http/good.txt | wc -l) working proxies   \\r"
fi
echo -ne "\t\t\t\t checking process $count   \\r"

}

function ping_socks {
# SOCKS counterpart of ping_http: fetch http://whatismyip.org through the
# proxy using curl --socks4 and append the proxy to
# $OUTPUT_DIR/socks/good.txt only when two consecutive fetches each return a
# line that consists of a dotted quad.
#
# Arguments: $1 - proxy as IP:PORT
# Globals:   varagent, TIMEOUT, OUTPUT_DIR (read); count (read, set by the
#            caller, shown in the progress line)
# ugly reuse of same code except for one variable
# change code not to put too many requests in to whatismyip at high speed

local proxyip=$1
local ip_line='^[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}$'
# $TIMEOUT is left unquoted, as in every other curl call of this script.
local -a probe=(curl -s -A "$varagent" --socks4 "$proxyip" --url http://whatismyip.org -L --connect-timeout $TIMEOUT -m 10)
local socks_check_var

socks_check_var=$("${probe[@]}" | grep "$ip_line")

if [[ -n "$socks_check_var" ]]
then
  # First check passed; the proxy must pass a second one to be recorded.
  socks_check_var=$("${probe[@]}" | grep "$ip_line")
  if [[ -n "$socks_check_var" ]]
  then
    echo "$proxyip" >> "$OUTPUT_DIR"/socks/good.txt
  fi
  echo -ne "Found $(cat "$OUTPUT_DIR"/socks/good.txt | wc -l) working proxies   \\r"
fi
echo -ne "\t\t\t\t checking process $count   \\r"

}


function check_alive {
# Merge, de-duplicate and test every scraped proxy: first the lists under
# $OUTPUT_DIR/http (with ping_http), then those under $OUTPUT_DIR/socks (with
# ping_socks). Each directory ends up with one merged file "ALL" plus the
# "good.txt" the checkers append to.
#
# Globals: OUTPUT_DIR (read); TOTAL_HTTP, TOTAL_SOCKS, FILE (written)

#################### HTTP check ##################################
_check_alive_group http HTTP ping_http TOTAL_HTTP || return 1

#################################### SOCKS #########################
_check_alive_group socks SOCKS ping_socks TOTAL_SOCKS || return 1
echo

}

function _check_alive_group {
# Merge the lists of one proxy family and run a checker on every entry.
#   $1 - sub-directory of $OUTPUT_DIR that holds the lists ("http" / "socks")
#   $2 - label for the progress message
#   $3 - function started in the background for each entry (gets IP:PORT)
#   $4 - name of the global variable that receives the number of entries
local dir="${OUTPUT_DIR:?}/$1"
local label=$2
local checker=$3
local batch=400   # checks started before waiting for all of them to finish
local count=1     # read by the checker for its progress line
local merged line total
local -a lists

echo Removing dupes ...
merged=$(mktemp "$OUTPUT_DIR"/merge.XXXXXX) || return 1
lists=( "$dir"/* )
if [ -e "${lists[0]}" ]; then
  # An earlier ALL and good.txt are merged too, so previously known proxies
  # are re-checked; every old file is then replaced by the merged list.
  cat -- "${lists[@]}" | sort -u > "$merged"
  rm -- "${lists[@]}"
else
  : > "$merged"   # nothing scraped yet: continue with an empty list
fi
mv "$merged" "$dir"/ALL

total=$(( $(wc -l < "$dir"/ALL) ))
printf -v "$4" '%s' "$total"
echo "Checking $total $label proxies"

FILE="$dir"/ALL   # left global, as before
# Entries are read line by line. The former "for line in $( cat $FILE )"
# glob-expanded scraped text such as "*" against the working directory.
while read -r line
do
  [ -n "$line" ] || continue
  "$checker" "$line" < /dev/null &

  # Every $batch entries, wait for the running checks to finish.
  if (( count % batch == 0 )); then
    wait
  fi
  count=$(( count + 1 ))
done < "$FILE"
wait
}

function nocodeen {
# Run an nmap service scan (-sV -P0 -n) against the port of every proxy
# listed in $OUTPUT_DIR/good.txt.
#
# The scan output is collected in ./tmp and then removed again, because the
# step that filtered it into nocodeen.txt is commented out below.
# NOTE(review): nothing in this script writes $OUTPUT_DIR/good.txt (the
# checkers write http/good.txt and socks/good.txt) and nothing calls this
# function - confirm the intended input before reviving it.
#
# Globals: OUTPUT_DIR (read)
# Returns: 1 when the input list does not exist.

# wtf is this ?
# no CoDeeN
local entry host port

if [ ! -f "$OUTPUT_DIR"/good.txt ]; then
  echo "nocodeen: $OUTPUT_DIR/good.txt not found" >&2
  return 1
fi

: > tmp
while IFS= read -r entry
do
  # nmap is called directly with separate arguments. The old code wrote the
  # command lines into a script, one word per line because of an unquoted
  # for-loop, and ran it with bash, so scraped text was executed as shell.
  # Anything that is not a plain host:port pair is skipped.
  if [[ ! $entry =~ ^[0-9A-Za-z][0-9A-Za-z.-]*:[0-9]+$ ]]; then
    continue
  fi
  host=${entry%%:*}
  port=${entry##*:}
  nmap "$host" -sV -P0 -n -p "$port" >> tmp < /dev/null
done < "$OUTPUT_DIR"/good.txt

#egrep -B 2 open tmp | egrep -v "(PORT|CoDeeN|--)" | sed 's/Interesting ports on /IP /g' | grep open -B 1 | sed 's/\/.*//g'|sed 's/--//g' | tr -d '\n' | awk '{gsub("IP ","\n"); print}' > "$OUTPUT_DIR"/nocodeen.txt

rm tmp
}

function get_proxies {
# Run every site scraper in turn; each one writes its own files below
# $OUTPUT_DIR.

# START
get_freeproxylists
get_sakura
# get_multiproxy is not called: its definition is commented out further up,
# so the call only produced "command not found".
get_nntime
get_myproxy
get_proxylistsnet
get_shroomery
get_samair          # was misspelled "et_samair", so samair.ru was never scraped
get_rosinstrument

}

function feed_proxychains {
# Append the entries of $OUTPUT_DIR/CONNECT_proxychains (written by parse) to
# the proxychains configuration. A one-time backup of the configuration is
# taken first, next to it, with ".conf" replaced by ".bak".
#
# Arguments: $1 - optional path of the configuration file
#                 (default: /etc/proxychains.conf)
# Globals:   OUTPUT_DIR (read)
# Exits the script with status 1 when there is nothing to copy, or when the
# backup or the update fails.

local conf=${1:-/etc/proxychains.conf}
local backup=${conf%.conf}.bak
local chains="$OUTPUT_DIR"/CONNECT_proxychains
local fresh

# The gate is the file that is actually copied; it only exists after parse
# found a list of verified proxies.
if [ ! -s "$chains" ] ; then
    echo "IP's not checked, please run $0 -c "
    exit 1
fi

#cat "$OUTPUT_DIR"/socks/good.txt | sed 's/^/socks4 /' | sed "s/:/\t/" > "$OUTPUT_DIR"/ALL_PROXYCHAINS_SOCKS
# todo this bit needs changing for 4 and 5 socks
#cat "$OUTPUT_DIR"/socks/samairsocks.txt | sed 's/^/socks4 /' | sed "s/:/\t/" >> "$OUTPUT_DIR"/ALL_PROXYCHAINS_SOCKS4
#cat "$OUTPUT_DIR"/socks/plnsocks5.txt | sed 's/^/socks5 /' | sed "s/:/\t/" > "$OUTPUT_DIR"/ALL_PROXYCHAINS_SOCKS5

# Never touch the configuration without a backup of its pristine state.
if [ ! -f "$backup" ] ; then
    if ! cp -- "$conf" "$backup" ; then
        echo "cannot back up $conf to $backup, $conf left untouched" >&2
        exit 1
    fi
fi

# Only lines that are not in the configuration yet are appended, so running
# -p repeatedly does not pile up duplicates.
fresh=$(grep -vxF -f "$conf" -- "$chains")
if [ -n "$fresh" ] ; then
    if ! printf '%s\n' "$fresh" >> "$conf" ; then
        echo "cannot write to $conf" >&2
        exit 1
    fi
fi
#cat "$OUTPUT_DIR"/ALL_PROXYCHAINS_SOCKS >> /etc/proxychains.conf
#cat "$OUTPUT_DIR"/ALL_PROXYCHAINS_SOCKS5 >> /etc/proxychains.conf

echo "Good proxies copied to $conf"

}

function recover_proxychains {
# Restore the proxychains configuration from the backup taken by
# feed_proxychains (same path with ".conf" replaced by ".bak").
#
# Arguments: $1 - optional path of the configuration file
#                 (default: /etc/proxychains.conf)
# Returns:   0 after a successful restore, 1 when there is no backup or the
#            copy fails. Success is no longer reported unconditionally.

local conf=${1:-/etc/proxychains.conf}
local backup=${conf%.conf}.bak

if [ ! -f "$backup" ] ; then
    echo "no backup found at $backup, nothing to recover" >&2
    return 1
fi

if ! cp -- "$backup" "$conf" ; then
    echo "could not restore $conf from $backup" >&2
    return 1
fi
echo "$conf recovered..."

}

function _usage {
# Print the list of supported sites and the command-line switches.
#
# Globals: OUTPUT_DIR (read)

# Script name without its directory. The former unquoted `basename $0` broke
# for script paths that contain blanks.
local me=${0##*/}

echo "Currently supporting freeproxylists, sakura, multiproxy, nntime, myproxy, proxylistsnet, shroomery, samair.ru , rosinstrument"
echo
echo "$me -r rip proxy websites to \"$OUTPUT_DIR\" driectory"
echo "$me -c read IPs from \"$OUTPUT_DIR\" driectory and check if alive"
echo "$me -p copy good IPs to /etc/proxychains.conf"
echo "$me -b recover backup of /etc/proxychains.conf"
echo "$me -s show etc/proxychains.conf"

}

###################### main #####################################

# Requested actions; each switch below turns one of them on.
recoverchains=0
feedchains=0
checkalive=0
getproxies=0

# Without any switch there is nothing to do: show the help and stop.
if [ "$#" -eq 0 ]; then
  echo "Script requires an argument" ...
  _usage
  exit
fi

while getopts "rcpbhs" flag
do
  case "$flag" in
    r) getproxies=1 ;;
    c) checkalive=1 ;;
    p) feedchains=1 ;;
    b) recoverchains=1 ;;
    s) cat /etc/proxychains.conf ;;   # shown immediately, while parsing
    *)
      # -h, and the "?" getopts reports for an unknown switch
      _usage
      exit
      ;;
  esac
done

# The connectivity test (check_conn) is deliberately not run any more.

# -p and -b contradict each other; copying wins.
if (( recoverchains == 1 && feedchains == 1 )); then
  echo "copy to proxychains selected with recover proxychains, ignoring recover"
  recoverchains=0
fi

# The actions always run in this order, whatever the order of the switches.
if (( getproxies == 1 )); then
  get_proxies
  echo "Completed ripping..."
fi

if (( checkalive == 1 )); then
  check_alive
fi

if (( feedchains == 1 )); then
  parse
  feed_proxychains
fi

if (( recoverchains == 1 )); then
  recover_proxychains
fi

echo "Done."

exit


