Простой модуль Python. Надеюсь, что он работает. В любом случае, начинать надо с простых примеров. Скачать PyProxy можно здесь. PyProxy is a proxy hunter and tester — a high-level cross-protocol proxy-hunting Python library.
Эти команды проверки и распаковки (разархивации) пакетов tar.gz надо запомнить
In [1]:
# Check the package before unpacking
!tar -zvtf /home/kiss/Desktop/Temp/pyproxy-v.09.tar.gz
In [2]:
# Unpack in 'ipython notebook' folder
!tar -zxvf /home/kiss/Desktop/Temp/pyproxy-v.09.tar.gz
In [3]:
# import into the next cell
%load pyproxy.py
In [3]:
#!/usr/bin/env python
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
#
# Copyright 2010 Gunslinger_ <yudha.gunslinger@gmail.com>
# http://bit.ly/c0debreaker
import sys
import warnings
import urllib2
import re
import socket
import random
import optparse
import os
warnings.filterwarnings(action="ignore", message=".*(sets) module is deprecated", category=DeprecationWarning)
import sets
__author__ = "Gunslinger_ <yudha.gunslinger@gmail.com>"
__date__ = "Thu Oct 7 00:00:41 2010"
__version__ = "09"
__copyright__ = "Copyright (c) 2010 Gunslinger_"
class proxyhunter(object):
"""
Instance variables:
Outputproxy
Output file every proxy will be printed in
Default : proxylist.txt
Goodproxy
Output file all good proxy will be print
Default : goodproxylist.txt
Verbose
More noise, every proxy will be print into screen
Default : True
Timeout
Timeout every test proxy connections in socket
Default : 30
Sitelist
Proxy site for parsing proxy
Default : []
"""
def __init__(self, OutputProxy='proxylist.txt', GoodProxy='goodproxylist.txt', Verbose=True, TimeOut=30, Sitelist=[]):
self._red = '\033[31m'
self._reset = '\033[0;0m'
self._wide = " "*50
self._timeout = TimeOut
self._verbose = Verbose
self._testurl = 'http://www.google.com'
self._ouruseragent = ['Mozilla/4.0 (compatible; MSIE 5.0; SunOS 5.10 sun4u; X11)',
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.2pre) Gecko/20100207 Ubuntu/9.04 (jaunty) Namoroka/3.6.2pre',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser;',
'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)',
'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1)',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6)',
'Microsoft Internet Explorer/4.0b1 (Windows 95)',
'Opera/8.00 (Windows NT 5.1; U; en)',
'amaya/9.51 libwww/5.4.0',
'Mozilla/4.0 (compatible; MSIE 5.0; AOL 4.0; Windows 95; c_athome)',
'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ZoomSpider.net bot; .NET CLR 1.1.4322)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; QihooBot 1.0 qihoobot@qihoo.net)',
'Mozilla/4.0 (compatible; MSIE 5.0; Windows ME) Opera 5.11 [en]']
self._referer = ['http://google.com','http://bing.com']
# You can add yours...
self._sitelist = Sitelist
self._output = OutputProxy
self._goodproxy = GoodProxy
def Samairdotru(self):
counter = 1
proxycounter = 0
maxpages = 60
urls = []
cntlen = 0
proxyfile = file(self._output, 'a')
print "[*] Hunting proxy from samair.ru please wait..."
while counter <= maxpages:
if counter <= 9:
opener = urllib2.build_opener(urllib2.HTTPHandler)
opener.addheaders = [('User-agent', random.choice(self._ouruseragent)),
('Referer', random.choice(self._referer))]
urllib2.install_opener(opener)
url = urllib2.urlopen('http://www.samair.ru/proxy/proxy-0'+repr(counter)+'.htm').read()
else:
opener = urllib2.build_opener(urllib2.HTTPHandler)
opener.addheaders = [('User-agent', random.choice(self._ouruseragent)),
('Referer', random.choice(self._referer))]
urllib2.install_opener(opener)
url = urllib2.urlopen('http://www.samair.ru/proxy/proxy-'+repr(counter)+'.htm').read()
proxies = re.findall(('\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}:\d{1,5}'), url)
lenstr = len(proxies)
proxycounter = int(proxycounter) + int(len(proxies))
sys.stdout.write("\r[*] %s%d%s Proxies received from : http://www.samair.ru/proxy/ %s" % (self._red, int(proxycounter), self._reset, self._wide))
sys.stdout.flush()
for singleproxy in proxies:
if self._verbose:
print singleproxy
proxyfile.write(singleproxy+"\n")
counter = counter+1
opener.close()
print "\n"
proxyfile.close()
def ParseProxy(self, site):
print "[*] Parse proxy from %s" % (site.split("//",3)[1])
proxycounter = 0
urls = []
proxyfile = file(self._output, 'a')
opener = urllib2.build_opener(urllib2.HTTPHandler)
opener.addheaders = [('User-agent', random.choice(self._ouruseragent)),
('Referer', random.choice(self._referer))]
urllib2.install_opener(opener)
url = urllib2.urlopen(site).read()
proxies = re.findall(('\d{1,3}[.]\d{1,3}[.]\d{1,3}[.]\d{1,3}[:]\d{1,5}'), url)
for singleproxy in proxies:
if self._verbose:
print singleproxy
proxyfile.write(singleproxy+"\n")
proxycounter = proxycounter+1
sys.stdout.write("[*] %s%d%s Proxies receieved from : %s %s\n" % (self._red, int(proxycounter), self._reset, site.split("//",3)[1], self._wide))
sys.stdout.flush()
opener.close()
proxyfile.close()
def Single(self):
for site in self._sitelist:
self.ParseProxy(site)
def Cleanitup(self, sorted_output="uniqueproxylist.txt"):
""" proxy will be printed in uniqueproxylist.txt by default """
proxyfile = open(self._output, 'r').readlines()
outfile = file(sorted_output, 'a')
sortproxy = []
finalcount = 0
for proxy in proxyfile:
if proxy not in sortproxy:
sortproxy.append(proxy)
outfile.write(proxy)
finalcount += 1
if self._verbose:
for proxy in sortproxy:
print proxy,
print "\n[*] %s%d%s Unique proxy list has been sorted ." % (self._red, int(finalcount), self._reset),
if sorted_output == "":
print ""
else:
print "saved in %s" % (sorted_output)
outfile.close()
def LoadProxy(self):
global proxylist
try:
preventstrokes = open(self._output, "r")
proxylist = preventstrokes.readlines()
count = 0
while count < len(proxylist):
proxylist[count] = proxylist[count].strip()
count += 1
print "[*] File successfully loaded..."
except(IOError):
print "\n[-] Error: Check your proxylist path\n"
sys.exit(1)
def CoreFreshTester(self, proxy):
try:
socket.setdefaulttimeout(self._timeout)
proxy = proxy.split(":")
proxy_info = {
'host' : proxy[0],
'port' : int(proxy[1])
}
proxy_support = urllib2.ProxyHandler({"http" : "%s:%d" % (proxy[0], int(proxy[1]))})
opener = urllib2.build_opener(proxy_support, urllib2.HTTPHandler)
opener.addheaders = [('User-agent', random.choice(self._ouruseragent)),
('Referer', random.choice(self._referer))]
urllib2.install_opener(opener)
f = urllib2.urlopen(self._testurl)
if self._verbose:
print f.headers
print f.read()
except urllib2.HTTPError, e:
if self._verbose:
print 'Error : %s code : %s' % (e, e.code)
return e.code
except Exception, detail:
if self._verbose:
print "Error : %s" % (detail)
return 1
return 0
def MainFreshTester(self, proxy):
if self.CoreFreshTester(proxy):
print "[*] %s%s%s \n \'--------------> Bad" % (self._red, proxy, self._reset)
else:
print "[*] %s%s%s \n \'--------------> Good" % (self._red, proxy, self._reset)
writegoodpxy.write(proxy)
def TestProxy(self):
global writegoodpxy
writegoodpxy = file(self._goodproxy, 'w')
for proxy in proxylist:
self.MainFreshTester(proxy)
print "[*] All Fresh proxy has been saved in %s" % (self._goodproxy)
writegoodpxy.close()
''' Direct use class of this library '''
class runengine(object):
def __init__(self):
self._sitelist = ['http://www.proxy-list.net/anonymous-proxy-lists.shtml',
'http://www.digitalcybersoft.com/ProxyList/fresh-proxy-list.shtml',
'http://www.1proxyfree.com/',
'http://www.proxylists.net/http_highanon.txt',
'http://www.atomintersoft.com/products/alive-proxy/socks5-list/',
'http://www.proxylist.net/',
'http://aliveproxy.com/high-anonymity-proxy-list/',
'http://spys.ru/en/',
'http://spys.ru/en/http-proxy-list/',
'http://atomintersoft.com/free_proxy_list',
'http://aliveproxy.com/proxy-list/proxies.aspx/Indonesia-id',
'http://tinnhanh.ipvnn.com/free-proxy/Indonesia_Proxy_List.ipvnn']
def parseoption(self):
global jSamairdotru, jSingle, jTestproxy, doall, version, output, proxytest, verbose, goodproxy, timeout
baseprog = os.path.basename(sys.argv[0])
parser = optparse.OptionParser()
if len(sys.argv) <= 1:
parser.exit(msg="""Usage : %s [option]
-h or --help for get help \n\n""" % (sys.argv[0]))
''' parse for option '''
parser.add_option("-s", "--samair",
dest="jSamairdotru",
action="store_true",
help="just use samair.ru to hunt proxies")
parser.add_option("-l", "--sitelist", dest="jSingle", action="store_true",
help="use all site in the list")
parser.add_option("-t", "--test",
dest="jTestproxy",
action="store_true",
help="test all proxy !")
parser.add_option("-a", "--all",
dest="doall",
action="store_true",
help="do all !")
parser.add_option("-v", "--version",
dest="version",
action="store_true",
help="print current proxy hunter version")
parser.add_option("-d", "--debug",
dest="verbose",
action="store_true",
help="debug program for more talkable & every proxy will be print to screen")
parser.add_option("-o", "--outputfile",
dest="outputfile",
default="proxylist.txt",
type="string",
action="store",
metavar="FILE",
help="output proxy will be print [default : %default]" )
parser.add_option("-i", "--inputfile",
dest="inputfile",
default="proxylist.txt",
type="string",
action="store",
metavar="FILE",
help="input proxy will be checked [default : %default]")
parser.add_option("-g", "--outputgood",
dest="outputgoodproxy",
default="goodproxy.txt",
type="string",
action="store",
metavar="FILE",
help="output all good proxy will be saved [default : %default]")
parser.add_option("-c", "--timeout",
dest="timeout",
default=30,
type="int",
action="store",
metavar="NUMBER",
help="timeout connections being program run [default : %default]")
group = optparse.OptionGroup(parser, "Example ",
"""%s -s | Gather proxy with samair.ru
%s -l | Gather proxy in the url list
%s -t proxylist.txt | Test proxy inside proxylist.txt
%s -a | Do all
%s -v | Print current version
""" % (baseprog, baseprog, baseprog, baseprog, baseprog))
parser.add_option_group(group)
(options, args) = parser.parse_args()
jSamairdotru = options.jSamairdotru
jSingle = options.jSingle
jTestproxy = options.jTestproxy
doall = options.doall
version = options.version
output = options.outputfile
proxytest = options.inputfile
verbose = options.verbose
goodproxy = options.outputgoodproxy
timeout = options.timeout
def printversion(self):
print "Version : %s \n" % (__version__)
def run(self):
proxyengine = proxyhunter(OutputProxy=output, GoodProxy=goodproxy, Verbose=verbose, TimeOut=timeout, Sitelist=self._sitelist)
if version:
self.printversion()
if jSamairdotru:
proxyengine.Samairdotru()
proxyengine.Cleanitup()
if jSingle:
proxyengine.Single()
proxyengine.Cleanitup()
if jTestproxy:
proxyengine.LoadProxy()
proxyengine.TestProxy()
if doall:
proxyengine.Samairdotru()
proxyengine.Single()
proxyengine.LoadProxy()
proxyengine.TestProxy()
def main():
print "\nPyProxy v.%s by %s - Proxy Hunter and Tester Opensource engine\nA high-level cross-protocol proxy-hunter\n" % (__version__, __author__)
proxyengine = runengine()
proxyengine.parseoption()
proxyengine.run()
if __name__ == '__main__':
main()
In []:
py
Дальше надо бы разобрать этот код на части, надо будет разобрать каждый блок, да и с импортом модулей наверняка возникнут проблемы. Чтобы не засорять эту страничку, она и так будет громоздкой, вспомним про то, что у нас работает сервер Tornado (8888 port), и мы можем подключиться к нему и из консоли.
In [1]:
%lsmagic
In [2]:
%qtconsole
# команда не прошла, наверное, она здесь просто не установлена
In []:
python pyproxy.py -h
PyProxy v.09 by Gunslinger_ <yudha.gunslinger@gmail.com> - Proxy Hunter and Tester Opensource engine
A high-level cross-protocol proxy-hunter
Usage: pyproxy.py [options]
Options:
-h, --help show this help message and exit
-s, --samair just use samair.ru to hunt proxies
-l, --sitelist use all site in the list
-t, --test test all proxy !
-a, --all do all !
-v, --version print current proxy hunter version
-d, --debug debug program for more talkable & every proxy will be
print to screen
-o FILE, --outputfile=FILE
output proxy will be print
[default : proxylist.txt]
-i FILE, --inputfile=FILE
input proxy will be checked
[default : proxylist.txt]
-g FILE, --outputgood=FILE
output all good proxy will be saved
[default : goodproxy.txt]
-c NUMBER, --timeout=NUMBER
timeout connections being program run
[default : 30]
Example :
pyproxy.py -s | Gather proxy with samair.ru
pyproxy.py -l | Gather proxy in the url list
pyproxy.py -t proxylist.txt | Test proxy inside proxylist.txt
pyproxy.py -a | Do all
pyproxy.py -v | Print current version
Итак, я сейчас думаю о том, как работает эта программа, но передо мной инфраструктурные задачи: как подключить параллельно консоль, как потом лучше применять дебаггер и какой... Все эти навыки необходимы, это именно должны быть "навыки", как навыки вождения (автомобиля): когда думаешь, куда повернуть, некогда думать о том, в какой последовательности нажимать на педали и переключать передачи...
Поэтому закончим этот пост надеждой — вот внизу то, что мне удалось получить от запуска модуля: он собрал... неожиданно, списки прокси с некоторых сайтов. Отлично! Значит, нужны будут инструменты, чтобы изучить и подправить этот код.
In []:
python pyproxy.py -l
PyProxy v.09 by Gunslinger_ <yudha.gunslinger@gmail.com> - Proxy Hunter and Tester Opensource engine
A high-level cross-protocol proxy-hunter
[*] Parse proxy from www.proxy-list.net/anonymous-proxy-lists.shtml
[*] 0 Proxies receieved from : www.proxy-list.net/anonymous-proxy-lists.shtml
[*] Parse proxy from www.digitalcybersoft.com/ProxyList/fresh-proxy-list.shtml
[*] 0 Proxies receieved from : www.digitalcybersoft.com/ProxyList/fresh-proxy-list.shtml
[*] Parse proxy from www.1proxyfree.com/
[*] 0 Proxies receieved from : www.1proxyfree.com/
[*] Parse proxy from www.proxylists.net/http_highanon.txt
[*] 100 Proxies receieved from : www.proxylists.net/http_highanon.txt
[*] Parse proxy from www.atomintersoft.com/products/alive-proxy/socks5-list/
[*] 15 Proxies receieved from : www.atomintersoft.com/products/alive-proxy/socks5-list/
[*] Parse proxy from www.proxylist.net/
[*] 0 Proxies receieved from : www.proxylist.net/
[*] Parse proxy from aliveproxy.com/high-anonymity-proxy-list/
[*] 10 Proxies receieved from : aliveproxy.com/high-anonymity-proxy-list/
[*] Parse proxy from spys.ru/en/
[*] 0 Proxies receieved from : spys.ru/en/
[*] Parse proxy from spys.ru/en/http-proxy-list/
[*] 0 Proxies receieved from : spys.ru/en/http-proxy-list/
[*] Parse proxy from atomintersoft.com/free_proxy_list
[*] 15 Proxies receieved from : atomintersoft.com/free_proxy_list
[*] Parse proxy from aliveproxy.com/proxy-list/proxies.aspx/Indonesia-id
[*] 10 Proxies receieved from : aliveproxy.com/proxy-list/proxies.aspx/Indonesia-id
[*] Parse proxy from tinnhanh.ipvnn.com/free-proxy/Indonesia_Proxy_List.ipvnn
[*] 0 Proxies receieved from : tinnhanh.ipvnn.com/free-proxy/Indonesia_Proxy_List.ipvnn
[*] 128 Unique proxy list has been sorted . saved in uniqueproxylist.txt
Как здесь и написано, программа сама создала файл uniqueproxylist.txt ...и убрала там дубли!!! Так что у этого поста будет продолжение, но сначала я закреплю навыки работы с дебаггерами.
Посты чуть ниже также могут вас заинтересовать
Комментариев нет:
Отправить комментарий