Skip to content

FIX for PremProxy and FreeProxy #75

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def parse_proxyList(self):

content = response.content
soup = BeautifulSoup(content, "html.parser")
table = soup.find("table", attrs={"id": "proxylisttable"})
table = soup.find("table", attrs={"class": "table table-striped table-bordered"})

# The first tr contains the field names.
headings = [th.get_text() for th in table.find("tr").find_all("th")]
Expand Down
16 changes: 12 additions & 4 deletions http_request_randomizer/requests/parsers/PremProxyParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from http_request_randomizer.requests.parsers.UrlParser import UrlParser
from http_request_randomizer.requests.proxy.ProxyObject import ProxyObject, AnonymityLevel, Protocol

from http_request_randomizer.requests.useragent.userAgent import UserAgentManager

logger = logging.getLogger(__name__)
__author__ = 'pgaref'

Expand All @@ -18,6 +20,12 @@ def __init__(self, id, web_url, timeout=None):
web_url += "/list/"
# Ports decoded by the JS unpacker
self.js_unpacker = None
self.useragent = UserAgentManager()
self.headers = {
"User-Agent": self.useragent.get_random_user_agent(),
"Origin": self.base_url,
"Referer": self.base_url
}
UrlParser.__init__(self, id=id, web_url=web_url, timeout=timeout)

def parse_proxyList(self):
Expand All @@ -31,7 +39,7 @@ def parse_proxyList(self):
self.js_unpacker = self.init_js_unpacker()

for page in page_set:
response = requests.get("{0}{1}".format(self.get_url(), page), timeout=self.timeout)
response = requests.get("{0}{1}".format(self.get_url(), page), timeout=self.timeout, headers=self.headers)
if not response.ok:
# Could not parse ANY page - Let user know
if not curr_proxy_list:
Expand Down Expand Up @@ -65,7 +73,7 @@ def parse_proxyList(self):
return curr_proxy_list

def get_pagination_set(self):
response = requests.get(self.get_url(), timeout=self.timeout)
response = requests.get(self.get_url(), timeout=self.timeout, headers=self.headers)
page_set = set()
# Could not parse pagination page - Let user know
if not response.ok:
Expand All @@ -84,7 +92,7 @@ def get_pagination_set(self):
return page_set

def init_js_unpacker(self):
response = requests.get(self.get_url(), timeout=self.timeout)
response = requests.get(self.get_url(), timeout=self.timeout, headers=self.headers)
# Could not parse provider page - Let user know
if not response.ok:
logger.warning("Proxy Provider url failed: {}".format(self.get_url()))
Expand All @@ -96,7 +104,7 @@ def init_js_unpacker(self):
for script in soup.findAll('script'):
if '/js/' in script.get('src'):
jsUrl = self.base_url + script.get('src')
return JsUnPacker(jsUrl)
return JsUnPacker(jsUrl, headers=self.headers)
return None

def create_proxy_object(self, row, port):
Expand Down
4 changes: 2 additions & 2 deletions http_request_randomizer/requests/parsers/js/UnPacker.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ class JsUnPacker(object):
"""
# TODO: it might not be necessary to unpack the js code

def __init__(self, js_file_url):
def __init__(self, js_file_url, headers=None):
logger.info("JS UnPacker init path: {}".format(js_file_url))
r = requests.get(js_file_url)
r = requests.get(js_file_url, headers=headers)
encrypted = r.text.strip()
encrypted = '(' + encrypted.split('}(')[1][:-1]
unpacked = eval('self.unpack' +encrypted) # string of the js code in unpacked form
Expand Down
134 changes: 63 additions & 71 deletions tests/mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,77 +70,69 @@ def sslproxy_mock(url, request):

@urlmatch(netloc=r'(.*\.)?free-proxy-list\.net$')
def free_proxy_mock(url, request):
return """<table border="0" cellpadding="0" cellspacing="0" id="proxylisttable"
id="proxylisttable">\n
<thead>\n
<tr>\n
<th>IP Address</th>
\n
<th>Port</th>
\n
<th>Code</th>
\n
<th>Country</th>
\n
<th>Anonymity</th>
\n
<th>Google</th>
\n
<th>Https</th>
\n
<th>Last Checked</th>
\n
</tr>
\n
</thead>
\n
<tbody>
<tr>
<td>138.197.136.46</td>
<td>3128</td>
<td>CA</td>
<td>Canada</td>
<td>anonymous</td>
<td>no</td>
<td>no</td>
<td>7 seconds ago</td>
</tr>
\n
<tr>
<td>177.207.75.227</td>
<td>8080</td>
<td>BR</td>
<td>Brazil</td>
<td>transparent</td>
<td>no</td>
<td>no</td>
<td>2 hours 21 minutes ago</td>
</tr>
\n
</tbody>
\n
<tfoot>\n
<tr>\n
<th class="input"><input type="text"/></th>
\n
<th></th>
\n
<th></th>
\n
<th></th>
\n
<th></th>
\n
<th></th>
\n
<th></th>
\n
<th></th>
\n
</tr>
\n
</tfoot>
\n
return """<table class="table table-striped table-bordered">
<thead>
<tr>
<th>IP Address</th>
<th>Port</th>
<th>Code</th>
<th class="hm">Country</th>
<th>Anonymity</th>
<th class="hm">Google</th>
<th class="hx">Https</th>
<th class="hm">Last Checked</th>
</tr>
</thead>
<tbody>
<tr>
<td>58.234.116.197</td>
<td>8193</td>
<td>KR</td>
<td class="hm">Korea</td>
<td>anonymous</td>
<td class="hm">yes</td>
<td class="hx">no</td>
<td class="hm">1 min ago</td>
</tr>
<tr>
<td>20.122.24.225</td>
<td>80</td>
<td>US</td>
<td class="hm">United States</td>
<td>anonymous</td>
<td class="hm">yes</td>
<td class="hx">no</td>
<td class="hm">1 min ago</td>
</tr>
<tr>
<td>154.236.177.100</td>
<td>1981</td><td>EG</td>
<td class="hm">Egypt</td>
<td>elite proxy</td>
<td class="hm">yes</td>
<td class="hx">yes</td>
<td class="hm">1 min ago</td>
</tr>
<tr>
<td>54.37.160.92</td>
<td>1080</td><td>FR</td>
<td class="hm">France</td>
<td>elite proxy</td>
<td class="hm">no</td>
<td class="hx">yes</td>
<td class="hm">1 min ago</td>
</tr>
<tr>
<td>110.232.78.55</td>
<td>55667</td>
<td>ID</td>
<td class="hm">Indonesia</td>
<td>anonymous</td>
<td class="hm">no</td>
<td class="hx">yes</td>
<td class="hm">1 min ago</td>
</tr>
</tbody>
</table>"""


Expand Down