From 284e05dd03856b0351c398b8bc32c70ce20b16fb Mon Sep 17 00:00:00 2001
From: alsrua7222 <59680587+alsrua7222@users.noreply.github.com>
Date: Fri, 7 Jan 2022 01:58:24 +0900
Subject: [PATCH 1/2] Fix FreeProxy table lookup; send request headers for PremProxy

---
 .../requests/parsers/FreeProxyParser.py          |  2 +-
 .../requests/parsers/PremProxyParser.py          | 16 ++++++++++++----
 .../requests/parsers/js/UnPacker.py              |  4 ++--
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/http_request_randomizer/requests/parsers/FreeProxyParser.py b/http_request_randomizer/requests/parsers/FreeProxyParser.py
index 112a26d..5ae167b 100644
--- a/http_request_randomizer/requests/parsers/FreeProxyParser.py
+++ b/http_request_randomizer/requests/parsers/FreeProxyParser.py
@@ -24,7 +24,7 @@ def parse_proxyList(self):
 
             content = response.content
             soup = BeautifulSoup(content, "html.parser")
-            table = soup.find("table", attrs={"id": "proxylisttable"})
+            table = soup.find("table", attrs={"class": "table table-striped table-bordered"})
 
             # The first tr contains the field names.
             headings = [th.get_text() for th in table.find("tr").find_all("th")]
diff --git a/http_request_randomizer/requests/parsers/PremProxyParser.py b/http_request_randomizer/requests/parsers/PremProxyParser.py
index 0fed99f..5a3379c 100644
--- a/http_request_randomizer/requests/parsers/PremProxyParser.py
+++ b/http_request_randomizer/requests/parsers/PremProxyParser.py
@@ -7,6 +7,8 @@
 from http_request_randomizer.requests.parsers.UrlParser import UrlParser
 from http_request_randomizer.requests.proxy.ProxyObject import ProxyObject, AnonymityLevel, Protocol
 
+from http_request_randomizer.requests.useragent.userAgent import UserAgentManager
+
 logger = logging.getLogger(__name__)
 __author__ = 'pgaref'
 
@@ -18,6 +20,12 @@ def __init__(self, id, web_url, timeout=None):
         web_url += "/list/"
         # Ports decoded by the JS unpacker
         self.js_unpacker = None
+        self.useragent = UserAgentManager()
+        self.headers = {
+            "User-Agent": self.useragent.get_random_user_agent(),
+            "Origin": self.base_url,
+            "Referer": self.base_url
+        }
         UrlParser.__init__(self, id=id, web_url=web_url, timeout=timeout)
 
     def parse_proxyList(self):
@@ -31,7 +39,7 @@ def parse_proxyList(self):
             self.js_unpacker = self.init_js_unpacker()
 
             for page in page_set:
-                response = requests.get("{0}{1}".format(self.get_url(), page), timeout=self.timeout)
+                response = requests.get("{0}{1}".format(self.get_url(), page), timeout=self.timeout, headers=self.headers)
                 if not response.ok:
                     # Could not parse ANY page - Let user know
                     if not curr_proxy_list:
@@ -65,7 +73,7 @@ def parse_proxyList(self):
             return curr_proxy_list
 
     def get_pagination_set(self):
-        response = requests.get(self.get_url(), timeout=self.timeout)
+        response = requests.get(self.get_url(), timeout=self.timeout, headers=self.headers)
         page_set = set()
         # Could not parse pagination page - Let user know
         if not response.ok:
@@ -84,7 +92,7 @@ def get_pagination_set(self):
         return page_set
 
     def init_js_unpacker(self):
-        response = requests.get(self.get_url(), timeout=self.timeout)
+        response = requests.get(self.get_url(), timeout=self.timeout, headers=self.headers)
         # Could not parse provider page - Let user know
         if not response.ok:
             logger.warning("Proxy Provider url failed: {}".format(self.get_url()))
@@ -96,7 +104,7 @@ def init_js_unpacker(self):
         for script in soup.findAll('script'):
             if '/js/' in script.get('src'):
                 jsUrl = self.base_url + script.get('src')
-                return JsUnPacker(jsUrl)
+                return JsUnPacker(jsUrl, headers=self.headers)
         return None
 
     def create_proxy_object(self, row, port):
diff --git a/http_request_randomizer/requests/parsers/js/UnPacker.py b/http_request_randomizer/requests/parsers/js/UnPacker.py
index 2383362..947b920 100644
--- a/http_request_randomizer/requests/parsers/js/UnPacker.py
+++ b/http_request_randomizer/requests/parsers/js/UnPacker.py
@@ -14,9 +14,9 @@ class JsUnPacker(object):
     """
     # TODO: it might not be necessary to unpack the js code
 
-    def __init__(self, js_file_url):
+    def __init__(self, js_file_url, headers=None):
         logger.info("JS UnPacker init path: {}".format(js_file_url))
-        r = requests.get(js_file_url)
+        r = requests.get(js_file_url, headers=headers)
         encrypted = r.text.strip()
         encrypted = '(' + encrypted.split('}(')[1][:-1]
         unpacked = eval('self.unpack' +encrypted) # string of the js code in unpacked form

From 9ab14148becf58e39292e479629ef08a265bd6a3 Mon Sep 17 00:00:00 2001
From: alsrua7222 <59680587+alsrua7222@users.noreply.github.com>
Date: Wed, 12 Jan 2022 23:38:45 +0900
Subject: [PATCH 2/2] Update free_proxy_mock HTML to match the new FreeProxy table markup

---
 tests/mocks.py | 134 +++++++++++++++++++++++--------------------------
 1 file changed, 63 insertions(+), 71 deletions(-)

diff --git a/tests/mocks.py b/tests/mocks.py
index 78320bd..bff3d5f 100644
--- a/tests/mocks.py
+++ b/tests/mocks.py
@@ -70,77 +70,69 @@ def sslproxy_mock(url, request):
 
 @urlmatch(netloc=r'(.*\.)?free-proxy-list\.net$')
 def free_proxy_mock(url, request):
-    return """<table border="0" cellpadding="0" cellspacing="0" id="proxylisttable"
-id="proxylisttable">\n
-<thead>\n
-<tr>\n
-    <th>IP Address</th>
-    \n
-    <th>Port</th>
-    \n
-    <th>Code</th>
-    \n
-    <th>Country</th>
-    \n
-    <th>Anonymity</th>
-    \n
-    <th>Google</th>
-    \n
-    <th>Https</th>
-    \n
-    <th>Last Checked</th>
-    \n
-</tr>
-\n
-</thead>
-\n
-<tbody>
-<tr>
-    <td>138.197.136.46</td>
-    <td>3128</td>
-    <td>CA</td>
-    <td>Canada</td>
-    <td>anonymous</td>
-    <td>no</td>
-    <td>no</td>
-    <td>7 seconds ago</td>
-</tr>
-\n
-<tr>
-    <td>177.207.75.227</td>
-    <td>8080</td>
-    <td>BR</td>
-    <td>Brazil</td>
-    <td>transparent</td>
-    <td>no</td>
-    <td>no</td>
-    <td>2 hours 21 minutes ago</td>
-</tr>
-\n
-</tbody>
-\n
-<tfoot>\n
-<tr>\n
-    <th class="input"><input type="text"/></th>
-    \n
-    <th></th>
-    \n
-    <th></th>
-    \n
-    <th></th>
-    \n
-    <th></th>
-    \n
-    <th></th>
-    \n
-    <th></th>
-    \n
-    <th></th>
-    \n
-</tr>
-\n
-</tfoot>
-\n
+    return """<table class="table table-striped table-bordered">
+    <thead>
+        <tr>
+            <th>IP Address</th>
+            <th>Port</th>
+            <th>Code</th>
+            <th class="hm">Country</th>
+            <th>Anonymity</th>
+            <th class="hm">Google</th>
+            <th class="hx">Https</th>
+            <th class="hm">Last Checked</th>
+        </tr>
+    </thead>
+    <tbody>
+        <tr>
+            <td>58.234.116.197</td>
+            <td>8193</td>
+            <td>KR</td>
+            <td class="hm">Korea</td>
+            <td>anonymous</td>
+            <td class="hm">yes</td>
+            <td class="hx">no</td>
+            <td class="hm">1 min ago</td>
+        </tr>
+        <tr>
+            <td>20.122.24.225</td>
+            <td>80</td>
+            <td>US</td>
+            <td class="hm">United States</td>
+            <td>anonymous</td>
+            <td class="hm">yes</td>
+            <td class="hx">no</td>
+            <td class="hm">1 min ago</td>
+            </tr>
+        <tr>
+            <td>154.236.177.100</td>
+            <td>1981</td><td>EG</td>
+            <td class="hm">Egypt</td>
+            <td>elite proxy</td>
+            <td class="hm">yes</td>
+            <td class="hx">yes</td>
+            <td class="hm">1 min ago</td>
+        </tr>
+        <tr>
+            <td>54.37.160.92</td>
+            <td>1080</td><td>FR</td>
+            <td class="hm">France</td>
+            <td>elite proxy</td>
+            <td class="hm">no</td>
+            <td class="hx">yes</td>
+            <td class="hm">1 min ago</td>
+        </tr>
+        <tr>
+            <td>110.232.78.55</td>
+            <td>55667</td>
+            <td>ID</td>
+            <td class="hm">Indonesia</td>
+            <td>anonymous</td>
+            <td class="hm">no</td>
+            <td class="hx">yes</td>
+            <td class="hm">1 min ago</td>
+        </tr>
+    </tbody>
 </table>"""