diff --git a/README.md b/README.md
index 19af0ed..622a2ab 100644
--- a/README.md
+++ b/README.md
@@ -19,14 +19,15 @@ Python package to generate a random proxy on the fly!
 - Fetch elite / transparent / anonymous proxies respectively.
 - Fetch directly from [free-proxy-list](https://free-proxy-list.net).
 - For better response time, fetch from an elasticsearch `cache_server`.
-- `cache_server` is updated via routines described [here](./random_proxies/cache_server/README.md)
+- `cache_server` is updated via routines described [here](./random_proxies/cache/README.md)
 
 ## Example usage
 
-
+or
 ```bash
 $ git clone https://github.com/2knal/random_proxies.git
 $ cd random_proxies/
@@ -40,14 +41,16 @@ Open python interpreter. (Supports version 3.7+)
 '23.101.2.247:81'
 ```
 
-Refer more examples [here](./examples/)
+Refer to more examples [here](./examples/example.py)
 
 ## TODO
-
-- [ ] Publish package version 0.0.1
-- [ ] Port to MongoDB
+
+- [x] Port to MongoDB
+- [x] Publish package version 0.0.2
+- [ ] Return metadata; response structure described [here](./random_proxies/cache/README.md)
 - [ ] Scrape proxies from other sources
 - [ ] Add support for SOCKS version 5
-- [ ] Implement REST API to allow other languages to interface with it
+- [x] Implement REST API to allow other languages to interface with it
 - [ ] Setup documentation page
 - [ ] Add unit tests
diff --git a/examples/example.py b/examples/example.py
index 104bb3f..c282796 100644
--- a/examples/example.py
+++ b/examples/example.py
@@ -1,9 +1,9 @@
-'''
+"""
 (Once the package is published)
-pip install random_proxies
- or
+pip install random-proxies
+ or
 Follow example usage to import the package
-'''
+"""
 
 from random_proxies import random_proxy
 
diff --git a/random_proxies/cache/.env.sample b/random_proxies/cache/.env.sample
new file mode 100644
index 0000000..77466a6
--- /dev/null
+++ b/random_proxies/cache/.env.sample
@@ -0,0 +1 @@
+MONGO_URI=
\ No newline at end of file
diff --git a/random_proxies/cache/Dockerfile b/random_proxies/cache/Dockerfile
new file mode 100644
index 0000000..f66e63c
--- /dev/null
+++ b/random_proxies/cache/Dockerfile
@@ -0,0 +1,15 @@
+# For cache server setup
+
+FROM python:3.6-alpine
+
+RUN mkdir /app
+WORKDIR /app
+
+COPY requirements.txt /app/
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . /app
+
+EXPOSE 5000
+
+CMD ["python", "-m", "random_proxies.cache.app"]
\ No newline at end of file
diff --git a/random_proxies/cache_server/README.md b/random_proxies/cache/README.md
similarity index 67%
rename from random_proxies/cache_server/README.md
rename to random_proxies/cache/README.md
index 51727c3..c21fe9e 100644
--- a/random_proxies/cache_server/README.md
+++ b/random_proxies/cache/README.md
@@ -10,6 +10,7 @@
 ### Proxy structure
 
 ##### HTTP / HTTPS Proxy
+
 ```json
 {
     "ip address": "185.140.234.18",
@@ -24,6 +25,7 @@
 ```
 
 ##### SOCKS Proxy
+
 ```json
 {
     "ip address": "185.140.234.18",
@@ -36,10 +38,26 @@
     "last checked": "5 minutes ago"
 }
 ```
+
+##### Improved response structure
+
+```json
+{
+    "ip": "185.140.234.18:8080",
+    "meta": {
+        "code": "ir",
+        "country": "iran",
+        "anonymity": "transparent",
+        "version": "socks4",
+        "https": "no"
+    }
+}
+```
+
 ### Procedures to run
 > Note: Add cronjobs for the routines below.
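+The routines below are ordinary modules, so a sample crontab matching their schedules might look like this (a sketch; it assumes the package is importable from the cron environment, as with `python -m random_proxies.cache.app` in the Dockerfile above):
+
+```bash
+# minute hour day-of-month month day-of-week  command
+0 */2 * * *  python -m random_proxies.cache.main.routine
+0 */6 * * *  python -m random_proxies.cache.main.update
+0 0   * * *  python -m random_proxies.cache.main.clean
+```
+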
-- `routine.py`: Run after every 2 hours, every day
-- `update.py`: Run after every 6 hours, every day
-- `clean.py`: Run every day at 12 am
+- `main/routine.py`: Run every 2 hours
+- `main/update.py`: Run every 6 hours
+- `main/clean.py`: Run every day at 12 am
diff --git a/random_proxies/cache/__init__.py b/random_proxies/cache/__init__.py
new file mode 100644
index 0000000..2b9f6fa
--- /dev/null
+++ b/random_proxies/cache/__init__.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+
+import os
+from os.path import join, dirname
+from dotenv import load_dotenv
+
+from pymongo import MongoClient
+
+env_path = join(dirname(__file__), '.env')
+load_dotenv(env_path)
+
+uri = os.environ.get('MONGO_URI')
+
+conn = MongoClient(uri)
+db = conn['random_proxies']
+
+# Imported last to avoid a circular import
+from random_proxies.proxies.log import logger
+from random_proxies.proxies.proxy_health import is_good_proxy
+from random_proxies.proxies.settings import BASE_URL, SOCKS_URL, SSL_URL
+from random_proxies.proxies.utils import fetch, parse_response
diff --git a/random_proxies/cache/app.py b/random_proxies/cache/app.py
new file mode 100644
index 0000000..4fcbed2
--- /dev/null
+++ b/random_proxies/cache/app.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+
+from random_proxies.cache import db
+from random_proxies.cache import logger
+
+import os
+from os.path import join, dirname
+from random import choice
+from flask import Flask, request, jsonify
+from markdown import markdown
+
+app = Flask(__name__)
+
+
+@app.route('/')
+def index():
+    try:
+        with open(join(dirname(__file__), 'README.md')) as f:
+            markdown_file = f.read()
+        return markdown(markdown_file)
+    except Exception:
+        return "It works"
+
+
+@app.route('/fetch', methods=['GET'])
+def fetch():
+    conditions = request.args
+    proxies_collection = db['proxies']
+    recents_collection = db['recents']
+
+    # Fetch from proxies
+    proxies = proxies_collection.find(conditions)
+    if proxies.count() == 0:
+        return jsonify({'success': 'no'})
+
+    # Randomly select it
+    proxies = list(proxies)
+    proxy = choice(proxies)
+    ip = proxy['_id']
+
+    try:
+        # Remove it from the proxies collection
+        proxies_collection.delete_one({'_id': ip})
+
+        # Add it to the recents collection
+        recents_collection.insert_one(proxy)
+
+        return jsonify({'ip': ip, 'success': 'yes'})
+
+    except Exception as e:
+        template = 'An exception of type {0} occurred.\nArguments: {1!r}'
+        message = template.format(type(e).__name__, e.args)
+        logger.error(message)
+        return jsonify({'success': 'no'}), 500
+
+
+if __name__ == '__main__':
+    app.run(debug=True, host='0.0.0.0', port=5000)
diff --git a/random_proxies/cache_server/__init__.py b/random_proxies/cache/main/__init__.py
similarity index 100%
rename from random_proxies/cache_server/__init__.py
rename to random_proxies/cache/main/__init__.py
diff --git a/random_proxies/cache_server/clean.py b/random_proxies/cache/main/clean.py
similarity index 50%
rename from random_proxies/cache_server/clean.py
rename to random_proxies/cache/main/clean.py
index c0ce913..8b30445 100644
--- a/random_proxies/cache_server/clean.py
+++ b/random_proxies/cache/main/clean.py
@@ -1,25 +1,25 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, unicode_literals
 
-from elasticsearch import helpers
+# Will run every day at 12 am to check for working proxies
 
 from time import time
 
-from random_proxies.cache_server.config import es
-from random_proxies.cache_server.config import 
is_good_proxy -from random_proxies.cache_server.config import logger -from random_proxies.cache_server.utils import add +from random_proxies.cache import db +from random_proxies.cache import is_good_proxy +from random_proxies.cache import logger + def _clean(): - # Get all the proxies from proxies index - data = es.search(index='proxies', doc_type='proxy', body={'size': }) - proxies = data['hits']['hits'] - + # Get all the proxies from proxies collection + collection = db['proxies'] + proxies = collection.find({}) + # Delete those which arent good for proxy in proxies: - ip = proxy['ip address'] + ':' + proxy['port'] + ip = proxy['_id'] protocol = ('http', 'https')[proxy['https'] == 'yes'] - + # Implies SOCKS proxy if 'version' in proxy: ip = proxy['version'] + '://' + ip @@ -27,18 +27,19 @@ def _clean(): try: # If it doesn't work if not is_good_proxy(ip, protocol=protocol): - # Delete from proxies index - es.delete(index='proxies', doc_type='proxy', id=ip) - + # Delete from proxies collection + collection.delete_one({'_id': ip}) + except Exception as e: - # Delete from proxies index - es.delete(index='proxies', doc_type='proxy', id=ip) + # Delete from proxies collection + collection.delete_one({'_id': ip}) template = 'An exception of type {0} occurred.\nArguments: {1!r}' message = template.format(type(e).__name__, e.args) logger.error(message) + if __name__ == '__main__': tic = time() _clean() tac = time() - print('Total time: [clean]', tac - tic) \ No newline at end of file + print('Total time: [clean]', tac - tic) diff --git a/random_proxies/cache_server/routine.py b/random_proxies/cache/main/routine.py similarity index 58% rename from random_proxies/cache_server/routine.py rename to random_proxies/cache/main/routine.py index 5594122..a8e746b 100644 --- a/random_proxies/cache_server/routine.py +++ b/random_proxies/cache/main/routine.py @@ -1,16 +1,16 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, unicode_literals -from elasticsearch import helpers +# Will run after every 2 hours to add new proxies to 'proxies' collection from time import time -from random_proxies.cache_server.config import es -from random_proxies.cache_server.config import fetch, parse_response -from random_proxies.cache_server.config import is_good_proxy -from random_proxies.cache_server.config import logger -from random_proxies.cache_server.utils import add -from random_proxies.cache_server.config import BASE_URL, SSL_URL, SOCKS_URL +from random_proxies.cache import db +from random_proxies.cache import fetch, parse_response +from random_proxies.cache import is_good_proxy +from random_proxies.cache import logger +from random_proxies.cache import BASE_URL, SSL_URL, SOCKS_URL + def _check(): urls = [BASE_URL, SSL_URL, SOCKS_URL] @@ -19,13 +19,20 @@ def _check(): # Fetch all the proxies from these urls for url in urls: res = fetch(url) - # Passing empty conditions so that + # Passing empty conditions so that all proxies will be fetched proxies.extend(parse_response(res, {})) + count = 0 + + # proxies collection + proxies_collection = db['proxies'] + # Check if they work - working_proxies = [] for proxy in proxies: ip = proxy['ip address'] + ':' + proxy['port'] + + # Adding _id to proxy document + proxy['_id'] = ip protocol = ('http', 'https')[proxy['https'] == 'yes'] # Implies SOCKS proxy @@ -36,17 +43,17 @@ def _check(): try: # Only if it works if is_good_proxy(ip, protocol=protocol): - working_proxies.append(proxy) + # Add it to proxies collection + proxies_collection.insert_one(proxy) + except 
Exception as e:
             template = 'An exception of type {0} occurred.\nArguments: {1!r}'
             message = template.format(type(e).__name__, e.args)
             logger.error(message)
-
-    return working_proxies
+
 
 if __name__ == '__main__':
     tic = time()
-    proxies = _check()
-    add(proxies, 'proxies')
+    _check()
     tac = time()
-    print('Total time: [routine]', tac - tic)
\ No newline at end of file
+    print('Total time: [routine]', tac - tic)
diff --git a/random_proxies/cache/main/update.py b/random_proxies/cache/main/update.py
new file mode 100644
index 0000000..7a314a7
--- /dev/null
+++ b/random_proxies/cache/main/update.py
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+
+# Will run after every 6 hours to update 'recents' collection
+
+from time import time
+
+from random_proxies.cache import db
+from random_proxies.cache import is_good_proxy
+from random_proxies.cache import logger
+
+
+def _check():
+    recents_collection = db['recents']
+    proxies_collection = db['proxies']
+
+    # Check if proxies in the recents collection are working
+    recents = recents_collection.find({})
+    for proxy in recents:
+
+        ip = proxy['_id']
+        protocol = ('http', 'https')[proxy['https'] == 'yes']
+
+        # Implies SOCKS proxy
+        if 'version' in proxy:
+            ip = proxy['version'] + '://' + ip
+            protocol = 'http'
+
+        try:
+            # Only if it works
+            if is_good_proxy(ip, protocol=protocol):
+                # Delete from recents
+                recents_collection.delete_one({'_id': ip})
+
+                # Add it back to proxies
+                proxies_collection.insert_one(proxy)
+
+        except Exception as e:
+            # Delete from recents
+            recents_collection.delete_one({'_id': ip})
+            template = 'An exception of type {0} occurred.\nArguments: {1!r}'
+            message = template.format(type(e).__name__, e.args)
+            logger.error(message)
+
+
+if __name__ == '__main__':
+    tic = time()
+    _check()
+    tac = time()
+    print('Total time: [update]', tac - tic)
diff --git a/random_proxies/cache_server/.env.sample b/random_proxies/cache_server/.env.sample
deleted file mode 100644
index 62924a0..0000000
--- a/random_proxies/cache_server/.env.sample
+++ /dev/null
@@ -1 +0,0 @@
-ELASTIC_PASSWORD=
\ No newline at end of file
diff --git a/random_proxies/cache_server/config.py b/random_proxies/cache_server/config.py
deleted file mode 100644
index a40465a..0000000
--- a/random_proxies/cache_server/config.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, unicode_literals
-
-import os
-from os.path import join, dirname
-from dotenv import load_dotenv
-from elasticsearch import Elasticsearch
-
-env_path = join(dirname(__file__), '.env')
-load_dotenv(env_path)
-
-# GLOBALS
-elastic_password = os.environ.get('ELASTIC_PASSWORD')
-elastic_username = 'elastic'
-
-elastic_uri = 'http://localhost:9200'
-MAX_SIZE = 10000
-
-# Setting up conn
-es = Elasticsearch([elastic_uri], http_auth=(elastic_username, elastic_password))
-
-# Creating necessary index
-if not es.indices.exists(index='proxies'):
-    es.indices.create(index='proxies', ignore=400)
-
-if not es.indices.exists(index='recents'):
-    es.indices.create(index='recents', ignore=400)
-
-# Removing circular import
-from random_proxies.proxies.settings import BASE_URL, SSL_URL, SOCKS_URL
-from random_proxies.proxies.log import logger
-from random_proxies.proxies.utils import fetch, parse_response
-from random_proxies.proxies.proxy_health import is_good_proxy
diff --git a/random_proxies/cache_server/docker-compose.yml b/random_proxies/cache_server/docker-compose.yml
deleted file mode 100644
index e3e802a..0000000
--- 
a/random_proxies/cache_server/docker-compose.yml +++ /dev/null @@ -1,27 +0,0 @@ -version: '3' - -services: - elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:7.6.2 - container_name: cache_elasticsearch - environment: - - cluster.name=cache-server - - discovery.type=single-node - - xpack.security.enabled=true - - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} - - ES_JAVA_OPTS=-Xms512m -Xmx512m - volumes: - - data:/usr/share/elasticsearch/data - ports: - - 9200:9200 - networks: - - elastic - restart: always - -volumes: - data: - driver: local - -networks: - elastic: - driver: bridge diff --git a/random_proxies/cache_server/update.py b/random_proxies/cache_server/update.py deleted file mode 100644 index 6c5d370..0000000 --- a/random_proxies/cache_server/update.py +++ /dev/null @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import, unicode_literals - -from elasticsearch import helpers - -from time import time - -from random_proxies.cache_server.config import es -from random_proxies.cache_server.config import fetch, parse_response -from random_proxies.cache_server.config import is_good_proxy -from random_proxies.cache_server.config import logger -from random_proxies.cache_server.utils import add -from random_proxies.cache_server.config import BASE_URL, SSL_URL, SOCKS_URL, MAX_SIZE - -def _check(): - - # Check if proxies are working in recents index - data = es.search(index='recents', doc_type='proxy', body={'size': }) - recents = data['hits']['hits'] - add_back_proxies = [] - if recents: - for proxy in recents: - proxy = proxy['_source'] - try: - # Only if it works - if is_good_proxy(ip, protocol=protocol): - add_back_proxies.append(proxy) - except Exception as e: - template = 'An exception of type {0} occurred.\nArguments: {1!r}' - message = template.format(type(e).__name__, e.args) - logger.error(message) - - # If yes, then add them back to proxies index - if add_back_proxies: - add(add_back_proxies, 'recents') - -if __name__ == '__main__': - tic = time() - _check() - tac = time() - print('Total time: [update]', tac - tic) \ No newline at end of file diff --git a/random_proxies/cache_server/utils.py b/random_proxies/cache_server/utils.py deleted file mode 100644 index c46f9fd..0000000 --- a/random_proxies/cache_server/utils.py +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import, unicode_literals - -from elasticsearch import helpers - -from time import time - -from random_proxies.cache_server.config import es -from random_proxies.cache_server.config import logger - -def add(proxies, index): - actions = [ - { - "_index": index, - "_type" : "proxy", - "_id" : proxy['ip address'] + ':' + proxy['port'], - "_source": proxy, - "op_type": "create" - } - for proxy in proxies - ] - if proxies: - try: - tic = time() - helpers.bulk(es, actions) - tac = time() - print('Time taken to add to index: ', tac - tic) - except Exception as e: - template = 'An exception of type {0} occurred.\nArguments: {1!r}' - message = template.format(type(e).__name__, e.args) - logger.error(message) \ No newline at end of file diff --git a/random_proxies/proxies/db.py b/random_proxies/proxies/db.py index 80a82e3..3fef3f8 100644 --- a/random_proxies/proxies/db.py +++ b/random_proxies/proxies/db.py @@ -1,34 +1,23 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, unicode_literals -from random import choice +import json +import requests -from random_proxies.cache_server.config import es -from random_proxies.cache_server.config 
import MAX_SIZE
-from random_proxies.proxies.log import logger
+from random_proxies.proxies.settings import CACHE_SERVER_URL
 from random_proxies.proxies.exception import NoSuchProxyError
 
+
 def pop(conditions):
-    search_query = {
-        'size': MAX_SIZE,
-        'query': conditions
-    }
+    query_string = '?'
 
-    # Get proxies which satisfy given conditions
-    data = es.search(index='proxies', doc_type='proxy', body=search_query)
-    proxies = data['hits']['hits']
+    for k, v in conditions.items():
+        query_string += f'{k}={v}&'
+    url = CACHE_SERVER_URL + query_string[:-1]
+    data = requests.get(url).text
+    data = json.loads(data)
 
-    if len(proxies) == 0:
+    if data['success'] == 'yes':
+        return data['ip']
+    else:
         raise NoSuchProxyError('No proxy satisfying given conditions.')
-
-    # Randomly select it
-    proxy = choice(proxies)
-    ip = proxy['_id']
-
-    # Remove it from proxies index
-    es.delete(index='proxies', doc_type='proxy', id=ip)
-
-    # Add it to recents index
-    es.index(index='recents', doc_type='proxy', id=ip, body=proxy['_source'])
-
-    return ip
diff --git a/random_proxies/proxies/exception.py b/random_proxies/proxies/exception.py
index d35ab69..7cf9bdd 100644
--- a/random_proxies/proxies/exception.py
+++ b/random_proxies/proxies/exception.py
@@ -1,11 +1,14 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, unicode_literals
 
+
 class TimeoutError(Exception):
     pass
 
+
 class NoSuchProxyError(Exception):
     pass
 
+
 class CountryCodeError(Exception):
     pass
diff --git a/random_proxies/proxies/log.py b/random_proxies/proxies/log.py
index 0aa84b3..49d28f0 100644
--- a/random_proxies/proxies/log.py
+++ b/random_proxies/proxies/log.py
@@ -5,4 +5,4 @@ logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s',
     level=logging.ERROR)
 
-logger = logging.getLogger(__package__)
\ No newline at end of file
+logger = logging.getLogger(__package__)
diff --git a/random_proxies/proxies/proxy.py b/random_proxies/proxies/proxy.py
index 9857916..d5785ea 100644
--- a/random_proxies/proxies/proxy.py
+++ b/random_proxies/proxies/proxy.py
@@ -3,24 +3,26 @@
 
 from random import choice
 
-import random_proxies.proxies.settings as settings
-from random_proxies.proxies.utils import fetch, parse_response
+from random_proxies.proxies import settings
+from random_proxies.proxies.utils import fetch, parse_response, country_to_code
 from random_proxies.proxies.exception import NoSuchProxyError
 from random_proxies.proxies.db import pop
 
+
 def _select(proxies):
     if len(proxies) == 0:
         raise NoSuchProxyError('No proxy satisfying given conditions.')
 
     proxy = choice(proxies)
-    
+
     return proxy['ip address'] + ':' + proxy['port']
 
+
 def random_proxy(
-        use_cache=True,
-        protocol='http',
-        standard='anonymous',
-        country=None,
-        code=None
+    use_cache=True,
+    protocol='http',
+    standard=None,
+    country=None,
+    code=None
 ):
     conditions = {
         'country': country,
@@ -35,7 +37,7 @@ def random_proxy(
         url = settings.SSL_URL
     elif protocol == 'socks':
         url = settings.SOCKS_URL
-    
+
     if not use_cache:
         res = fetch(url)
         proxies = parse_response(res, conditions)
@@ -43,13 +45,14 @@ def random_proxy(
     else:
         if protocol == 'socks':
             conditions['version'] = 'socks4'
-        query = {
-            'bool': {
-                'must': [
-                    { 'match': { k:v } } for k, v in conditions.items() if v != None
-                ]
-            }
-        }
-        
+
+        # Removing None conditions
+        new_conditions = {k: v for k, v in conditions.items() if v is not None}
+
+        # Country-code matching
+        if code is not None and country is not None:
+            if country_to_code(country, code):
+                return pop(new_conditions)
+
         # Fetch from db
-        return pop(query)
+        return 
pop(new_conditions) diff --git a/random_proxies/proxies/proxy_health.py b/random_proxies/proxies/proxy_health.py index d1d0ec0..36535a1 100644 --- a/random_proxies/proxies/proxy_health.py +++ b/random_proxies/proxies/proxy_health.py @@ -7,12 +7,13 @@ from random_proxies.proxies.utils import timeout from random_proxies.proxies.log import logger + @timeout(seconds=settings.HTTP_TIMEOUT) -def is_good_proxy(ip, protocol='http'): +def is_good_proxy(ip, protocol='http'): proxies = { 'http': 'http://' + ip, } - + if protocol == 'https': proxies['https'] = 'https://' + ip @@ -27,4 +28,3 @@ def is_good_proxy(ip, protocol='http'): message = template.format(type(e).__name__, e.args) logger.error(message) return False - diff --git a/random_proxies/proxies/settings.py b/random_proxies/proxies/settings.py index c97a378..10838ab 100644 --- a/random_proxies/proxies/settings.py +++ b/random_proxies/proxies/settings.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, unicode_literals -__version__ = '0.0.1' +__version__ = '0.0.2' BASE_URL = 'https://free-proxy-list.net/' SSL_URL = 'http://sslproxies.org/' @@ -13,3 +13,4 @@ HTTP_RETRIES = 3 LAST_CHECKED_THRESHOLD = 20 +CACHE_SERVER_URL = 'http://54.162.135.16:5000/fetch' diff --git a/random_proxies/proxies/utils.py b/random_proxies/proxies/utils.py index 07fb1e0..741d0df 100644 --- a/random_proxies/proxies/utils.py +++ b/random_proxies/proxies/utils.py @@ -10,25 +10,29 @@ import errno import os import signal -import json - +import json +from os.path import dirname, join from random_proxies.proxies.log import logger from random_proxies.proxies.exception import TimeoutError, CountryCodeError -import random_proxies.proxies.settings as settings +from random_proxies.proxies import settings + def country_to_code(country, code): mapper = {} - with open('c2c.json', 'r') as f: + path = join(dirname(__file__), 'c2c.json') + with open(path, 'r') as f: mapper = json.load(f) if mapper[country] == code: return True raise CountryCodeError('Country code does not match with the added country.') + # https://stackoverflow.com/questions/2281850/timeout-function-if-it-takes-too-long-to-finish def timeout(seconds=10, error_message=os.strerror(errno.ETIME)): def decorator(func): def _handle_timeout(signum, frame): raise TimeoutError(error_message) + def wrapper(*args, **kwargs): signal.signal(signal.SIGALRM, _handle_timeout) signal.alarm(seconds) @@ -37,14 +41,17 @@ def wrapper(*args, **kwargs): finally: signal.alarm(0) return result + return wraps(func)(wrapper) + return decorator + # https://www.peterbe.com/plog/best-practice-with-retries-with-requests def fetch(url=settings.BASE_URL, - suffix='', - backoff_factor=settings.HTTP_DELAY, - status_forcelist=(500, 502, 504)): + suffix='', + backoff_factor=settings.HTTP_DELAY, + status_forcelist=(500, 502, 504)): try: res = None with Session() as sess: @@ -67,6 +74,7 @@ def fetch(url=settings.BASE_URL, logger.error(message) return None + def parse_header(header): # Fetching field names fields = [] @@ -75,6 +83,7 @@ def parse_header(header): fields.append(name) return fields + # Do some condition checking here def parse_values(body, fields, conditions): rows = body.find_all('tr') @@ -96,14 +105,14 @@ def parse_values(body, fields, conditions): temp = field.split() if temp[1].startswith('second'): proxy[field] = value - + # Taking only proxies scanned before less than 20 minutes elif temp[1].startswith('minute') and int(temp[0]) < settings.LAST_CHECKED_THRESHOLD: proxy[field] = value elif field == 
'country': # Check if code is added or not - if conditions.get('country')== None: + if conditions.get('country') is None: proxy[field] = value else: if value.startswith(conditions.get('country')): @@ -114,7 +123,7 @@ def parse_values(body, fields, conditions): elif field == 'code': # First code must match country if both are not none - if conditions['code'] == None: + if conditions.get('code') is None: proxy[field] = value else: if conditions.get('country') and country_to_code(conditions.get('country'), conditions.get('code')): @@ -122,8 +131,11 @@ def parse_values(body, fields, conditions): elif conditions.get('code'): proxy[field] = value - elif field == 'anonymity' and conditions.get('anonymity') == value: - proxy[field] = value + elif field == 'anonymity': + if conditions.get('anonymity') is None: + proxy[field] = value + elif conditions.get('anonymity') == value: + proxy[field] = value elif field in ['ip address', 'port', 'google', 'version']: proxy[field] = value @@ -131,11 +143,12 @@ def parse_values(body, fields, conditions): else: flag = True break - + if not flag: proxies.append(proxy) return proxies + # TODO # Add to db after giving the response # Use multiprocessing to do so diff --git a/requirements.txt b/requirements.txt index 0f4bee2..9bb0f0b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,20 @@ beautifulsoup4==4.9.0 certifi==2020.4.5.1 chardet==3.0.4 -elasticsearch==7.6.0 +click==7.1.2 +dnspython==1.16.0 +Flask==1.1.2 idna==2.9 +itsdangerous==1.1.0 +Jinja2==2.11.2 lxml==4.5.0 +Markdown==3.2.1 +MarkupSafe==1.1.1 +pymongo==3.10.1 PySocks==1.7.1 python-dotenv==0.13.0 requests==2.23.0 soupsieve==2.0 urllib3==1.25.9 +Werkzeug==1.0.1 +flake8==3.8.3 \ No newline at end of file diff --git a/setup.py b/setup.py index 662f996..529b225 100644 --- a/setup.py +++ b/setup.py @@ -10,28 +10,27 @@ # This call to setup() does all the work setup( - name = "random_proxies", - version = __version__, - description = "Get a proxy server IP on the fly!", - long_description = README, - long_description_content_type = "text/markdown", - url = "https://github.com/2knal/random_proxies", - author = "Kunal Sonawane", - author_email = "kunal.sonawane@somaiya.edu", - license = "MIT", - classifiers = [ + name="random_proxies", + version=__version__, + description="Get a proxy server IP on the fly!", + long_description=README, + long_description_content_type="text/markdown", + url="https://github.com/2knal/random_proxies", + author="Kunal Sonawane", + author_email="kunal.sonawane@somaiya.edu", + license="MIT", + classifiers=[ "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", ], - packages = find_packages(), - include_package_data = True, - install_requires = [ - "beautifulsoup4==4.9.0", - "elasticsearch==7.6.0", - "lxml==4.5.0", - "PySocks==1.7.1", - "python-dotenv==0.13.0", - "requests==2.23.0" + packages=find_packages(), + include_package_data=True, + install_requires=[ + "beautifulsoup4>=4.9.0", + "lxml>=4.5.0", + "PySocks>=1.7.1", + "python-dotenv>=0.13.0", + "requests>=2.23.0" ] -) \ No newline at end of file +)
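For reference, a minimal end-to-end sketch of the flow this patch wires together: `random_proxy(use_cache=True)` builds a condition dict, and `pop()` turns it into a GET against the cache server's `/fetch` route, which pops a random matching document from the Mongo `proxies` collection into `recents`. The URL below is the hardcoded default from `settings.py`; the condition keys mirror the scraped document fields (`https`, `anonymity`, ...), so treat the exact keys as an assumption.

```python
import requests

from random_proxies import random_proxy

# Scrape free-proxy-list directly, bypassing the cache server.
print(random_proxy(use_cache=False, protocol='https'))  # e.g. '23.101.2.247:81'

# Query the cache server the same way pop() does; /fetch answers
# {'ip': ..., 'success': 'yes'} or {'success': 'no'}.
res = requests.get('http://54.162.135.16:5000/fetch', params={'https': 'yes'}).json()
if res['success'] == 'yes':
    print(res['ip'])
```

Note that `pop()` assembles the query string by hand; `requests` performs the same job (plus URL-encoding) via `params=`, which would also handle values like `elite proxy` that contain spaces.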