Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,15 @@ Python package to generate a random proxy on the fly!
- Fetch elite / transparent / anonymous proxies respectively.
- Fetch directly from [free-proxy-list](https://free-proxy-list.net).
- For better response time, fetch from an elasticsearch `cache_server`.
- `cache_server` is updated via routines described [here](./random_proxies/cache_server/README.md)
- `cache_server` is updated via routines described [here](./random_proxies/cache/README.md)


## Example usage
<!-- ```bash
pip install random_proxies

```bash
pip install random-proxies
```
or -->
or
```bash
$ git clone https://github.com/2knal/random_proxies.git
$ cd random_proxies/
Expand All @@ -40,14 +41,16 @@ Open python interpreter. (Supports version 3.7+)
'23.101.2.247:81'
```

Refer more examples [here](./examples/)
Refer to more examples [here](./examples/example.py)

## TODO
- [ ] Publish package version 0.0.1
- [ ] Port to MongoDB

- [x] Port to MongoDB
- [x] Publish package version 0.0.2
- [ ] Return meta data, response structure found [here](./random_proxies/cache/README.md)
- [ ] Scrape proxies from other sources
- [ ] Add support for SOCKS version 5
- [ ] Implement REST API to allow other languages to interface with it
- [x] Implement REST API to allow other languages to interface with it
- [ ] Setup documentation page
- [ ] Add unit tests

Expand Down
8 changes: 4 additions & 4 deletions examples/example.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
'''
"""
(Once the package is published)
pip install random_proxies
or
pip install random-proxies
or
Follow example usage to import the package
'''
"""

from random_proxies import random_proxy

Expand Down
1 change: 1 addition & 0 deletions random_proxies/cache/.env.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
MONGO_URI=<ADD_MONGO_CONN_URL_HERE>
15 changes: 15 additions & 0 deletions random_proxies/cache/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# For cache server setup

# NOTE(review): the project README states support for Python 3.7+, while this
# image is based on 3.6 - confirm the intended interpreter version.
FROM python:3.6-alpine

# Application code lives in /app inside the image.
RUN mkdir /app
WORKDIR /app

# Copy and install the dependency list before the rest of the sources.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

# Copy the remainder of the build context into the image.
COPY . /app

# Port the Flask app in random_proxies/cache/app.py listens on.
EXPOSE 5000

# Start the cache server module.
CMD ["python", "-m", "random_proxies.cache.app"]
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
### Proxy structure

##### HTTP / HTTPS Proxy

```json
{
"ip address": "185.140.234.18",
Expand All @@ -24,6 +25,7 @@
```

##### SOCKS Proxy

```json
{
"ip address": "185.140.234.18",
Expand All @@ -36,10 +38,26 @@
"last checked": "5 minutes ago"
}
```

##### Improved response structure

```json
{
"ip": "185.140.234.18:8080",
"meta": {
"code": "ir",
"country": "iran",
"anonymity": "transparent",
"version": "socks4",
"https": "no"
}
}
```

### Procedures to run

> Note: Add cron jobs for the routines below.

- `routine.py`: Run after every 2 hours, every day
- `update.py`: Run after every 6 hours, every day
- `clean.py`: Run every day at 12 am
- `main/routine.py`: Run after every 2 hours, every day
- `main/update.py`: Run after every 6 hours, every day
- `main/clean.py`: Run every day at 12 am
22 changes: 22 additions & 0 deletions random_proxies/cache/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals

import os
from os.path import join, dirname
from dotenv import load_dotenv

from pymongo import MongoClient

# Load environment variables from the `.env` file that sits next to this
# module (see `.env.sample` for the expected MONGO_URI entry).
env_path = join(dirname(__file__), '.env')
load_dotenv(env_path)

# Connection string for the MongoDB deployment; None when MONGO_URI is unset.
# NOTE(review): with uri=None MongoClient presumably falls back to its
# default host - confirm this is acceptable rather than failing fast.
uri = os.environ.get('MONGO_URI')

# Shared client and database handle imported by the cache app and routines.
conn = MongoClient(uri)
db = conn['random_proxies']

# Imported after `db` is defined to avoid a circular import
from random_proxies.proxies.log import logger
from random_proxies.proxies.proxy_health import is_good_proxy
from random_proxies.proxies.settings import BASE_URL, SOCKS_URL, SSL_URL
from random_proxies.proxies.utils import fetch, parse_response
59 changes: 59 additions & 0 deletions random_proxies/cache/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals

from random_proxies.cache import db
from random_proxies.cache import logger

import os
from os.path import join, dirname
from random import choice
from flask import Flask, request, jsonify
from markdown import markdown

app = Flask(__name__)


@app.route('/')
def index():
    """Serve the cache README rendered as HTML.

    Returns:
        The HTML produced by ``markdown`` from the ``README.md`` that sits
        next to this module, or the plain string ``"It works"`` when the
        README cannot be read or rendered.
    """
    try:
        with open(join(dirname(__file__), 'README.md')) as f:
            markdown_file = f.read()
        return markdown(markdown_file)
    except Exception as e:
        # Best-effort landing page: keep the fallback, but do not swallow
        # SystemExit/KeyboardInterrupt and leave a trace of what went wrong.
        logger.warning('Could not render README.md: %r', e)
        return "It works"


@app.route('/fetch', methods=['GET'])
def fetch():
    """Return one random proxy matching the query-string conditions.

    Every query parameter is used as an equality filter on the ``proxies``
    collection. The selected proxy is moved from ``proxies`` to ``recents``
    so that it is not handed out again until it has been re-validated.

    Returns:
        JSON ``{'ip': <ip:port>, 'success': 'yes'}`` when a proxy is found,
        JSON ``{'success': 'no'}`` when nothing matches, and
        JSON ``{'success': 'no'}`` with HTTP status 500 when the database
        operation fails.
    """
    # Flatten the request MultiDict into a plain dict (first value per key)
    # and drop '$'-prefixed keys so callers cannot inject MongoDB query
    # operators such as `$where` through the query string.
    conditions = {
        key: value
        for key, value in request.args.items()
        if not key.startswith('$')
    }
    proxies_collection = db['proxies']
    recents_collection = db['recents']

    try:
        # Materialise the cursor once; `Cursor.count()` is deprecated and no
        # longer available in PyMongo 4.
        proxies = list(proxies_collection.find(conditions))
        if not proxies:
            return jsonify({'success': 'no'})

        # Randomly select it
        proxy = choice(proxies)
        ip = proxy['_id']

        # Remove it from proxies collection
        proxies_collection.delete_one({'_id': ip})

        # Add it to recents collection
        recents_collection.insert_one(proxy)

        return jsonify({'ip': ip, 'success': 'yes'})

    except Exception as e:
        template = 'An exception of type {0} occurred.\nArguments: {1!r}'
        message = template.format(type(e).__name__, e.args)
        logger.error(message)
        # A Flask view must not return None; report the failure explicitly.
        return jsonify({'success': 'no'}), 500


if __name__ == '__main__':
    # The server binds to every interface, so the interactive debugger (which
    # allows arbitrary code execution) must be opt-in rather than always on.
    # Set FLASK_DEBUG=1 to enable it during local development.
    debug = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug, host='0.0.0.0', port=5000)
Original file line number Diff line number Diff line change
@@ -1,44 +1,45 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals

from elasticsearch import helpers
# Will run every day at 12 am to check for working proxies

from time import time

from random_proxies.cache_server.config import es
from random_proxies.cache_server.config import is_good_proxy
from random_proxies.cache_server.config import logger
from random_proxies.cache_server.utils import add
from random_proxies.cache import db
from random_proxies.cache import is_good_proxy
from random_proxies.cache import logger


def _clean():
    """Delete every proxy in the ``proxies`` collection that no longer works.

    Each stored proxy is probed with ``is_good_proxy``. A proxy is removed
    when the probe reports it as not working or when the probe itself raises
    (the exception is logged).
    """
    # Get all the proxies from proxies collection
    collection = db['proxies']

    # Read the documents up-front: the health checks are slow network calls
    # and documents are deleted while iterating.
    proxies = list(collection.find({}))

    # Delete those which aren't good
    for proxy in proxies:
        # Keep the document id separate from the address handed to the
        # health check; the SOCKS prefix below must not leak into the id
        # used for deletion, otherwise delete_one() never matches.
        proxy_id = proxy['_id']
        address = proxy_id
        protocol = ('http', 'https')[proxy['https'] == 'yes']

        # Implies SOCKS proxy
        if 'version' in proxy:
            address = proxy['version'] + '://' + proxy_id
            protocol = 'http'

        try:
            # If it doesn't work
            if not is_good_proxy(address, protocol=protocol):
                # Delete from proxies collection
                collection.delete_one({'_id': proxy_id})

        except Exception as e:
            # Delete from proxies collection
            collection.delete_one({'_id': proxy_id})
            template = 'An exception of type {0} occurred.\nArguments: {1!r}'
            message = template.format(type(e).__name__, e.args)
            logger.error(message)


if __name__ == '__main__':
    # Run the clean-up once and report how long it took.
    tic = time()
    _clean()
    tac = time()
    print('Total time: [clean]', tac - tic)
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals

from elasticsearch import helpers
# Will run after every 2 hours to add new proxies to 'proxies' collection

from time import time

from random_proxies.cache_server.config import es
from random_proxies.cache_server.config import fetch, parse_response
from random_proxies.cache_server.config import is_good_proxy
from random_proxies.cache_server.config import logger
from random_proxies.cache_server.utils import add
from random_proxies.cache_server.config import BASE_URL, SSL_URL, SOCKS_URL
from random_proxies.cache import db
from random_proxies.cache import fetch, parse_response
from random_proxies.cache import is_good_proxy
from random_proxies.cache import logger
from random_proxies.cache import BASE_URL, SSL_URL, SOCKS_URL


def _check():
urls = [BASE_URL, SSL_URL, SOCKS_URL]
Expand All @@ -19,13 +19,20 @@ def _check():
# Fetch all the proxies from these urls
for url in urls:
res = fetch(url)
# Passing empty conditions so that
# Passing empty conditions so that all proxies will be fetched
proxies.extend(parse_response(res, {}))

count = 0

# proxies collection
proxies_collection = db['proxies']

# Check if they work
working_proxies = []
for proxy in proxies:
ip = proxy['ip address'] + ':' + proxy['port']

# Adding _id to proxy document
proxy['_id'] = ip
protocol = ('http', 'https')[proxy['https'] == 'yes']

# Implies SOCKS proxy
Expand All @@ -36,17 +43,17 @@ def _check():
try:
# Only if it works
if is_good_proxy(ip, protocol=protocol):
working_proxies.append(proxy)
# Add it to proxies collection
proxies_collection.insert_one(proxy)

except Exception as e:
template = 'An exception of type {0} occurred.\nArguments: {1!r}'
message = template.format(type(e).__name__, e.args)
logger.error(message)

return working_proxies


if __name__ == '__main__':
tic = time()
proxies = _check()
add(proxies, 'proxies')
_check()
tac = time()
print('Total time: [routine]', tac - tic)
print('Total time: [routine]', tac - tic)
50 changes: 50 additions & 0 deletions random_proxies/cache/main/update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals

# Will run after every 6 hours to update 'recents' collection

from time import time

from random_proxies.cache import db
from random_proxies.cache import is_good_proxy
from random_proxies.cache import logger


def _check():
    """Re-validate recently served proxies and recycle the working ones.

    Every document in the ``recents`` collection is probed with
    ``is_good_proxy``. A working proxy is moved back to the ``proxies``
    collection. A proxy whose probe raises is removed from ``recents`` and
    the exception is logged. A proxy that is reported as not working is left
    in ``recents``.
    """
    recents_collection = db['recents']
    proxies_collection = db['proxies']

    # Check if proxies are working in recents collection.
    # Read the documents up-front: the health checks are slow network calls
    # and documents are deleted while iterating.
    recents = list(recents_collection.find({}))
    for proxy in recents:
        # Keep the document id separate from the address handed to the
        # health check; the SOCKS prefix below must not leak into the id
        # used for deletion, otherwise delete_one() never matches.
        proxy_id = proxy['_id']
        address = proxy_id
        protocol = ('http', 'https')[proxy['https'] == 'yes']

        # Implies SOCKS proxy
        if 'version' in proxy:
            address = proxy['version'] + '://' + proxy_id
            protocol = 'http'

        try:
            # Only if it works
            if is_good_proxy(address, protocol=protocol):
                # Delete from recents
                recents_collection.delete_one({'_id': proxy_id})

                # Add them to proxies
                proxies_collection.insert_one(proxy)

        except Exception as e:
            # Delete from recents
            recents_collection.delete_one({'_id': proxy_id})
            template = 'An exception of type {0} occurred.\nArguments: {1!r}'
            message = template.format(type(e).__name__, e.args)
            logger.error(message)


if __name__ == '__main__':
    # Time a single pass over the 'recents' collection and report it.
    started = time()
    _check()
    finished = time()
    print('Total time: [update]', finished - started)
1 change: 0 additions & 1 deletion random_proxies/cache_server/.env.sample

This file was deleted.

Loading