Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions Example/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


my_downloader = simp.Downloader()
my_downloader.search_urls('Landsapes',limit=10, verbose=True)
my_downloader.search_urls('Landscapes',limit=10, verbose=True)

# Get List of Saved URLs in cache
print(my_downloader.get_urls())
Expand All @@ -13,15 +13,21 @@
# Download + search file
my_downloader.download('spaceship', limit=2)

# Now donwload all the Searched picture
# Now download all the Searched picture
my_downloader.download(download_cache=True)

# Flush cache
my_downloader.flush_cache()

# Change Direcotory
# Change Directory
my_downloader.directory = 'my_dir/'
# Change File extension type
my_downloader.extensions = '.jpg'
print(my_downloader.extensions)
my_downloader.download('laptop', limit=10, verbose=True)


# Flush cache
my_downloader.flush_cache()
# Example with Google filters
my_downloader.download('space', limit=10, verbose=True, filters={'size': 'l', 'specific_color': 'orange'})
87 changes: 76 additions & 11 deletions simple_image_download/simple_image_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import functools
import time


################
# ---> CONSTANTS
################
Expand All @@ -18,20 +17,80 @@
GOOGLE_PICTURE_ID = '''&biw=1536&bih=674&tbm=isch&sxsrf=ACYBGNSXXpS6YmAKUiLKKBs6xWb4uUY5gA:1581168823770&source=lnms&sa=X&ved=0ahUKEwioj8jwiMLnAhW9AhAIHbXTBMMQ_AUI3QUoAQ'''
HEADERS = {
'User-Agent':
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
}
SCANNER_COUNTER = None

VALID_FILTERS = {
'size': ['l', 'm', 'i'], # large, medium, icon
'color': ['gray', 'trans'], # black and white, transparent
'specific_color': ['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown'],
'type': ['clipart', 'lineart', 'animated'],
'time': ['d', 'w', 'm', 'y'], # last 24 hours, last week, last month, last year
'usage_rights': ['cl', 'ol'] # Creative Commons License, Commercial and other Licenses
}


def generate_search_url(keywords):
    """Split a comma-separated keyword string into individual search terms.

    :param keywords: str, one or more search terms separated by commas,
        e.g. ``'cats, dogs'``.
    :return: tuple ``(terms, count)`` where *terms* is the list of cleaned
        term strings and *count* is ``len(terms)``.
    """
    # Strip surrounding whitespace from each term; without this,
    # 'cats, dogs' yields ' dogs' and the leading space ends up
    # URL-quoted into the search query.
    keywords_to_search = [item.strip() for item in keywords.split(',')]
    keywords_count = len(keywords_to_search)
    return keywords_to_search, keywords_count


def generate_urls(search):
"""Generates a URLS in the correct format that brings to Google Image seearch page"""
return [(BASE_URL+quote(word)+GOOGLE_PICTURE_ID) for word in search]
def check_if_filters_are_valid(filters):
    """Validate a Google Images filter mapping against VALID_FILTERS.

    :param filters: dict mapping filter keys (e.g. 'size', 'type') to a
        single filter value.
    :return: tuple ``(is_valid, message)``; *message* is ``""`` when the
        filters are valid, otherwise a human-readable description of the
        first problem found.
    """
    # 'color' and 'specific_color' both translate to the same Google URL
    # parameter, so the two keys are mutually exclusive.
    if 'color' in filters and 'specific_color' in filters:
        return False, "Cannot have both 'color' and 'specific_color' keys at the same time."

    for key, value in filters.items():
        if key not in VALID_FILTERS:
            return False, "Filter key '{0}' not valid. Valid keys are: {1}".format(quote(key), list(VALID_FILTERS.keys()))
        if value not in VALID_FILTERS[key]:
            return False, "Filter value '{0}' for key '{1}' is not valid. Valid values are: {2}".format(value, key,
                                                                                                        VALID_FILTERS[
                                                                                                            key])

    return True, ""


def generate_filters_string(filters):
    """Translate a filter dict into a Google Images ``&tbs=`` URL fragment.

    :param filters: dict of filter key -> value (see VALID_FILTERS).
    :return: the ``&tbs=...`` query fragment, or ``""`` when the filters
        fail validation (the validation error is printed in that case).
    """
    is_valid_filters, filter_error = check_if_filters_are_valid(filters)
    if not is_valid_filters:
        print(filter_error)
        return ""

    # Google's URL-parameter prefix for each supported filter key.
    prefixes = {
        'size': "isz:",
        'color': "ic:",
        'specific_color': "ic:specific%2Cisc:",
        'type': "itp:",
        'time': "qdr:",
        'usage_rights': "il:",
    }
    # Individual terms are joined with the URL-encoded comma "%2C".
    terms = [prefixes[key] + value for key, value in filters.items()]
    return "&tbs=" + "%2C".join(terms)


def generate_urls(search, filters):
    """Build one Google Images search-page URL per search term.

    :param search: iterable of keyword strings to search for.
    :param filters: pre-built ``&tbs=...`` filter fragment (``""`` when no
        filters are applied).
    :return: list of fully-qualified Google Images search URLs.
    """
    return [f"{BASE_URL}{quote(term)}{filters}{GOOGLE_PICTURE_ID}" for term in search]


def check_webpage(url):
Expand Down Expand Up @@ -74,6 +133,7 @@ class Downloader:
Main Downloader
::param extension:iterable of Files extensions
"""

def __init__(self, extensions=None):
if extensions:
self._extensions = set(*[extensions])
Expand Down Expand Up @@ -103,7 +163,6 @@ def extensions(self):
def extensions(self, value):
self._extensions = set([value])


def get_urls(self):
return [self._cached_urls[url][1].url
for url in self._cached_urls]
Expand All @@ -114,10 +173,13 @@ def _download_page(self, url):
resp_data = str(resp.read())
return resp_data

def search_urls(self, keywords, limit=1, verbose=False, cache=True, timer=None):
def search_urls(self, keywords, limit=10, verbose=False, cache=True, timer=None, filters={}):
cache_out = {}
search, count = generate_search_url(keywords)
urls_ = generate_urls(search)
filters_ = generate_filters_string(filters)
if filters is not {} and filters_ == "":
return
urls_ = generate_urls(search, filters_)
timer = timer if timer else 1000
max_progressbar = count * (list(range(limit+1))[-1]+1)
bar = progressbar.ProgressBar(maxval=max_progressbar,
Expand Down Expand Up @@ -146,16 +208,19 @@ def search_urls(self, keywords, limit=1, verbose=False, cache=True, timer=None):
self._cached_urls = cache_out
if not cache_out:
print('==='*15 + ' < ' + 'NO PICTURES FOUND' + ' > ' + '==='*15)

return cache_out

def download(self, keywords=None, limit=1, verbose=False, cache=True, download_cache=False, timer=None):

def download(self, keywords=None, limit=1, verbose=False, cache=True, download_cache=False, timer=None, filters={}):
if not download_cache:
content = self.search_urls(keywords, limit, verbose, cache, timer)
content = self.search_urls(keywords, limit, verbose, cache, timer, filters)
else:
content = self._cached_urls
if not content:
print('Downloader has not URLs saved in Memory yet, run Downloader.search_urls to find pics first')
for name, (path, url) in content.items():
name = name.replace(" ", "_")
with open(os.path.join(path, name), 'wb') as file:
file.write(url.content)
if verbose:
Expand Down