Merge pull request #35 from arrrlo/feature/multithreading
Feature/multithreading
arrrlo authored Jun 13, 2019
2 parents 37e50d7 + 64e7012 commit bfa05d0
Showing 8 changed files with 225 additions and 60 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,12 @@
# Changelog

## 1.0.0

### Added in 1.0.0
- multithreaded image downloading
- download progress bars
- external progress bar insertion

## 0.3.8

### Fixed in 0.3.8
16 changes: 15 additions & 1 deletion README.md
@@ -84,7 +84,6 @@ _search_params = {
    'fileType': 'jpg|gif|png',
    'imgType': 'clipart|face|lineart|news|photo',
    'imgSize': 'huge|icon|large|medium|small|xlarge|xxlarge',
    'searchType': 'image',
    'imgDominantColor': 'black|blue|brown|gray|green|pink|purple|teal|white|yellow'
}

Expand All @@ -104,6 +103,21 @@ for image in gis.results():
image.resize(500, 500)
```

## Inserting custom progressbar function

```python
from google_images_search import GoogleImagesSearch

def my_progressbar(url, progress):
    print(url + ' ' + str(progress) + '%')

gis = GoogleImagesSearch(
    'your_dev_api_key', 'your_project_cx', progressbar_fn=my_progressbar
)

...
```
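
The `...` above stands for the usual search call. As a hedged, self-contained sketch of how the callback might be wired end-to-end (the query values below are illustrative and not part of this changeset):

```python
from google_images_search import GoogleImagesSearch

def my_progressbar(url, progress):
    # progress arrives as an integer percentage between 0 and 100
    print(url + ' ' + str(progress) + '%')

gis = GoogleImagesSearch(
    'your_dev_api_key', 'your_project_cx', progressbar_fn=my_progressbar
)

# illustrative search parameters; any key documented above may be used
_search_params = {'q': 'puppies', 'num': 3}

# my_progressbar is called with (url, percentage) while each image downloads
gis.search(search_params=_search_params, path_to_dir='/path/to/downloads')

for image in gis.results():
    print(image.url, image.path)
```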

## Saving to a BytesIO object

```python
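# (the body of this example is collapsed in the diff view)
#
# A hedged sketch of what saving into a BytesIO object can look like, based on
# the GSImage.get_raw_data()/copy_to() methods shown in fetch_resize_save.py
# below; the search parameters are illustrative:
from io import BytesIO
from google_images_search import GoogleImagesSearch

gis = GoogleImagesSearch('your_dev_api_key', 'your_project_cx')
gis.search({'q': 'puppies', 'num': 1})

for image in gis.results():
    my_bytes_io = BytesIO()
    # copy_to() writes the raw image bytes into the file-like object
    image.copy_to(my_bytes_io)
    my_bytes_io.seek(0)
```
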
15 changes: 4 additions & 11 deletions google_images_search/cli.py
@@ -1,6 +1,4 @@
import click
from termcolor import cprint
from pyfiglet import figlet_format

from .fetch_resize_save import FetchResizeSave
from .google_api import GoogleBackendException
@@ -12,7 +10,9 @@
@click.option('-c', '--custom_search_cx', help='Custom Search CX')
def cli(ctx, developer_key, custom_search_cx):
ctx.obj = {
'object': FetchResizeSave(developer_key, custom_search_cx)
'object': FetchResizeSave(
developer_key, custom_search_cx, progress=True
)
}


@@ -57,14 +57,10 @@ def search(ctx, query, num, safe, filetype, imagetype,

click.clear()

cprint(figlet_format('Google Images Search', width=120), 'red')

click.echo('-'*120)

try:
ctx.obj['object'].search(search_params, download_path, width, height)

for _, image in enumerate(ctx.obj['object'].results()):
for image in ctx.obj['object'].results():
click.echo(image.url)
if image.path:
click.secho(image.path, fg='blue')
@@ -78,6 +74,3 @@ def search(ctx, query, num, safe, filetype, imagetype,
click.secho('Error occurred trying to fetch '
'images from Google. Please try again.', fg='red')
return

click.echo('-'*120)
click.echo()
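
The hunks above show the CLI now passing `progress=True` to `FetchResizeSave`, replacing the old figlet banner with per-image curses progress bars. A hedged sketch of the equivalent flag when using the library directly, assuming `GoogleImagesSearch` accepts the same constructor arguments as `FetchResizeSave`:

```python
from google_images_search import GoogleImagesSearch

# progress=True turns on the built-in curses progress bars;
# pass progressbar_fn=<callable> instead to route progress to your own code
gis = GoogleImagesSearch('your_dev_api_key', 'your_project_cx', progress=True)

gis.search({'q': 'puppies', 'num': 2}, '/path/to/downloads')
```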
149 changes: 115 additions & 34 deletions google_images_search/fetch_resize_save.py
@@ -1,6 +1,7 @@
import os
import shutil
import curses
import requests
import threading
from PIL import Image
from resizeimage import resizeimage

@@ -10,10 +11,31 @@
class FetchResizeSave(object):
"""Class with resizing and downloading logic"""

def __init__(self, developer_key, custom_search_cx):
self._google_custom_search = GoogleCustomSearch(developer_key,
custom_search_cx)
self._search_resut = []
def __init__(self, developer_key, custom_search_cx,
progressbar_fn=None, progress=False):

# initialise google api
self._google_custom_search = GoogleCustomSearch(
developer_key, custom_search_cx, self)

self._search_result = list()

self._stdscr = None
self._progress = False
self._chunk_sizes = dict()
self._terminal_lines = dict()
self._download_progress = dict()
self._report_progress = progressbar_fn

if progressbar_fn:
# user-inserted progressbar fn
self._progress = True
else:
if progress:
# initialise internal progressbar
self._progress = True
self._stdscr = curses.initscr()
self._report_progress = self.__report_progress

def search(self, search_params, path_to_dir=False, width=None,
height=None, cache_discovery=True):
@@ -27,24 +49,70 @@ def search(self, search_params, path_to_dir=False, width=None,
:return: None
"""

for url in self._google_custom_search.search(search_params,
cache_discovery):
i = 0
threads = list()
for url in self._google_custom_search.search(
search_params, cache_discovery
):
# initialise image object
image = GSImage(self)
image.url = url

if path_to_dir:
image.download(path_to_dir)
if width and height:
image.resize(width, height)
# set thread safe variables
self._download_progress[url] = 0
self._terminal_lines[url] = i
i += 2

# set thread with function and arguments
thread = threading.Thread(
target=self._download_and_resize,
args=(path_to_dir, image, width, height)
)

# start thread
thread.start()

# register thread
threads.append(thread)

self._search_resut.append(image)
# wait for all threads to end here
for thread in threads:
thread.join()

if self._progress:
if self._stdscr:
curses.endwin()

def set_chunk_size(self, url, content_size):
"""Set images chunk size according to its size
:param url: image url
:param content_size: image size
:return: None
"""

self._chunk_sizes[url] = int(int(content_size) / 100) + 1

def _download_and_resize(self, path_to_dir, image, width, height):
"""Method used for threading
:param path_to_dir: path to download dir
:param image: image object
:param width: crop width
:param height: crop height
:return: None
"""

if path_to_dir:
image.download(path_to_dir)
if width and height:
image.resize(width, height)
self._search_result.append(image)

def results(self):
"""Returns objects of downloaded images
:return: list
"""

return self._search_resut
return self._search_result

def download(self, url, path_to_dir):
"""Downloads image from url to path dir
@@ -57,40 +125,37 @@ def download(self, url, path_to_dir):
if not os.path.exists(path_to_dir):
os.makedirs(path_to_dir)

raw_data = self.__class__.get_raw_data(url)

raw_filename = url.split('/')[-1].split('?')[0]
basename, ext = os.path.splitext(raw_filename)
filename = "".join(x for x in basename if x.isalnum()) + ext

path_to_image = os.path.join(path_to_dir, filename)

with open(path_to_image, 'wb') as f:
self.__class__.copy_to(raw_data, f)
with open(path_to_image, 'wb+') as f:
for chunk in self.get_raw_data(url):
f.write(chunk)

return path_to_image

@staticmethod
def get_raw_data(url):
"""Takes data from image url into a variable
def get_raw_data(self, url):
"""Generator method for downloading images in chunks
:param url: url to image
:return: raw image data
"""

req = requests.get(url, stream=True)
req.raw.decode_content = True
return req.raw
with requests.get(url, stream=True) as req:
for chunk in req.iter_content(chunk_size=self._chunk_sizes[url]):

@staticmethod
def copy_to(raw_data, obj):
"""
Copy raw image data to another object, preferably BytesIO
:param raw_data: raw image data
:param obj: BytesIO object
:return: None
"""
# filter out keep-alive new chunks
if chunk:

# report progress
if self._progress:
self._download_progress[url] += 1
if self._download_progress[url] <= 100:
self._report_progress(url, self._download_progress[url])

shutil.copyfileobj(raw_data, obj)
yield chunk

@staticmethod
def resize(path_to_image, width, height):
@@ -107,6 +172,22 @@ def resize(path_to_image, width, height):
img.save(path_to_image, img.format)
fd_img.close()

def __report_progress(self, url, progress):
"""Prints a progress bar in terminal
:param url:
:param progress:
:return:
"""

self._stdscr.addstr(
self._terminal_lines[url], 0, "Downloading file: {0}".format(url)
)
self._stdscr.addstr(
self._terminal_lines[url] + 1, 0,
"Progress: [{1:100}] {0}%".format(progress, "#" * progress)
)
self._stdscr.refresh()


class GSImage(object):
"""Class for handling one image"""
@@ -166,7 +247,7 @@ def get_raw_data(self):
:return: raw data
"""

return self._fetch_resize_save.__class__.get_raw_data(self._url)
return b''.join(list(self._fetch_resize_save.get_raw_data(self._url)))

def copy_to(self, obj, raw_data=None):
"""Copies raw image data to another object, preferably BytesIO
@@ -178,7 +259,7 @@ def copy_to(self, obj, raw_data=None):
if not raw_data:
raw_data = self.get_raw_data()

self._fetch_resize_save.__class__.copy_to(raw_data, obj)
obj.write(raw_data)

def resize(self, width, height):
"""Resize the image
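
Taken together, the new `search()` spawns one thread per image URL, each thread streams its file through the chunked `get_raw_data()` generator, and every chunk advances a per-URL counter that is reported either to a user-supplied callback or to the curses screen. A minimal standalone sketch of that download pattern, independent of this library (URLs and helper names here are illustrative):

```python
import threading
import requests

progress = {}  # per-URL percentage, updated from the download threads

def download(url, dest_path):
    # size the chunks from Content-Length so one chunk is roughly 1% of the file
    content_length = int(requests.head(url, timeout=5).headers['Content-Length'])
    chunk_size = content_length // 100 + 1

    with requests.get(url, stream=True) as resp, open(dest_path, 'wb') as f:
        for chunk in resp.iter_content(chunk_size=chunk_size):
            if chunk:  # skip keep-alive chunks
                f.write(chunk)
                progress[url] = min(progress.get(url, 0) + 1, 100)

urls = ['https://example.com/a.jpg', 'https://example.com/b.jpg']
threads = [
    threading.Thread(target=download, args=(url, url.split('/')[-1]))
    for url in urls
]
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()
```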
34 changes: 27 additions & 7 deletions google_images_search/google_api.py
@@ -7,14 +7,16 @@ class GoogleCustomSearch(object):
"""Wrapper class for Google images search api"""

def __init__(self, developer_key=None,
custom_search_cx=None):
custom_search_cx=None,
fethch_resize_save=None):

self._developer_key = developer_key or \
os.environ.get('GCS_DEVELOPER_KEY')
self._custom_search_cx = custom_search_cx or \
os.environ.get('GCS_CX')

self._google_build = None
self._fethch_resize_save = fethch_resize_save

self._search_params_keys = {
'q': None,
@@ -54,8 +56,10 @@ def _search_params(self, params):
for key, value in self._search_params_keys.items():
params_value = params.get(key)
if params_value:
# take user defined param value if defined
search_params[key] = params_value
elif value:
# take default param value if defined
search_params[key] = value

return search_params
@@ -70,18 +74,34 @@ def search(self, params, cache_discovery=True):

search_params = self._search_params(params)

try:
res = self._query_google_api(search_params, cache_discovery)
except:
raise GoogleBackendException()
res = self._query_google_api(search_params, cache_discovery)

for image in res.get('items'):
try:
check = requests.get(image['link'], timeout=5)
if check.status_code == 200:
response = requests.head(image['link'], timeout=5)
content_length = response.headers.get('Content-Length')

# check if the url is valid
if response.status_code == 200 and \
'image' in response.headers['Content-Type'] and \
content_length:

# calculate download chunk size based on image size
self._fethch_resize_save.set_chunk_size(
image['link'], content_length
)

# if everything is ok, yield image url back
yield image['link']

else:
# validation failed, go with another image
continue

except requests.exceptions.ConnectTimeout:
pass
except requests.exceptions.SSLError:
pass


class GoogleBackendException(Exception):
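
The `HEAD` request above does double duty: it validates each candidate URL (status 200, an image `Content-Type`, a known `Content-Length`) without downloading the body, and it feeds `Content-Length` into `set_chunk_size()`, whose formula `int(content_size / 100) + 1` splits the file into roughly 100 chunks so each chunk corresponds to about 1% of progress. A quick worked example of that arithmetic:

```python
# for a hypothetical 250,000-byte image:
content_length = 250000
chunk_size = int(content_length / 100) + 1       # 2501 bytes per chunk
num_chunks = -(-content_length // chunk_size)    # ceiling division -> 100 chunks
# so each chunk written advances the per-URL progress counter by roughly 1%
```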
4 changes: 2 additions & 2 deletions setup.py
@@ -8,7 +8,7 @@ def readme():

setup(
name='Google Images Search',
version="0.3.8",
version="1.0.0",

description='Search for image using Google Custom Search API and resize & crop the image afterwords',
long_description=readme(),
@@ -21,7 +21,7 @@ def readme():
author_email='[email protected]',

classifiers=[
'Development Status :: 4 - Beta',
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',
'Topic :: Software Development :: Build Tools',
'License :: OSI Approved :: MIT License',
