Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated vendors and added features #12

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions cloudy_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,44 @@ def render_from_template(directory, template_name, **kwargs):
return template.render(**kwargs)


def stringify_result(r):
    """Format one standardized result tuple as a single display string.

    The tuple is read positionally:

    * ``r[0]`` -- the label text (always present),
    * ``r[1]`` -- the score, or ``None`` when the result has no score,
    * ``r[2]`` -- an optional annotation string.

    Returns the label, followed by ``(<score with two decimals>)`` when a
    score is present and ``(<annotation>)`` when a non-empty annotation is
    present.
    """
    s = r[0]
    # Test against None instead of truthiness: a score of 0 is still a score
    # and must be printed (the HTML template applies the same
    # "score is not none" rule).
    if r[1] is not None:
        s += "(%.2f)" % r[1]
    if len(r) > 2 and r[2]:
        s += "(" + r[2] + ")"
    return s

def render_tsv_report(image_results, vendor_stats, report_date):
    """Render all image results as tab-separated text, one row per image.

    The column layout is decided once, from the first image result:
    ``File``, then ``Image tags`` (only when the first image has tags), then
    one ``<vendor_name>_<feature_name>`` column for every feature in each of
    the first image's vendor results. Every row is then filled by looking up
    those same (vendor, feature) pairs, so an image whose vendor result lacks
    a feature gets an empty cell instead of shifting the following columns.
    Vendors or features that do not appear in the first image have no column
    and are therefore not reported.

    Args:
        image_results: Sequence of dicts with the keys
            ``output_image_filepath``, ``vendors`` (a list of dicts holding
            ``vendor_name`` and ``standardized_result``) and, optionally,
            ``image_tags``.
        vendor_stats: Not used by this function; kept for the caller.
        report_date: Not used by this function; kept for the caller.

    Returns:
        The header line followed by one line per image. Every line ends with
        a tab and a newline. For an empty ``image_results`` only the
        ``File`` header line is returned.
    """
    if not image_results:
        return "File\t\n"

    first_result = image_results[0]
    has_tags_column = bool(first_result.get('image_tags'))

    # (vendor_name, feature_name) pairs, in header order.
    columns = [
        (vendor['vendor_name'], feature_name)
        for vendor in first_result['vendors']
        for feature_name in vendor['standardized_result']
    ]

    header_cells = ["File"]
    if has_tags_column:
        header_cells.append("Image tags")
    header_cells.extend(
        vendor_name + "_" + feature_name
        for vendor_name, feature_name in columns
    )
    lines = ["\t".join(header_cells) + "\t\n"]

    for image_result in image_results:
        cells = [image_result['output_image_filepath']]
        if has_tags_column:
            # The column exists because of the first image; images without
            # tags contribute an empty cell to keep the columns aligned.
            cells.append(', '.join(image_result.get('image_tags') or []))

        results_by_vendor = {
            vendor['vendor_name']: vendor['standardized_result']
            for vendor in image_result['vendors']
        }
        for vendor_name, feature_name in columns:
            feature_results = (
                results_by_vendor.get(vendor_name, {}).get(feature_name) or []
            )
            cells.append(
                ', '.join(stringify_result(r) for r in feature_results)
            )
        lines.append("\t".join(cells) + "\t\n")

    return "".join(lines)


def vendor_statistics(image_results):
vendor_stats = {}

Expand Down Expand Up @@ -259,6 +297,17 @@ def process_all_images():
with open(output_html_filepath, 'w') as output_html_file:
output_html_file.write(output_html.encode('utf-8'))

# Create TSV report with all results
output_tsv = render_tsv_report(
image_results,
vendor_stats,
datetime.datetime.today()
)

# Write TSV output
output_tsv_filepath = os.path.join(settings('output_dir'), 'output.tsv')
with open(output_tsv_filepath, 'w') as output_tsv_file:
output_tsv_file.write(output_tsv.encode('utf-8'))

if __name__ == "__main__":
process_all_images()
12 changes: 10 additions & 2 deletions example_api_keys.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
{
"google" : "YOUR KEY HERE",
"microsoft" : "YOUR KEY HERE",
"ibm" : "YOUR KEY HERE",
"microsoft" : {
"location": "YOUR LOCATION HERE (eg. canadacentral, westus2...)",
"api_key": "YOUR KEY HERE"
},
"ibm" : {
"api_key": "YOUR KEY HERE",
"url": "https://gateway.watsonplatform.net/visual-recognition/api",
"language": "en"
},
"clarifai": "YOUR KEY HERE",
"cloudsight" : {
"api_key" : "YOUR KEY HERE",
"api_secret" : "YOUR SECRET HERE"
Expand Down
5 changes: 1 addition & 4 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Run a corpus of images through multiple computer vision API vendors. View image
1. Optionally match the tags returned with your desired tags to test accuracy.
1. Calculate stats around response times, number of tags returned, etc.
1. Create a scaled copy of the original image with height 200px.
1. Generate output.html to show all the images and labeling results in an easy to consume manner.
1. Generate output.html and output.tsv to show all the images and labeling results in an easy-to-consume manner.

## Usage

Expand All @@ -30,9 +30,6 @@ Run a corpus of images through multiple computer vision API vendors. View image

The keys should not be placed in the api_keys.json file but in ~/.aws/credentials and ~/.aws/config. See http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html#cli-config-files

### Note for Clarifai

The keys should not be placed in the api_keys.json file but in ~/.clarifai/config. See https://github.com/Clarifai/clarifai-python#setup

## Desired Tags

Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
numpy==1.12.1
cloudsight==0.9.2
clarifai==2.0.21
watson-developer-cloud==0.25.2
clarifai==2.2.3
watson-developer-cloud==1.6.0
Jinja2==2.9.6
boto3==1.4.4
Pillow==2.9.0
Pillow==2.9.0
20 changes: 17 additions & 3 deletions static/template.html
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,24 @@ <h2 class="image_name">desired_tags:
<td class="result_values">
{% for result in feature_results %}
{% set score = result[1] %}
{% set annotation = result[2] %}

{% if feature_name == 'colors' %}
<span class="result">
{% else %}
<span class="result" style="opacity: {{ score }}">
{{ result[0] }}
{% endif %}
{% if feature_name == 'colors' %}
<span style="background-color: {{ result[0] }};">{{ result[0] }}</span>
{% else %}
{{ result[0] }}
{% endif %}

{% if score is not none %}
<span class="score">({{ ("%.2f" % score).lstrip('0') }})</span>
<span class="score">({{ ("%.2f" % score).lstrip('0') }})</span>
{% endif %}
{% if annotation %}
<span>({{ annotation }})</span>
{% endif %}
{% if not loop.last %}
,
Expand Down Expand Up @@ -128,7 +142,7 @@ <h2 class="image_name">desired_tags:
{% endfor %}

{% if vendor_stats|length > 0 %}
<div class="row" id="statistics">
<div class="row" id="statistics">
<div class="twelve columns result">
<h4>Stats</h4>
<p>
Expand Down
2 changes: 1 addition & 1 deletion vendors/clarifai_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


def call_vision_api(image_filename, api_keys):
app = ClarifaiApp()
app = ClarifaiApp(api_key=api_keys['clarifai'])
model = app.models.get('general-v1.3')
image = ClImage(file_obj=open(image_filename, 'rb'))
result = model.predict([image])
Expand Down
10 changes: 10 additions & 0 deletions vendors/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ def call_vision_api(image_filename, api_keys):
"type": "SAFE_SEARCH_DETECTION",
"maxResults": 10
},
{
"type": "IMAGE_PROPERTIES",
"maxResults": 10
},
]
}
]
Expand Down Expand Up @@ -71,4 +75,10 @@ def get_standardized_result(api_result):
for annotation in api_result['logoAnnotations']:
output['logo_tags'].append((annotation['description'], annotation['score']))

if 'imagePropertiesAnnotation' in api_result and 'dominantColors' in api_result['imagePropertiesAnnotation']:
output['colors'] = []
for color_annotation in api_result['imagePropertiesAnnotation']['dominantColors']['colors']:
color = "#{0:x}".format(int(color_annotation['color'].get('red') or 0)) + "{0:x}".format(int(color_annotation['color'].get('green') or 0)) + "{0:x}".format(int(color_annotation['color'].get('blue') or 0))
output['colors'].append((color, color_annotation['score']))

return output
31 changes: 26 additions & 5 deletions vendors/ibm.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,26 @@
from watson_developer_cloud import VisualRecognitionV3

def call_vision_api(image_filename, api_keys):
    """Classify one image with IBM Watson Visual Recognition.

    Args:
        image_filename: Path of the image file to send; opened in binary mode.
        api_keys: Mapping whose ``'ibm'`` entry is itself a mapping with the
            keys ``'api_key'``, ``'url'`` and ``'language'``.

    Returns:
        The value returned by ``VisualRecognitionV3.classify``, unmodified.

    Raises:
        KeyError: If ``api_keys['ibm']`` or one of its three keys is missing.
    """
    ibm_settings = api_keys['ibm']
    api_key = ibm_settings['api_key']
    iam_url = ibm_settings['url']
    language = ibm_settings['language']

    # Via example found in the code snippets from the Implementation doc
    # in the IBM Watson dashboard/console
    visual_recognition = VisualRecognitionV3('2018-03-19',
                                             iam_api_key=api_key,
                                             url=iam_url)

    # Single classify call; the configured language is sent as the
    # Accept-Language header of the request.
    with open(image_filename, 'rb') as image_file:
        result = visual_recognition.classify(
            images_file=image_file,
            headers={'Accept-Language': language},
        )

    return result


def get_standardized_result(api_result):
color_indicators = ['color', 'couleur']
# add color word in other languages if you use a different
# language, as per the configs in api_keys
output = {
'tags' : [],
}
Expand All @@ -28,4 +35,18 @@ def get_standardized_result(api_result):
for tag_data in api_result['classes']:
output['tags'].append((tag_data['class'], tag_data['score']))

# some tags contain the word "color", so we use that indicator
# to extract color information. This is not as reliable as other
# vendors providing structured information about colors, but it
# is better than nothing
color_tags = [tag for tag in api_result['classes'] for color_indicator in color_indicators if color_indicator in tag['class']]
if len(color_tags) > 0:
output['colors'] = []
for color_tag in color_tags:
color = color_tag['class']
for color_indicator in color_indicators:
color = color.replace(color_indicator, '')
color = color.strip()
output['colors'].append((color, color_tag['score']))

return output
37 changes: 25 additions & 12 deletions vendors/microsoft.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
import requests

def call_vision_api(image_filename, api_keys):
api_key = api_keys['microsoft']
post_url = "https://api.projectoxford.ai/vision/v1.0/analyze?visualFeatures=Categories,Tags,Description,Faces,ImageType,Color,Adult&subscription-key=" + api_key
api_key = api_keys['microsoft']['api_key']
location = api_keys['microsoft']['location']
post_url = "https://" + location + ".api.cognitive.microsoft.com/vision/v1.0/analyze?visualFeatures=Categories,Tags,Description,Faces,ImageType,Color,Adult&subscription-key=" + api_key

image_data = open(image_filename, 'rb').read()
result = requests.post(post_url, data=image_data, headers={'Content-Type': 'application/octet-stream'})
Expand All @@ -30,25 +31,37 @@ def get_standardized_result(api_result):
output = {
'tags' : [],
'captions' : [],
# 'categories' : [],
'categories' : []
# 'adult' : [],
# 'image_types' : []
# 'tags_without_score' : {}
}

for tag_data in api_result['tags']:
output['tags'].append((tag_data['name'], tag_data['confidence']))
if 'description' in api_result:
if 'tags' in api_result['description']:
for tag_data in api_result['description']['tags']:
output['tags'].append((tag_data, None))
# tags do not have confidence or scores

for caption in api_result['description']['captions']:
output['captions'].append((caption['text'], caption['confidence']))
if 'captions' in api_result['description']:
for caption in api_result['description']['captions']:
output['captions'].append((caption['text'], caption['confidence']))

# for category in api_result['categories']:
# output['categories'].append(([category['name'], category['score']))
if 'categories' in api_result:
for category in api_result['categories']:
output['categories'].append((category['name'], category['score']))

# output['adult'] = api_result['adult']
if 'color' in api_result:
output['colors'] = []
for color_type in ['accentColor', 'dominantColorBackground', 'dominantColorForeground']:
if color_type in api_result['color']:
output['colors'].append((api_result['color'][color_type], None, color_type))
# colors do not have confidence or scores
if 'dominantColors' in api_result['color']:
for color in api_result['color']['dominantColors']:
output['colors'].append((color, None, 'dominant'))

# for tag in api_result['description']['tags']:
# output['tags_without_score'][tag] = 'n/a'
# output['adult'] = api_result['adult']

# output['image_types'] = api_result['imageType']

Expand Down