Skip to content

Commit

Permalink
Check the metadata file and create one if it is missing; sort the metadata …
Browse files Browse the repository at this point in the history
…and the search terms
  • Loading branch information
khaledk2 committed Nov 2, 2023
1 parent b5e6a69 commit e550a93
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 24 deletions.
12 changes: 10 additions & 2 deletions omero_search_engine/api/stats/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
from flask import Response, send_file
from omero_search_engine import search_omero_app
import os
from omero_search_engine.validation.results_validator import (
get_omero_stats,
)


@stats.route("/", methods=["GET"])
Expand All @@ -40,7 +43,6 @@ def search_terms(resource):
"Content-disposition": "attachment; filename=%s_stats.csv" % (resource)
},
)
return "OMERO search engine (search_terms API)"


@stats.route("/metadata", methods=["GET"])
Expand All @@ -49,7 +51,13 @@ def get_metadata():
if not os.path.isdir(base_folder):
base_folder = os.path.expanduser("~")
metadata = os.path.join(base_folder, "metadata.csv")

if os.path.isfile(metadata):
return send_file(metadata, as_attachment=True)
else:
return "No metadata is avilable"
report = get_omero_stats(return_contents=True)
return Response(
report,
mimetype="text/csv",
headers={"Content-disposition": "attachment; filename=metadata.csv"},
)
26 changes: 14 additions & 12 deletions omero_search_engine/validation/results_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,8 +630,8 @@ def test_no_images():
"""


def get_omero_stats():
values = ["Attribute", "No. buckets", "Total number", "Resource"]
def get_omero_stats(return_contents=False):
values = ["Resource", "Attribute", "No. buckets", "Total number"]
base_folder = "/etc/searchengine/"
if not os.path.isdir(base_folder):
base_folder = os.path.expanduser("~")
Expand All @@ -644,41 +644,43 @@ def get_omero_stats():

data = []
terms = get_restircted_search_terms()
data.append(",".join(values))
for resource, names in terms.items():
for name in names:
if name == "name":
continue
returned_results = query_cashed_bucket(name, resource)
if resource == "image":
data.append(
"%s, %s, %s,%s"
% (
[
resource,
name,
returned_results.get("total_number_of_buckets"),
returned_results.get("total_number_of_image"),
resource,
)
]
)
else:
kk = "total_number_of_%s" % resource
data.append(
"%s, %s, %s,%s"
% (
[
resource,
name,
returned_results.get("total_number_of_buckets"),
returned_results.get(kk),
resource,
)
]
)

for dat in returned_results.get("data"):
if not dat["Value"]:
print("Value is empty string", dat["Key"])
report = "\n".join(data)
import pandas as pd

df = pd.DataFrame(data, columns=values)
df2 = df.sort_values(by=["Resource", "No. buckets"], ascending=[True, False])
report = df2.to_csv()
with open(metadata_file, "w") as f:
f.write(report)
if return_contents:
return report


def get_no_images_sql_containers():
Expand Down
25 changes: 15 additions & 10 deletions tools/utils/logs_analyser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


def get_search_terms(folder_name, resource=None, return_file_content=False):
logging.info("checking files inisde: %s" % folder_name)
logging.info("checking files inside: %s" % folder_name)
resourses = {}
for root, dirs, files in os.walk(folder_name):
logging.info("0....%s,%s,%s" % (root, dirs, files))
Expand Down Expand Up @@ -60,7 +60,7 @@ def analyse_log_file(file_name, resourses):
filters.append(json.loads(ss, strict=False))
suc = suc + 1
except Exception as e:
print(str(e))
logging.info(str(e))
failes = failes + 1

for filter in filters:
Expand All @@ -86,22 +86,27 @@ def check_filters(conds, resourses):

def write_reports(resourses, resource, return_file_content, file_name):
for res, itms in resourses.items():
lines = ["key,total hits,unique hits"]
columns = ["key", "total hits", "unique hits"]
lines = []
for name, values in itms.items():
line = [name]
lines.append(line)
vv = []
for val in values:
if val not in vv:
vv.append(val)
line.insert(1, str(len(values)))
line.insert(2, str(len(vv)))
lines.append(",".join(line))
contents = "\n".join(lines)
line.insert(1, len(values))
line.insert(2, len(vv))
import pandas as pd

df = pd.DataFrame(lines, columns=columns)
df2 = df.sort_values(by=["total hits", "unique hits"], ascending=[False, False])
contents = df2.to_csv()
if return_file_content:
if res == resource:
print("================================")
print(resource, return_file_content)
print("================================")
logging.info("================================")
logging.info("%s, %s" % (resource, return_file_content))
logging.info("================================")
return contents
else:
f = open(file_name.replace(".csv", "_%s.csv" % res), "w")
Expand Down

0 comments on commit e550a93

Please sign in to comment.