Skip to content

Commit

Permalink
processors: allow filtering out robots/machines
Browse files: browse the repository at this point in the history
  • Loading branch information
slint committed Aug 26, 2024
1 parent 634b376 commit fd7ee92
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions invenio_stats/processors.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -88,8 +88,8 @@ def anonymize_user(doc):
return doc


def flag_robots(doc):
"""Flag events which are created by robots.
def flag_robots(doc, exclude=False):
"""Flag and filter events which are created by robots.
The list of robots is defined by the `COUNTER-robots Python package
<https://github.com/inveniosoftware/counter-robots>`_ , which follows the
Expand All @@ -99,11 +99,13 @@ def flag_robots(doc):
<https://github.com/CDLUC3/Make-Data-Count/tree/master/user-agents>`_.
"""
doc["is_robot"] = "user_agent" in doc and is_robot(doc["user_agent"])
if exclude and doc["is_robot"]:
return None
return doc


def flag_machines(doc):
"""Flag events which are created by machines.
def flag_machines(doc, exclude=False):
"""Flag and filter events which are created by machines.
The list of machines is defined by the `COUNTER-robots Python package
<https://github.com/inveniosoftware/counter-robots>`_ , which follows the
Expand All @@ -114,6 +116,8 @@ def flag_machines(doc):
"""
doc["is_machine"] = "user_agent" in doc and is_machine(doc["user_agent"])
if exclude and doc["is_machine"]:
return None
return doc


Expand Down

0 comments on commit fd7ee92

Please sign in to comment.