From 53517c24bfd76d7d723232815e99fd41d76f2662 Mon Sep 17 00:00:00 2001
From: Ralph De Wit <ralphdewit@gmail.com>
Date: Fri, 15 Nov 2024 10:44:39 +0100
Subject: [PATCH] Created module with plotting functions for tutorial notebook
 (#3)

---
 .bumpversion.cfg                       |  35 +++++
 .gitignore                             |   1 +
 docs/conf.py                           |   2 +-
 docs/source/introduction.rst           |   6 +-
 notebooks/sensingclues_tutorial.py     | 178 +++++++++++----------
 pyproject.toml                         |  27 +---
 requirements_dev.txt                   |   4 +-
 sensingcluespy/__init__.py             |   6 +
 sensingcluespy/api_calls.py            | 113 +++++++++----
 sensingcluespy/src/helper_functions.py |   1 +
 sensingcluespy/src/visualization.py    | 210 +++++++++++++++++++++++++
 11 files changed, 440 insertions(+), 143 deletions(-)
 create mode 100644 .bumpversion.cfg
 create mode 100644 sensingcluespy/src/visualization.py
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
new file mode 100644
index 0000000..74aac16
--- /dev/null
+++ b/.bumpversion.cfg
@@ -0,0 +1,35 @@
+[bumpversion]
+current_version = 0.3.1
+commit = True
+tag = False
+parse = 
+	(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+) # major, minor and patch
+	(?:\-(?P<dev>(?:rc|))\.(?P<prenum>\d+))?       # pre-release
+serialize = 
+	{major}.{minor}.{patch}-{dev}.{prenum}
+	{major}.{minor}.{patch}
+commit_message = "bump version {old_version} -> {new_version}"
+tag_message = "{new_version}"
+tag_scope = "default"
+
+[bumpversion:part:dev]
+values = 
+	stable
+	rc
+
+[bumpversion:part:prenum]
+first_value = 0
+
+[bumpversion:file:sensingcluespy/__init__.py]
+search = __version__ = "{current_version}"
+replace = __version__ = "{new_version}"
+
+[bumpversion:file:docs/conf.py]
+search = release = "{current_version}"
+replace = release = "{new_version}"
+
+[bdist_wheel]
+universal = 1
+
+[options.packages.find]
+where = .
diff --git a/.gitignore b/.gitignore
index cdcada8..fb2493b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,6 +24,7 @@ share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
+.python-version
 MANIFEST
 
 # PyInstaller
diff --git a/docs/conf.py b/docs/conf.py
index ac41ec7..3f215a2 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -12,7 +12,7 @@
 project = "sensingcluespy"
 copyright = "2024, SensingClues"
 author = "SensingClues"
-release = "0.2.3"
+release = "0.3.1"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst
index 58ffef7..2018906 100644
--- a/docs/source/introduction.rst
+++ b/docs/source/introduction.rst
@@ -4,8 +4,10 @@ Introduction
 `SensingClues <https://sensingclues.org/>`_ allows you to record, monitor and analyze wildlife observations to support nature conservation initiatives.
 The package ``sensingcluespy`` allows Python-users to connect to SensingClues' database and download
 data logged using the Cluey-app. This includes wildlife observations and tracks, custom map layers,
-and the wildlife ontology used by SensingClues. **Note:** you need credentials for the SensingClues `Cluey <https://sensingclues.org/cluey>`_-app to
-connect to the database.
+and the wildlife ontology used by SensingClues.
+
+**Note:** you need credentials for the SensingClues `Cluey <https://sensingclues.org/cluey>`_-app to
+connect to the database. However, the tutorial notebook uses a read-only demo-user, allowing you to explore the available functionality, even without your own Cluey-account.
 
 
 
diff --git a/notebooks/sensingclues_tutorial.py b/notebooks/sensingclues_tutorial.py
index 8b4b568..92f49f3 100644
--- a/notebooks/sensingclues_tutorial.py
+++ b/notebooks/sensingclues_tutorial.py
@@ -4,8 +4,8 @@
 #     text_representation:
 #       extension: .py
 #       format_name: light
-#       format_version: '1.4'
-#       jupytext_version: 1.2.0
+#       format_version: '1.5'
+#       jupytext_version: 1.16.4
 #   kernelspec:
 #     display_name: Python 3 (ipykernel)
 #     language: python
@@ -16,18 +16,27 @@
 #
 # [SensingClues](https://sensingclues.org/) allows you to record, monitor and analyze wildlife observations to support nature conservation initiatives. This notebook shows the following:
 #
-# - **Basic**: the main SensingClues-functionality of **extracting observation and track data**. 
-# - **Advanced**: this section includes the usage of a hierarchy of available concepts (e.g. animal species or type of activity), which enhances reporting and analysis of the observation data. Further, we show how to collect and visualize layer data from SensingClues.
+# - **Core**: the main SensingClues-functionality of
+#     1. Extracting observation data
+#     2. Extracting track data
+# - **Advanced**: additional functionality including
+#     1. A hierarchy of available concepts (e.g. animal species or type of (illegal) activity), which enhances reporting and analysis of the observation data.
+#     2. Extraction and visualization of layer data from SensingClues.
 #
-# You can adapt this notebook to extract your own recordings. For more detail on what you can configure as a user, see the API-documentation of the `sensingcluespy`-package.
+# You can adapt this notebook to extract your own observation data. For more detail on what you can configure as a user, see the API-documentation of the `sensingcluespy`-package [here](https://sensingcluespy.readthedocs.io/en/latest/).
 #
 # ### Before you start
 #
 # To run this notebook, you should:
-# - create a personal account at SensingClues using the Cluey Data Collector app, which can be downloaded from the Google Playstore (not supported for iOS currently). Also see [here](https://sensingclues.org/portal).
-# - install the `sensingcluespy`-package in a virtual python environment (`pip install -e .` from the main directory of the repository).
-# - install the requirements in requirements.txt (if not already installed automatically in the previous step).
-# - create a file '.env' in the root of the wildcat-api-python-repository, containing your SensingClues credentials. These will be read in this notebook to log in. The file should look like this:
+# - Install the `sensingcluespy`-package in a virtual python environment (`pip install -e .` from the main directory of the repository).
+# - Install the requirements in requirements.txt (if not already installed automatically in the previous step).
+#   Install the requirements in requirements_dev.txt to create the plots in this notebook using the `matplotlib` and `folium`-packages (if not already installed automatically in the previous step).
+#
+# #### [Optional] Create your own user account
+#
+# For the purpose of this tutorial, we use a **read-only** user called "demo". If you want to continue using SensingClues for your own work (of course you want to! :-) ), then please do the following:
+# - Create a personal account at SensingClues using the Cluey Data Collector app, which can be downloaded from the Google Playstore (not supported for iOS currently). Also see [here](https://sensingclues.org/portal).
+# - Create a file '.env' in the root of the wildcat-api-python-repository, containing your SensingClues credentials. These will be read in this notebook to log in. The file should look like this:
 # ```
 # # SensingClues credentials
 # USERNAME=your_username
@@ -36,41 +45,46 @@
 
 # ## Configuration
 
+# +
+# N.B. While sensingcluespy does not require you to install visualization packages, this tutorial does.
+# To run this tutorial in full, please install matplotlib and folium (as contained in requirements_dev.txt).
+import folium
+import geopandas as gpd
+import matplotlib.pyplot as plt
 import os
-
 from dotenv import load_dotenv
 
 from sensingcluespy import sclogging
 from sensingcluespy.api_calls import SensingClues
 from sensingcluespy.src import helper_functions as helpers
+from sensingcluespy.src import visualization as viz
+# -
+
+plt.style.use("ggplot")
 
 logger = sclogging.get_sc_logger()
-sclogging.set_sc_log_level("DEBUG")
+sclogging.set_sc_log_level("INFO")
 
 load_dotenv()
 
 # %load_ext autoreload
 # %autoreload 2
 
-# N.B. you can place your credentials here as well, but this is not recommended.
-username = os.getenv("USERNAME")
-password = os.getenv("PASSWORD")
+# +
+# N.B. We recommend placing your credentials in an environment file and reading them like so:
+# username = os.getenv("USERNAME")
+# password = os.getenv("PASSWORD")
+
+# However, for the purpose of this demo, we use a read-only demo user:
+username = "demo"
+password = "demo"
+# -
 
 
 # ## Connect to SensingClues
 
 sensing_clues = SensingClues(username, password)
 
-# +
-# you should have logged in automatically by calling the class.
-# if not, you can call the login-method separately.
-# status = sensing_clues.login(username, password)
-
-# +
-# It is not necessary to log out, but you can do so by calling:
-# sensing_clues.logout()
-# -
-
 # ## Check available data
 #
 # By default, you have access to several groups of data, such as a demo dataset and a large dataset offered by [Global Forest Watch](https://www.globalforestwatch.org).
@@ -78,42 +92,55 @@
 info = sensing_clues.get_groups()
 info
 
-# specify the group(s) to extract data from
+# Specify the group(s) to extract data from
+# For this tutorial, focus-project-1234 contains demo observations,
+# while focus-project-3494596 contains demo tracks.
 groups = [
+    "focus-project-3494596",
     "focus-project-1234",
 ]
 
-# ## Basic functionality
+# ## Core functionality
 #
-# - Get observation data
-# - Get track data
+# Time to collect and plot some observation and track data!
 
 # ### Get observations
 #
-# You can filter the extracted observations in multiple ways, such as data, coordinates (bounding box) and concepts. For full detail on the options, see the documentation of the API. Some key features are shown here:
+# You can filter the extracted observations in multiple ways, such as timestamps, coordinates (bounding box) and concepts. Some key features are shown here:
 #
 # - **Date and time**: set `date_from` and/or `date_until` (in format %Y-%m-%d, assumes UTC).
 # - **Coordinates**: set `coord`, e.g. {"north": 32, "east": 20, "south": 31, "west": 17}.
-# - **Concepts**: set `concepts` to include, e.g. 'animal'. *See example shown later in this notebook*.
+# - **Concepts**: set `concepts` to include, e.g. 'animal'. *See detailed example later in this notebook*.
+#
+# For full detail on the options, see the documentation of the API [here](https://sensingcluespy.readthedocs.io/en/latest/).
 #
-# #### Notes
-# - Each observation has a unique `entityId` and may have multiple concepts (labels) associated with it,
-#  in which case the number of records in the observations-dataframe is larger than
-#  the number of observations mentioned by the logger.
-# - Reading all data in a group can take minutes or longer, depending on the size of the dataset. If you want to do a quick test, you can limit the number of pages to read by setting `page_nbr_sample`. 
+# #### Usage notes
+# - Reading all data in a group can take minutes or longer, depending on the size of the dataset. If you want to do a quick test, you can limit the number of pages to read by setting `page_nbr_sample`.
+# - Each observation has a unique `entityId` and may have multiple concepts (labels) associated with it, in which case the number of records in the observations-dataframe is larger than the number of observations mentioned by the logger.
 
-# a quick test can be done like so
-obs_sample = sensing_clues.get_observations(groups=groups, page_nbr_sample=2)
+# A quick check of the number of available records
+obs_sample = sensing_clues.get_observations(groups=groups, page_nbr_sample=1)
 
-# see the API-documentation for a full description of filter possibilities
-# to filter on concepts, see example shown later in this notebook.
 observations = sensing_clues.get_observations(
     groups=groups,
-    date_until="2018-07-01",
-    coord={"north": 32, "east": 20, "south": 31.5, "west": 10}
+    date_from="2024-07-01",
+    coord={"north": -17, "east": 30, "south": -19, "west": 20}
+)
+
+# #### Visualize these observations
+#
+# The standard plotting-function `plot_observations` shows a separate layer for all observation types (typically ['community_work', 'animal', 'community', 'poi', 'hwc'], where 'poi' = 'point of interest' and 'hwc' = 'human-wildlife-conflict').
+
+viz.plot_observations(
+    observations, 
+    show_heatmap="hwc_animal", 
+    padding=(25, 25)
 )
 
-observations.head()
+# You can explore the observations per observationType like so:
+observation_type = "animal"
+# observation_type = "hwc"
+observations.loc[observations["observationType"] == observation_type, "conceptLabel"].value_counts()
 
 # ### Get tracks
 #
@@ -121,18 +148,21 @@
 
 tracks = sensing_clues.get_tracks(
     groups=groups,
-    date_until="2018-07-01",
-    coord={"north": 32, "east": 20, "south": 31.5, "west": 10}
+    # date_from="2024-07-01",
+    # coord={"north": -17, "east": 30, "south": -19, "west": 20}
 )
 
 tracks.head()
 
-# #### Add geosjon-data to tracks
+# #### Visualize tracks
 #
-# If available, you can add geojson-data (including geometries) to the tracks.
+# If available, you can add geojson-data (including geometries) to the tracks and subsequently visualize the tracks.
 
 tracks_geo = sensing_clues.add_geojson_to_tracks(tracks)
 
+track_map = viz.plot_tracks(tracks_geo["geometry"])
+track_map
+
 # ## Advanced functionality
 
 # ### Get all available concepts and their hierarchy
@@ -171,7 +201,7 @@
 oid = "https://sensingclues.poolparty.biz/SCCSSOntology/222"
 helpers.get_label_for_id(hierarchy, oid)
 
-# #### Does this Kite have any children?
+# #### Does this Kite have any child concepts?
 
 label = "Kite"
 children_label = helpers.get_children_for_label(hierarchy, label)
@@ -184,7 +214,7 @@
 
 # ### Filter observations on concept
 #
-# Here we show an example of filtering the data on these concepts.
+# Here we show an example of filtering the data on concepts. The example filters on the concepts of Impala and Giraffe.
 #
 # **Instructions:**
 # - Set `concepts` to include, e.g. 'animal', specified as a Pool Party URL, e.g. "https://sensingclues.poolparty.biz/SCCSSOntology/186".
@@ -192,7 +222,6 @@
 # - Further, if you want to exclude subconcepts, i.e. keep observations with the label 'animal' but exclude observations with the label 'elephant', set `include_subconcepts=False`.
 #
 
-# +
 concept_animal = [
     "https://sensingclues.poolparty.biz/SCCSSOntology/308", # Impala
     "https://sensingclues.poolparty.biz/SCCSSOntology/319", # Giraffe    
@@ -200,15 +229,14 @@
     # or infer the id using a label, for instance:
     # helpers.get_id_for_label(hierarchy, "Animal sighting"),
 ]
-observations = sensing_clues.get_observations(
+concept_observations = sensing_clues.get_observations(
     groups=groups,
-    date_until="2018-07-01",
     concepts=concept_animal,
-    coord={"north": 32, "east": 20, "south": 31.5, "west": 10}
+    # date_from="2024-07-01",
+    # coord={"north": -17, "east": 30, "south": -19, "west": 20}
 )
-# -
 
-observations.head()
+concept_observations.head()
 
 # ### Count concepts related to observations
 #
@@ -219,7 +247,7 @@
 # - A list of child concepts, e.g. by extracting children for the label "Animal sighting" from hierarchy (see example below).
 
 date_from = "2010-01-01"
-date_until = "2024-01-01"
+date_until = "2024-08-01"
 label = "Animal sighting"
 children_label = helpers.get_children_for_label(hierarchy, label)
 concept_counts = sensing_clues.get_concept_counts(
@@ -230,15 +258,6 @@
 # #### Example: visualize concept counts
 #
 # To make the visualization intelligible, you can add information on labels from the `hierarchy`-dataframe.
-#
-# To do so, first install matplotlib.
-
-# +
-# # !pip install matplotlib
-# -
-
-import matplotlib.pyplot as plt
-plt.style.use("ggplot")
 
 min_freq = 10
 if not concept_counts.empty:
@@ -265,27 +284,22 @@
 layers = sensing_clues.get_all_layers()
 layers
 
-# ### Get details for an individual layer
-
-layer = sensing_clues.get_layer_features(layer_name="test_multipolygon")
-
-layer.head()
-
-# #### Plot available geometries
+# ### Visualize an individual layer
 #
-# This requires installation of library to visualize geospatial data. Here, we use Folium.
-
-# +
-# # !pip install folium
-# -
+# Get the features of an individual layer and visualize them.
 
-import folium
+layer = sensing_clues.get_layer_features(layer_name="Demo_countries")
+viz.plot_layer(layer)
 
-poly_map = folium.Map([51.9244, 4.4777], zoom_start=8, tiles="cartodbpositron")
-for _, geometry in layer["geometry"].items():
-    folium.GeoJson(geometry).add_to(poly_map)
-folium.LatLngPopup().add_to(poly_map)
-poly_map
+viz.plot_layer(layer)
 
+# ### Miscellaneous
 
+# +
+# You should have logged in automatically by calling the class.
+# If not, you can call the login-method separately.
+# status = sensing_clues.login(username, password)
 
+# +
+# It is not necessary to log out, but you can do so by calling:
+# sensing_clues.logout()
diff --git a/pyproject.toml b/pyproject.toml
index a2fa5cd..50e5475 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,6 @@ name = "sensingcluespy"
 authors = [
     {name = "sensing_clues", email = "sensingclues@typemail.com"},
 ]
-version = "0.2.3"
 description = "Python client to extract data from SensingClues"
 readme = "readme.md"
 requires-python = ">=3.9"
@@ -18,7 +17,7 @@ classifiers = [
     "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
 ]
-dynamic = ["dependencies", "optional-dependencies"]
+dynamic = ["dependencies", "optional-dependencies", "version"]
 
 [project.urls]
 Homepage = "https://sensingcluespy.readthedocs.io/en/latest/index.html"
@@ -26,6 +25,7 @@ Homepage = "https://sensingcluespy.readthedocs.io/en/latest/index.html"
 [tool.setuptools.dynamic]
 dependencies = {file = ["requirements.txt"]}
 optional-dependencies.dev = { file = ["requirements_dev.txt"] }
+version = {attr = "sensingcluespy.__version__"}
 
 [tool.setuptools.package-data]
 sensingclues = ["extractors/*"]
@@ -46,26 +46,3 @@ extend-exclude = '''
 
 [tool.isort]
 profile = "black"
-
-[tool.bumpver]
-current_version = "0.2.3"
-version_pattern = "MAJOR.MINOR.PATCH[-TAG]"
-commit_message = "bump version {old_version} -> {new_version}"
-tag_message = "{new_version}"
-tag_scope = "default"
-pre_commit_hook = ""
-post_commit_hook = ""
-commit = true
-tag = true
-push = true
-
-[tool.bumpver.file_patterns]
-"pyproject.toml" = [
-    'current_version = "{version}"',
-    'version = "{version}"',
-]
-"docs/conf.py" = [
-    'release = "{version}"',
-]
-
-
diff --git a/requirements_dev.txt b/requirements_dev.txt
index bd2bb17..f02bfd5 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -2,8 +2,10 @@ autopep8
 black
 bump2version
 flake8
-jupyterlab
+folium==0.16.0
+jupyterlab==4.2.5
 jupytext
+matplotlib==3.9.0
 mypy
 pip
 pre-commit
diff --git a/sensingcluespy/__init__.py b/sensingcluespy/__init__.py
index e69de29..3854f3f 100644
--- a/sensingcluespy/__init__.py
+++ b/sensingcluespy/__init__.py
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+
+"""Top-level package for sensingcluespy."""
+
+__author__ = "SensingClues"
+__version__ = "0.3.1"
diff --git a/sensingcluespy/api_calls.py b/sensingcluespy/api_calls.py
index f489450..5924c8f 100644
--- a/sensingcluespy/api_calls.py
+++ b/sensingcluespy/api_calls.py
@@ -5,7 +5,7 @@
 import warnings
 from typing import List, Union
 
-import geopandas
+import geopandas as gpd
 import numpy as np
 import pandas as pd
 import requests
@@ -94,7 +94,7 @@ def get_observations(
         groups: Union[str, List],
         include_subconcepts: bool = True,
         **kwargs,
-    ) -> pd.DataFrame:
+    ) -> gpd.GeoDataFrame:
         """Method to acquire observations data from SensingClues Focus
 
         Extra (filter) arguments can be passed to this method via kwargs.
@@ -131,6 +131,16 @@ def get_observations(
                 obs = obs.loc[obs["conceptId"] == concepts]
 
         obs = obs.rename(columns=col_trans)
+
+        obs["lon"] = obs["where"].apply(lambda x: x["coordinates"][0])
+        obs["lat"] = obs["where"].apply(lambda x: x["coordinates"][1])
+
+        obs = gpd.GeoDataFrame(
+            obs,
+            geometry=gpd.points_from_xy(obs["lon"], obs["lat"]),
+            crs="EPSG:4326"
+        )
+        
         return obs
 
     def get_tracks(
@@ -175,7 +185,7 @@ def get_tracks(
     def add_geojson_to_tracks(
         self,
         tracks: pd.DataFrame,
-    ) -> pd.DataFrame:
+    ) -> gpd.GeoDataFrame:
         """Add geojson data to track data
 
         For each unique track, extract geojson data
@@ -198,7 +208,7 @@ def add_geojson_to_tracks(
         for i, entity in enumerate(track_entities):
             payload = make_query(query_text=f"entityId:'{entity}'")
             req = self._api_call("post", url_addition, payload)
-            df_entity = geopandas.read_file(io.BytesIO(req.content))
+            df_entity = gpd.read_file(io.BytesIO(req.content))
             if not df_entity.empty:
                 logger.debug(f"Found geojson data for track {entity}.")
             else:
@@ -210,7 +220,9 @@ def add_geojson_to_tracks(
 
         tracks = tracks.merge(df, how="left", on="entityId")
 
-        return tracks
+        tracks_geo = gpd.GeoDataFrame(tracks, geometry=tracks["geometry"], crs="EPSG:4326")
+
+        return tracks_geo
 
     def get_all_layers(self, exclude_pids: list = None) -> pd.DataFrame:
         """Get layers to which the user has access
@@ -253,7 +265,7 @@ def get_layer_features(
         project_id: int = None,
         layer_id: int = None,
         exclude_pids: list = None,
-    ) -> geopandas.GeoDataFrame:
+    ) -> gpd.GeoDataFrame:
         """Extract details for a specific layer
 
         :param layer_name: Name of project to extract layer features for.
@@ -263,7 +275,7 @@ def get_layer_features(
         :param exclude_pids: List of pids to exclude, in addition to
             ['track', 'default'], which are always excluded. Default is None.
 
-        :returns: geopandas.DataFrame with features of the requested layer.
+        :returns: gpd.GeoDataFrame with features of the requested layer.
 
         """
         all_layers = self.get_all_layers(exclude_pids=exclude_pids)
@@ -290,8 +302,8 @@ def get_layer_features(
         url_addition = f"map/all/{project_id}/{layer_id}/features/"
         req = self._api_call("post", url_addition)
 
-        # relevant geometry information can be read using geopandas
-        gdf = geopandas.read_file(io.BytesIO(req.content))
+        # relevant geometry information can be read using geopandas
+        gdf = gpd.read_file(io.BytesIO(req.content))
 
         # TODO:
         #  some layers have additional columns, so implement option to extract
@@ -439,6 +451,8 @@ def _iterate_api(
     ) -> pd.DataFrame:
         """Make iterative calls to SensingClues Focus API to collect data
 
+        Calls are made per member group in groups.
+
         :param groups: Name(s) of groups to query from, passed as a string
             or as a list of strings, e.g. "focus-project-1234".
         :param extractor_name: Name of extractor configuration to use.
@@ -453,39 +467,75 @@ def _iterate_api(
         output_data = []
         extractor = DataExtractor(extractor_name)
 
-        # first, determine number of available records.
-        query = make_query(groups=groups, page_length=page_length, **kwargs)
+        # determine total number of available records (without filters)
+        query = make_query(
+            groups=groups,
+            page_length=page_length,
+            # page_nbr=1,
+        )
         req = self._api_call("post", "search/all/results", query)
-        nbr_pages = math.ceil(req.json()["total"] / page_length)
-        nbr_pages_decile = math.ceil(nbr_pages / 10)
+        n_records_all = req.json()['total']
         logger.info(
-            f"Scope '{groups}' contains {req.json()['total']} entities"
-            f" for data type '{extractor_name}'."
+            f"Scope {groups} contains {n_records_all} records for" 
+            f" data type '{extractor_name}', when not applying any filters."
         )
 
-        if page_nbr_sample:
-            nbr_pages = page_nbr_sample
-            logger.info(
-                f"Restricting number of pages to a sample of {nbr_pages}."
+        # determine total number of available records (with filters)
+        query = make_query(
+            groups=groups,
+            page_length=page_length,
+            page_nbr=1,
+            **kwargs,
+        )
+        req = self._api_call("post", "search/all/results", query)
+        n_records = req.json()['total']
+        logger.info(
+            f"When applying your filters, {n_records} records remain."
+        )
+
+        df = pd.DataFrame()
+        for group in ([groups] if isinstance(groups, str) else groups):
+            query = make_query(
+                groups=group,
+                page_length=page_length,
+                **kwargs,
             )
+            req = self._api_call("post", "search/all/results", query)
+            n_records_group = req.json()['total']
+            nbr_pages = math.ceil(n_records_group / page_length)
+            nbr_pages_decile = math.ceil(nbr_pages / 10)
+            if n_records_group == 0:
+                logger.warning(
+                    f"No data available for '{extractor_name}',"
+                    f" returning empty dataframe for group {group}."
+                )
+                df = pd.concat(
+                    [df, pd.DataFrame()], axis=0, ignore_index=True
+                )
+                continue
+
+            if page_nbr_sample:
+                nbr_pages = np.min([nbr_pages, page_nbr_sample])
+                logger.debug(
+                    f"page_nbr_sample set to {page_nbr_sample},"
+                    f" restricting number of pages for group {group}."
+                )
 
-        # second, verify extractor definition is correct for this particular
-        # group by using the first record in the query results.
-        if req.json()["total"] > 0:
-            # content of the first record in query results
+            # verify extractor definition is correct for each
+            # group by using the first record in the query results.
             record_content = req.json()["results"][0]["extracted"]["content"]
             ext_clean_o = align_extractor(extractor.ext_clean, record_content)
             extractor.ext_clean = ext_clean_o
 
             # extract the data
-            logger.info("Started reading available records.")
+            logger.info(f"Started  reading available records for group {group}.")
             for i_page in range(nbr_pages):
                 if np.mod(i_page, nbr_pages_decile) == 0:
-                    logger.info(
+                    logger.debug(
                         f"Reading page {i_page:>3d} out of {nbr_pages} pages."
                     )
                 query = make_query(
-                    groups=groups,
+                    groups=group,
                     page_length=page_length,
                     page_nbr=i_page,
                     **kwargs,
@@ -493,12 +543,11 @@ def _iterate_api(
                 req = self._api_call("post", "search/all/results", query)
                 data = extractor.extract_data(req.json())
                 output_data.extend(data)
-            logger.info("Finished reading available records.")
-        else:
-            logger.warning(
-                f"No data available for '{extractor_name}',"
-                " returning empty dataframe."
+
+            df = pd.concat([df, pd.DataFrame(output_data)], ignore_index=True)
+            output_data = []  # reset, else later groups re-add these records
+            logger.info(
+                f"Finished reading available records for group {group}."
             )
 
-        df = pd.DataFrame(output_data)
         return df
diff --git a/sensingcluespy/src/helper_functions.py b/sensingcluespy/src/helper_functions.py
index 0360ddd..794005c 100644
--- a/sensingcluespy/src/helper_functions.py
+++ b/sensingcluespy/src/helper_functions.py
@@ -321,6 +321,7 @@ def recursive_get_from_dict(nested_dict: dict, keys: list) -> Any:
     :returns: Value for deepest level in dictionary, or a call
         to this same function if the deepest level has not yet been reached.
     """
+
     head, *tail = keys
     if tail:
         return recursive_get_from_dict(nested_dict[head], tail)
diff --git a/sensingcluespy/src/visualization.py b/sensingcluespy/src/visualization.py
new file mode 100644
index 0000000..d5c5b41
--- /dev/null
+++ b/sensingcluespy/src/visualization.py
@@ -0,0 +1,210 @@
+"""Functions used for plotting of SensingClues-data"""
+
+from typing import Optional, Tuple
+
+import folium
+import geopandas as gpd
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from folium.plugins import HeatMap
+
+
+def plot_layer(
+    layer: gpd.GeoDataFrame,
+    geometry_col: str = "geometry",
+    name_col: str = "NAME"
+) -> folium.folium.Map:
+    """Visualize layer on a map
+
+    Parameters
+    ----------
+    layer : gpd.GeoDataFrame
+        GeoDataFrame with details of a layer.
+    geometry_col : str
+        Name of the column containing the layer geometries.
+    name_col : str
+        Name of the column containing the layer names.
+
+    Returns
+    -------
+    poly_map : folium.folium.Map
+        Map object.
+
+    """
+    poly_map = folium.Map(tiles="cartodbpositron")
+    colors = plt.rcParams['axes.prop_cycle'].by_key()["color"]
+
+    feature_groups = {}
+    rows = zip(layer[geometry_col], layer[name_col])  # pair by position
+    for i, (geometry, name) in enumerate(rows):
+        if name not in feature_groups:  # same name -> same feature group
+            feature_groups[name] = folium.FeatureGroup(name=name)
+        folium.GeoJson(
+            geometry,
+            color=colors[i % len(colors)],  # cycle through all colors
+            name=name
+        ).add_to(feature_groups[name])
+
+    for fg in feature_groups.values():
+        poly_map.add_child(fg)
+    folium.LatLngPopup().add_to(poly_map)
+    poly_map.fit_bounds(poly_map.get_bounds())
+    poly_map.add_child(folium.map.LayerControl(collapsed=False))
+
+    return poly_map
+
+
+def plot_observations(
+    observations: gpd.GeoDataFrame,
+    show_heatmap: Optional[str] = "all",
+    padding: Optional[Tuple[int, int]] = None,
+) -> folium.folium.Map:
+    """Visualize observations on a map
+
+    Each observation becomes a marker labelled with its concept, placed in
+    a toggleable feature group for its observation type.
+
+    Parameters
+    ----------
+    observations : gpd.GeoDataFrame
+        GeoDataFrame with observations. The columns "observationType",
+        "conceptLabel" and "geometry" (points) are read.
+    show_heatmap : {"all", "hwc_animal"} or None
+        If "all", include a heatmap of all observations.
+        If "hwc_animal", show separate heatmaps of human-wildlife-conflicts
+        and of animal observations.
+        For any other value (e.g. None), no heatmap is shown.
+    padding : Tuple[int, int], optional
+        Optional padding of the map, which is by default fit to the
+        bounds of the observations.
+
+    Returns
+    -------
+    poly_map : folium.folium.Map
+        Map object, with a layer control to toggle the markers per
+        observation type and the heatmap(s).
+
+    """
+    poly_map = folium.Map(tiles="cartodbpositron")
+
+    feature_groups = {
+        "community_work": folium.FeatureGroup(name='Community'),
+        "community": folium.FeatureGroup(name='Community'),
+        "animal": folium.FeatureGroup(name='Animal sighting'),
+        "hwc": folium.FeatureGroup(name='Human-wildlife-conflict'),
+        "poi": folium.FeatureGroup(name='Point of interest'),
+    }
+
+    # Marker style per observation type; other types use the default style.
+    community_fmt = {"icon": "fa-people-group", "color": "darkblue"}
+    icon_fmts = {
+        "animal": {"icon": "fa-paw", "color": "orange"},
+        "community": community_fmt,
+        "community_work": community_fmt,
+        "hwc": {"icon": "fa-triangle-exclamation", "color": "red"},
+        "poi": {"icon": "fa-leaf", "color": "darkgreen"},
+    }
+    default_fmt = {"icon": None, "color": "blue"}
+
+    for _, obs in observations.iterrows():
+        obs_type = obs["observationType"]
+        icon_fmt = icon_fmts.get(obs_type, default_fmt)
+        if obs_type not in feature_groups:
+            # Unknown types get their own layer instead of a KeyError.
+            feature_groups[obs_type] = folium.FeatureGroup(
+                name=str(obs_type)
+            )
+
+        folium.Marker(
+            [obs["geometry"].y, obs["geometry"].x],
+            obs["conceptLabel"],
+            icon=folium.Icon(**icon_fmt, prefix='fa')
+        ).add_to(feature_groups[obs_type])
+
+    if show_heatmap == "all":
+        # add heatmap for observations of all types
+        lat_lon = observations["geometry"].apply(lambda geom: [geom.y, geom.x])
+        hm = HeatMap(lat_lon, name="Heatmap").add_to(folium.FeatureGroup())
+        poly_map.add_child(hm)
+    elif show_heatmap == "hwc_animal":
+        # add heatmap for observations of type human-wildlife conflict ("hwc")
+        lat_lon_hwc = observations.loc[
+            observations["observationType"] == "hwc", "geometry"
+        ].apply(lambda geom: [geom.y, geom.x])
+        hm_hwc = HeatMap(
+            lat_lon_hwc,
+            name="HWC heatmap",
+            gradient={
+                0.4: 'brown',
+                0.65: 'orange',
+                1: 'red'},
+        ).add_to(folium.FeatureGroup())
+        poly_map.add_child(hm_hwc)
+
+        # add heatmap for observations of "animal"
+        lat_lon_animal = observations.loc[
+            observations["observationType"] == "animal", "geometry"
+        ].apply(lambda geom: [geom.y, geom.x])
+        hm_animal = HeatMap(
+            lat_lon_animal,
+            name="Animal heatmap",
+            gradient={
+                0.4: 'blue',
+                0.65: 'lime',
+                1: 'green'}
+        ).add_to(folium.FeatureGroup())
+        poly_map.add_child(hm_animal)
+    else:
+        # Do not show any heatmap
+        pass
+
+    # Register the marker layers on the map (after the heatmaps).
+    for fg in feature_groups.values():
+        poly_map.add_child(fg)
+
+    folium.LatLngPopup().add_to(poly_map)
+    # Zoom to everything drawn; `padding` is passed on to folium as given.
+    poly_map.fit_bounds(poly_map.get_bounds(), padding=padding)
+    poly_map.add_child(folium.map.LayerControl(collapsed=False))
+
+    return poly_map
+
+
+def plot_tracks(track_geometry: gpd.GeoSeries) -> folium.folium.Map:
+    """Visualize tracks on a map
+
+    Parameters
+    ----------
+    track_geometry : gpd.GeoSeries
+        GeoSeries with geometry of each track. Tracks without geometry
+        are skipped.
+
+    Returns
+    -------
+    poly_map : folium.folium.Map
+        Map object.
+
+    """
+    # Process track geometry so folium.PolyLine can handle it: one
+    # (lat, lon)-tuple per point, labelled with the index of its track.
+    tracks = track_geometry[track_geometry.notna()].explode(index_parts=True)
+    tracks = tracks.apply(lambda point: (point.y, point.x)).to_frame("geometry")
+    tracks = tracks.reset_index(level=0, names="track_id")
+
+    poly_map = folium.Map(tiles="cartodbpositron")
+
+    colors = plt.rcParams['axes.prop_cycle'].by_key()["color"]
+    track_ids = tracks["track_id"].unique().tolist()
+    for i, track_id in enumerate(track_ids):
+        track = tracks.loc[tracks["track_id"] == track_id, "geometry"]
+        folium.PolyLine(
+            track,
+            color=colors[i % len(colors)],  # cycle through default colors
+            weight=5,
+            opacity=0.8,
+        ).add_to(poly_map)
+    folium.LatLngPopup().add_to(poly_map)
+    poly_map.fit_bounds(poly_map.get_bounds())
+
+    return poly_map
\ No newline at end of file